Index: head/sys/net/if.c =================================================================== --- head/sys/net/if.c (revision 120726) +++ head/sys/net/if.c (revision 120727) @@ -1,2008 +1,2010 @@ /* * Copyright (c) 1980, 1986, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)if.c 8.5 (Berkeley) 1/9/95 * $FreeBSD$ */ #include "opt_compat.h" #include "opt_inet6.h" #include "opt_inet.h" #include "opt_mac.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #if defined(INET) || defined(INET6) /*XXX*/ #include #include #ifdef INET6 #include #include #endif #endif #ifdef INET #include #endif static int ifconf(u_long, caddr_t); static void if_grow(void); static void if_init(void *); static void if_check(void *); static int if_findindex(struct ifnet *); static void if_qflush(struct ifqueue *); static void if_slowtimo(void *); static void link_rtrequest(int, struct rtentry *, struct rt_addrinfo *); static int if_rtdel(struct radix_node *, void *); static struct if_clone *if_clone_lookup(const char *, int *); static int if_clone_list(struct if_clonereq *); static int ifhwioctl(u_long, struct ifnet *, caddr_t, struct thread *); #ifdef INET6 /* * XXX: declare here to avoid to include many inet6 related files.. * should be more generalized? */ extern void nd6_setmtu(struct ifnet *); #endif int if_index = 0; struct ifindex_entry *ifindex_table = NULL; int ifqmaxlen = IFQ_MAXLEN; struct ifnethead ifnet; /* depend on static init XXX */ struct mtx ifnet_lock; static int if_cloners_count; LIST_HEAD(, if_clone) if_cloners = LIST_HEAD_INITIALIZER(if_cloners); static int if_indexlim = 8; static struct klist ifklist; static void filt_netdetach(struct knote *kn); static int filt_netdev(struct knote *kn, long hint); static struct filterops netdev_filtops = { 1, NULL, filt_netdetach, filt_netdev }; /* * System initialization */ SYSINIT(interfaces, SI_SUB_INIT_IF, SI_ORDER_FIRST, if_init, NULL) SYSINIT(interface_check, SI_SUB_PROTO_IF, SI_ORDER_FIRST, if_check, NULL) MALLOC_DEFINE(M_IFADDR, "ifaddr", "interface address"); MALLOC_DEFINE(M_IFMADDR, "ether_multi", "link-level multicast address"); MALLOC_DEFINE(M_CLONE, "clone", "interface cloning framework"); static d_open_t netopen; static d_close_t netclose; static d_ioctl_t netioctl; static d_kqfilter_t netkqfilter; static struct cdevsw net_cdevsw = { .d_open = netopen, .d_close = netclose, .d_ioctl = netioctl, .d_name = "net", .d_kqfilter = netkqfilter, }; static int netopen(dev_t dev, int flag, int mode, struct thread *td) { return (0); } static int netclose(dev_t dev, int flags, int fmt, struct thread *td) { return (0); } static int netioctl(dev_t dev, u_long cmd, caddr_t data, int flag, struct thread *td) { struct ifnet *ifp; int error, idx; /* only support interface specific ioctls */ if (IOCGROUP(cmd) != 'i') return (EOPNOTSUPP); idx = minor(dev); if (idx == 0) { /* * special network device, not interface. */ if (cmd == SIOCGIFCONF) return (ifconf(cmd, data)); /* XXX remove cmd */ return (EOPNOTSUPP); } ifp = ifnet_byindex(idx); if (ifp == NULL) return (ENXIO); error = ifhwioctl(cmd, ifp, data, td); if (error == ENOIOCTL) error = EOPNOTSUPP; return (error); } static int netkqfilter(dev_t dev, struct knote *kn) { struct klist *klist; struct ifnet *ifp; int idx; idx = minor(dev); if (idx == 0) { klist = &ifklist; } else { ifp = ifnet_byindex(idx); if (ifp == NULL) return (1); klist = &ifp->if_klist; } switch (kn->kn_filter) { case EVFILT_NETDEV: kn->kn_fop = &netdev_filtops; break; default: return (1); } kn->kn_hook = (caddr_t)klist; /* XXX locking? */ SLIST_INSERT_HEAD(klist, kn, kn_selnext); return (0); } static void filt_netdetach(struct knote *kn) { struct klist *klist = (struct klist *)kn->kn_hook; if (kn->kn_status & KN_DETACHED) return; SLIST_REMOVE(klist, kn, knote, kn_selnext); } static int filt_netdev(struct knote *kn, long hint) { /* * Currently NOTE_EXIT is abused to indicate device detach. */ if (hint == NOTE_EXIT) { kn->kn_data = NOTE_LINKINV; kn->kn_status |= KN_DETACHED; kn->kn_flags |= (EV_EOF | EV_ONESHOT); return (1); } kn->kn_data = hint; /* current status */ if (kn->kn_sfflags & hint) kn->kn_fflags |= hint; return (kn->kn_fflags != 0); } /* * Network interface utility routines. * * Routines with ifa_ifwith* names take sockaddr *'s as * parameters. */ /* ARGSUSED*/ static void if_init(dummy) void *dummy; { IFNET_LOCK_INIT(); TAILQ_INIT(&ifnet); SLIST_INIT(&ifklist); if_grow(); /* create initial table */ ifdev_byindex(0) = make_dev(&net_cdevsw, 0, UID_ROOT, GID_WHEEL, 0600, "network"); } static void if_grow(void) { u_int n; struct ifindex_entry *e; if_indexlim <<= 1; n = if_indexlim * sizeof(*e); e = malloc(n, M_IFADDR, M_WAITOK | M_ZERO); if (ifindex_table != NULL) { memcpy((caddr_t)e, (caddr_t)ifindex_table, n/2); free((caddr_t)ifindex_table, M_IFADDR); } ifindex_table = e; } /* ARGSUSED*/ static void if_check(dummy) void *dummy; { struct ifnet *ifp; int s; s = splimp(); IFNET_RLOCK(); /* could sleep on rare error; mostly okay XXX */ TAILQ_FOREACH(ifp, &ifnet, if_link) { if (ifp->if_snd.ifq_maxlen == 0) { printf("%s%d XXX: driver didn't set ifq_maxlen\n", ifp->if_name, ifp->if_unit); ifp->if_snd.ifq_maxlen = ifqmaxlen; } if (!mtx_initialized(&ifp->if_snd.ifq_mtx)) { printf("%s%d XXX: driver didn't initialize queue mtx\n", ifp->if_name, ifp->if_unit); mtx_init(&ifp->if_snd.ifq_mtx, "unknown", MTX_NETWORK_LOCK, MTX_DEF); } } IFNET_RUNLOCK(); splx(s); if_slowtimo(0); } static int if_findindex(struct ifnet *ifp) { int i, unit; char eaddr[18], devname[32]; const char *name, *p; switch (ifp->if_type) { case IFT_ETHER: /* these types use struct arpcom */ case IFT_FDDI: case IFT_XETHER: case IFT_ISO88025: case IFT_L2VLAN: snprintf(eaddr, 18, "%6D", ((struct arpcom *)ifp->if_softc)->ac_enaddr, ":"); break; default: eaddr[0] = '\0'; break; } snprintf(devname, 32, "%s%d", ifp->if_name, ifp->if_unit); name = net_cdevsw.d_name; i = 0; while ((resource_find_dev(&i, name, &unit, NULL, NULL)) == 0) { if (resource_string_value(name, unit, "ether", &p) == 0) if (strcmp(p, eaddr) == 0) goto found; if (resource_string_value(name, unit, "dev", &p) == 0) if (strcmp(p, devname) == 0) goto found; } unit = 0; found: if (unit != 0) { if (ifaddr_byindex(unit) == NULL) return (unit); printf("%s%d in use, cannot hardwire it to %s.\n", name, unit, devname); } for (unit = 1; ; unit++) { if (unit <= if_index && ifaddr_byindex(unit) != NULL) continue; if (resource_string_value(name, unit, "ether", &p) == 0 || resource_string_value(name, unit, "dev", &p) == 0) continue; break; } return (unit); } /* * Attach an interface to the * list of "active" interfaces. */ void if_attach(ifp) struct ifnet *ifp; { unsigned socksize, ifasize; int namelen, masklen; char workbuf[64]; register struct sockaddr_dl *sdl; register struct ifaddr *ifa; IFNET_WLOCK(); TAILQ_INSERT_TAIL(&ifnet, ifp, if_link); IFNET_WUNLOCK(); /* * XXX - * The old code would work if the interface passed a pre-existing * chain of ifaddrs to this code. We don't trust our callers to * properly initialize the tailq, however, so we no longer allow * this unlikely case. */ TAILQ_INIT(&ifp->if_addrhead); TAILQ_INIT(&ifp->if_prefixhead); TAILQ_INIT(&ifp->if_multiaddrs); SLIST_INIT(&ifp->if_klist); getmicrotime(&ifp->if_lastchange); #ifdef MAC mac_init_ifnet(ifp); mac_create_ifnet(ifp); #endif ifp->if_index = if_findindex(ifp); if (ifp->if_index > if_index) if_index = ifp->if_index; if (if_index >= if_indexlim) if_grow(); ifnet_byindex(ifp->if_index) = ifp; ifdev_byindex(ifp->if_index) = make_dev(&net_cdevsw, ifp->if_index, UID_ROOT, GID_WHEEL, 0600, "%s/%s%d", net_cdevsw.d_name, ifp->if_name, ifp->if_unit); make_dev_alias(ifdev_byindex(ifp->if_index), "%s%d", net_cdevsw.d_name, ifp->if_index); mtx_init(&ifp->if_snd.ifq_mtx, ifp->if_name, "if send queue", MTX_DEF); /* * create a Link Level name for this device */ namelen = snprintf(workbuf, sizeof(workbuf), "%s%d", ifp->if_name, ifp->if_unit); #define _offsetof(t, m) ((int)((caddr_t)&((t *)0)->m)) masklen = _offsetof(struct sockaddr_dl, sdl_data[0]) + namelen; socksize = masklen + ifp->if_addrlen; #define ROUNDUP(a) (1 + (((a) - 1) | (sizeof(long) - 1))) if (socksize < sizeof(*sdl)) socksize = sizeof(*sdl); socksize = ROUNDUP(socksize); ifasize = sizeof(*ifa) + 2 * socksize; ifa = (struct ifaddr *)malloc(ifasize, M_IFADDR, M_WAITOK | M_ZERO); if (ifa) { IFA_LOCK_INIT(ifa); sdl = (struct sockaddr_dl *)(ifa + 1); sdl->sdl_len = socksize; sdl->sdl_family = AF_LINK; bcopy(workbuf, sdl->sdl_data, namelen); sdl->sdl_nlen = namelen; sdl->sdl_index = ifp->if_index; sdl->sdl_type = ifp->if_type; ifaddr_byindex(ifp->if_index) = ifa; ifa->ifa_ifp = ifp; ifa->ifa_rtrequest = link_rtrequest; ifa->ifa_addr = (struct sockaddr *)sdl; sdl = (struct sockaddr_dl *)(socksize + (caddr_t)sdl); ifa->ifa_netmask = (struct sockaddr *)sdl; sdl->sdl_len = masklen; while (namelen != 0) sdl->sdl_data[--namelen] = 0xff; ifa->ifa_refcnt = 1; TAILQ_INSERT_HEAD(&ifp->if_addrhead, ifa, ifa_link); } ifp->if_broadcastaddr = 0; /* reliably crash if used uninitialized */ /* Announce the interface. */ rt_ifannouncemsg(ifp, IFAN_ARRIVAL); } /* * Detach an interface, removing it from the * list of "active" interfaces. */ void if_detach(ifp) struct ifnet *ifp; { struct ifaddr *ifa; struct radix_node_head *rnh; int s; int i; /* * Remove routes and flush queues. */ s = splnet(); if_down(ifp); /* * Remove address from ifindex_table[] and maybe decrement if_index. * Clean up all addresses. */ ifaddr_byindex(ifp->if_index) = NULL; destroy_dev(ifdev_byindex(ifp->if_index)); ifdev_byindex(ifp->if_index) = NULL; while (if_index > 0 && ifaddr_byindex(if_index) == NULL) if_index--; for (ifa = TAILQ_FIRST(&ifp->if_addrhead); ifa; ifa = TAILQ_FIRST(&ifp->if_addrhead)) { #ifdef INET /* XXX: Ugly!! ad hoc just for INET */ if (ifa->ifa_addr && ifa->ifa_addr->sa_family == AF_INET) { struct ifaliasreq ifr; bzero(&ifr, sizeof(ifr)); ifr.ifra_addr = *ifa->ifa_addr; if (ifa->ifa_dstaddr) ifr.ifra_broadaddr = *ifa->ifa_dstaddr; if (in_control(NULL, SIOCDIFADDR, (caddr_t)&ifr, ifp, NULL) == 0) continue; } #endif /* INET */ #ifdef INET6 if (ifa->ifa_addr && ifa->ifa_addr->sa_family == AF_INET6) { in6_purgeaddr(ifa); /* ifp_addrhead is already updated */ continue; } #endif /* INET6 */ TAILQ_REMOVE(&ifp->if_addrhead, ifa, ifa_link); IFAFREE(ifa); } #ifdef INET6 /* * Remove all IPv6 kernel structs related to ifp. This should be done * before removing routing entries below, since IPv6 interface direct * routes are expected to be removed by the IPv6-specific kernel API. * Otherwise, the kernel will detect some inconsistency and bark it. */ in6_ifdetach(ifp); #endif /* * Delete all remaining routes using this interface * Unfortuneatly the only way to do this is to slog through * the entire routing table looking for routes which point * to this interface...oh well... */ for (i = 1; i <= AF_MAX; i++) { if ((rnh = rt_tables[i]) == NULL) continue; RADIX_NODE_HEAD_LOCK(rnh); (void) rnh->rnh_walktree(rnh, if_rtdel, ifp); RADIX_NODE_HEAD_UNLOCK(rnh); } /* Announce that the interface is gone. */ rt_ifannouncemsg(ifp, IFAN_DEPARTURE); #ifdef MAC mac_destroy_ifnet(ifp); #endif /* MAC */ KNOTE(&ifp->if_klist, NOTE_EXIT); IFNET_WLOCK(); TAILQ_REMOVE(&ifnet, ifp, if_link); IFNET_WUNLOCK(); mtx_destroy(&ifp->if_snd.ifq_mtx); splx(s); } /* * Delete Routes for a Network Interface * * Called for each routing entry via the rnh->rnh_walktree() call above * to delete all route entries referencing a detaching network interface. * * Arguments: * rn pointer to node in the routing table * arg argument passed to rnh->rnh_walktree() - detaching interface * * Returns: * 0 successful * errno failed - reason indicated * */ static int if_rtdel(rn, arg) struct radix_node *rn; void *arg; { struct rtentry *rt = (struct rtentry *)rn; struct ifnet *ifp = arg; int err; if (rt->rt_ifp == ifp) { /* * Protect (sorta) against walktree recursion problems * with cloned routes */ if ((rt->rt_flags & RTF_UP) == 0) return (0); err = rtrequest(RTM_DELETE, rt_key(rt), rt->rt_gateway, rt_mask(rt), rt->rt_flags, (struct rtentry **) NULL); if (err) { log(LOG_WARNING, "if_rtdel: error %d\n", err); } } return (0); } /* * Create a clone network interface. */ int if_clone_create(name, len) char *name; int len; { struct if_clone *ifc; char *dp; int wildcard, bytoff, bitoff; int unit; int err; ifc = if_clone_lookup(name, &unit); if (ifc == NULL) return (EINVAL); if (ifunit(name) != NULL) return (EEXIST); bytoff = bitoff = 0; wildcard = (unit < 0); /* * Find a free unit if none was given. */ if (wildcard) { while ((bytoff < ifc->ifc_bmlen) && (ifc->ifc_units[bytoff] == 0xff)) bytoff++; if (bytoff >= ifc->ifc_bmlen) return (ENOSPC); while ((ifc->ifc_units[bytoff] & (1 << bitoff)) != 0) bitoff++; unit = (bytoff << 3) + bitoff; } if (unit > ifc->ifc_maxunit) return (ENXIO); err = (*ifc->ifc_create)(ifc, unit); if (err != 0) return (err); if (!wildcard) { bytoff = unit >> 3; bitoff = unit - (bytoff << 3); } /* * Allocate the unit in the bitmap. */ KASSERT((ifc->ifc_units[bytoff] & (1 << bitoff)) == 0, ("%s: bit is already set", __func__)); ifc->ifc_units[bytoff] |= (1 << bitoff); /* In the wildcard case, we need to update the name. */ if (wildcard) { for (dp = name; *dp != '\0'; dp++); if (snprintf(dp, len - (dp-name), "%d", unit) > len - (dp-name) - 1) { /* * This can only be a programmer error and * there's no straightforward way to recover if * it happens. */ panic("if_clone_create(): interface name too long"); } } return (0); } /* * Destroy a clone network interface. */ int if_clone_destroy(name) const char *name; { struct if_clone *ifc; struct ifnet *ifp; int bytoff, bitoff; int unit; ifc = if_clone_lookup(name, &unit); if (ifc == NULL) return (EINVAL); if (unit < ifc->ifc_minifs) return (EINVAL); ifp = ifunit(name); if (ifp == NULL) return (ENXIO); if (ifc->ifc_destroy == NULL) return (EOPNOTSUPP); (*ifc->ifc_destroy)(ifp); /* * Compute offset in the bitmap and deallocate the unit. */ bytoff = unit >> 3; bitoff = unit - (bytoff << 3); KASSERT((ifc->ifc_units[bytoff] & (1 << bitoff)) != 0, ("%s: bit is already cleared", __func__)); ifc->ifc_units[bytoff] &= ~(1 << bitoff); return (0); } /* * Look up a network interface cloner. */ static struct if_clone * if_clone_lookup(name, unitp) const char *name; int *unitp; { struct if_clone *ifc; const char *cp; int i; for (ifc = LIST_FIRST(&if_cloners); ifc != NULL;) { for (cp = name, i = 0; i < ifc->ifc_namelen; i++, cp++) { if (ifc->ifc_name[i] != *cp) goto next_ifc; } goto found_name; next_ifc: ifc = LIST_NEXT(ifc, ifc_list); } /* No match. */ return ((struct if_clone *)NULL); found_name: if (*cp == '\0') { i = -1; } else { for (i = 0; *cp != '\0'; cp++) { if (*cp < '0' || *cp > '9') { /* Bogus unit number. */ return (NULL); } i = (i * 10) + (*cp - '0'); } } if (unitp != NULL) *unitp = i; return (ifc); } /* * Register a network interface cloner. */ void if_clone_attach(ifc) struct if_clone *ifc; { int bytoff, bitoff; int err; int len, maxclone; int unit; KASSERT(ifc->ifc_minifs - 1 <= ifc->ifc_maxunit, ("%s: %s requested more units then allowed (%d > %d)", __func__, ifc->ifc_name, ifc->ifc_minifs, ifc->ifc_maxunit + 1)); /* * Compute bitmap size and allocate it. */ maxclone = ifc->ifc_maxunit + 1; len = maxclone >> 3; if ((len << 3) < maxclone) len++; ifc->ifc_units = malloc(len, M_CLONE, M_WAITOK | M_ZERO); ifc->ifc_bmlen = len; LIST_INSERT_HEAD(&if_cloners, ifc, ifc_list); if_cloners_count++; for (unit = 0; unit < ifc->ifc_minifs; unit++) { err = (*ifc->ifc_create)(ifc, unit); KASSERT(err == 0, ("%s: failed to create required interface %s%d", __func__, ifc->ifc_name, unit)); /* Allocate the unit in the bitmap. */ bytoff = unit >> 3; bitoff = unit - (bytoff << 3); ifc->ifc_units[bytoff] |= (1 << bitoff); } } /* * Unregister a network interface cloner. */ void if_clone_detach(ifc) struct if_clone *ifc; { LIST_REMOVE(ifc, ifc_list); free(ifc->ifc_units, M_CLONE); if_cloners_count--; } /* * Provide list of interface cloners to userspace. */ static int if_clone_list(ifcr) struct if_clonereq *ifcr; { char outbuf[IFNAMSIZ], *dst; struct if_clone *ifc; int count, error = 0; ifcr->ifcr_total = if_cloners_count; if ((dst = ifcr->ifcr_buffer) == NULL) { /* Just asking how many there are. */ return (0); } if (ifcr->ifcr_count < 0) return (EINVAL); count = (if_cloners_count < ifcr->ifcr_count) ? if_cloners_count : ifcr->ifcr_count; for (ifc = LIST_FIRST(&if_cloners); ifc != NULL && count != 0; ifc = LIST_NEXT(ifc, ifc_list), count--, dst += IFNAMSIZ) { strncpy(outbuf, ifc->ifc_name, IFNAMSIZ); outbuf[IFNAMSIZ - 1] = '\0'; /* sanity */ error = copyout(outbuf, dst, IFNAMSIZ); if (error) break; } return (error); } -#define equal(a1, a2) \ - (bcmp((caddr_t)(a1), (caddr_t)(a2), ((struct sockaddr *)(a1))->sa_len) == 0) +#define equal(a1, a2) (bcmp((a1), (a2), ((a1))->sa_len) == 0) /* * Locate an interface based on a complete address. */ /*ARGSUSED*/ struct ifaddr * ifa_ifwithaddr(addr) struct sockaddr *addr; { struct ifnet *ifp; struct ifaddr *ifa; IFNET_RLOCK(); TAILQ_FOREACH(ifp, &ifnet, if_link) TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { if (ifa->ifa_addr->sa_family != addr->sa_family) continue; if (equal(addr, ifa->ifa_addr)) goto done; /* IP6 doesn't have broadcast */ if ((ifp->if_flags & IFF_BROADCAST) && ifa->ifa_broadaddr && ifa->ifa_broadaddr->sa_len != 0 && equal(ifa->ifa_broadaddr, addr)) goto done; } ifa = NULL; done: IFNET_RUNLOCK(); return (ifa); } /* * Locate the point to point interface with a given destination address. */ /*ARGSUSED*/ struct ifaddr * ifa_ifwithdstaddr(addr) struct sockaddr *addr; { struct ifnet *ifp; struct ifaddr *ifa; IFNET_RLOCK(); TAILQ_FOREACH(ifp, &ifnet, if_link) { if ((ifp->if_flags & IFF_POINTOPOINT) == 0) continue; TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { if (ifa->ifa_addr->sa_family != addr->sa_family) continue; if (ifa->ifa_dstaddr && equal(addr, ifa->ifa_dstaddr)) goto done; } } ifa = NULL; done: IFNET_RUNLOCK(); return (ifa); } /* * Find an interface on a specific network. If many, choice * is most specific found. */ struct ifaddr * ifa_ifwithnet(addr) struct sockaddr *addr; { register struct ifnet *ifp; register struct ifaddr *ifa; struct ifaddr *ifa_maybe = (struct ifaddr *) 0; u_int af = addr->sa_family; char *addr_data = addr->sa_data, *cplim; /* * AF_LINK addresses can be looked up directly by their index number, * so do that if we can. */ if (af == AF_LINK) { - register struct sockaddr_dl *sdl = (struct sockaddr_dl *)addr; + struct sockaddr_dl *sdl = (struct sockaddr_dl *)addr; if (sdl->sdl_index && sdl->sdl_index <= if_index) return (ifaddr_byindex(sdl->sdl_index)); } /* * Scan though each interface, looking for ones that have * addresses in this address family. */ IFNET_RLOCK(); TAILQ_FOREACH(ifp, &ifnet, if_link) { TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { register char *cp, *cp2, *cp3; if (ifa->ifa_addr->sa_family != af) next: continue; if (af == AF_INET && ifp->if_flags & IFF_POINTOPOINT) { /* * This is a bit broken as it doesn't * take into account that the remote end may * be a single node in the network we are * looking for. * The trouble is that we don't know the * netmask for the remote end. */ if (ifa->ifa_dstaddr != 0 && equal(addr, ifa->ifa_dstaddr)) goto done; } else { /* * if we have a special address handler, * then use it instead of the generic one. */ if (ifa->ifa_claim_addr) { if ((*ifa->ifa_claim_addr)(ifa, addr)) goto done; continue; } /* * Scan all the bits in the ifa's address. * If a bit dissagrees with what we are * looking for, mask it with the netmask * to see if it really matters. * (A byte at a time) */ if (ifa->ifa_netmask == 0) continue; cp = addr_data; cp2 = ifa->ifa_addr->sa_data; cp3 = ifa->ifa_netmask->sa_data; cplim = ifa->ifa_netmask->sa_len + (char *)ifa->ifa_netmask; while (cp3 < cplim) if ((*cp++ ^ *cp2++) & *cp3++) goto next; /* next address! */ /* * If the netmask of what we just found * is more specific than what we had before * (if we had one) then remember the new one * before continuing to search * for an even better one. */ if (ifa_maybe == 0 || rn_refines((caddr_t)ifa->ifa_netmask, (caddr_t)ifa_maybe->ifa_netmask)) ifa_maybe = ifa; } } } ifa = ifa_maybe; done: IFNET_RUNLOCK(); return (ifa); } /* * Find an interface address specific to an interface best matching * a given address. */ struct ifaddr * ifaof_ifpforaddr(addr, ifp) struct sockaddr *addr; register struct ifnet *ifp; { register struct ifaddr *ifa; register char *cp, *cp2, *cp3; register char *cplim; struct ifaddr *ifa_maybe = 0; u_int af = addr->sa_family; if (af >= AF_MAX) return (0); TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { if (ifa->ifa_addr->sa_family != af) continue; if (ifa_maybe == 0) ifa_maybe = ifa; if (ifa->ifa_netmask == 0) { if (equal(addr, ifa->ifa_addr) || (ifa->ifa_dstaddr && equal(addr, ifa->ifa_dstaddr))) goto done; continue; } if (ifp->if_flags & IFF_POINTOPOINT) { if (equal(addr, ifa->ifa_dstaddr)) goto done; } else { cp = addr->sa_data; cp2 = ifa->ifa_addr->sa_data; cp3 = ifa->ifa_netmask->sa_data; cplim = ifa->ifa_netmask->sa_len + (char *)ifa->ifa_netmask; for (; cp3 < cplim; cp3++) if ((*cp++ ^ *cp2++) & *cp3) break; if (cp3 == cplim) goto done; } } ifa = ifa_maybe; done: return (ifa); } #include /* * Default action when installing a route with a Link Level gateway. * Lookup an appropriate real ifa to point to. * This should be moved to /sys/net/link.c eventually. */ static void link_rtrequest(cmd, rt, info) int cmd; register struct rtentry *rt; struct rt_addrinfo *info; { - register struct ifaddr *ifa; + register struct ifaddr *ifa, *oifa; struct sockaddr *dst; struct ifnet *ifp; + RT_LOCK_ASSERT(rt); + if (cmd != RTM_ADD || ((ifa = rt->rt_ifa) == 0) || ((ifp = ifa->ifa_ifp) == 0) || ((dst = rt_key(rt)) == 0)) return; ifa = ifaof_ifpforaddr(dst, ifp); if (ifa) { - IFAFREE(rt->rt_ifa); IFAREF(ifa); /* XXX */ + oifa = rt->rt_ifa; rt->rt_ifa = ifa; + IFAFREE(oifa); if (ifa->ifa_rtrequest && ifa->ifa_rtrequest != link_rtrequest) ifa->ifa_rtrequest(cmd, rt, info); } } /* * Mark an interface down and notify protocols of * the transition. * NOTE: must be called at splnet or eqivalent. */ void if_unroute(ifp, flag, fam) register struct ifnet *ifp; int flag, fam; { register struct ifaddr *ifa; ifp->if_flags &= ~flag; getmicrotime(&ifp->if_lastchange); TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) if (fam == PF_UNSPEC || (fam == ifa->ifa_addr->sa_family)) pfctlinput(PRC_IFDOWN, ifa->ifa_addr); if_qflush(&ifp->if_snd); rt_ifmsg(ifp); } /* * Mark an interface up and notify protocols of * the transition. * NOTE: must be called at splnet or eqivalent. */ void if_route(ifp, flag, fam) register struct ifnet *ifp; int flag, fam; { register struct ifaddr *ifa; ifp->if_flags |= flag; getmicrotime(&ifp->if_lastchange); TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) if (fam == PF_UNSPEC || (fam == ifa->ifa_addr->sa_family)) pfctlinput(PRC_IFUP, ifa->ifa_addr); rt_ifmsg(ifp); #ifdef INET6 in6_if_up(ifp); #endif } /* * Mark an interface down and notify protocols of * the transition. * NOTE: must be called at splnet or eqivalent. */ void if_down(ifp) register struct ifnet *ifp; { if_unroute(ifp, IFF_UP, AF_UNSPEC); } /* * Mark an interface up and notify protocols of * the transition. * NOTE: must be called at splnet or eqivalent. */ void if_up(ifp) register struct ifnet *ifp; { if_route(ifp, IFF_UP, AF_UNSPEC); } /* * Flush an interface queue. */ static void if_qflush(ifq) register struct ifqueue *ifq; { register struct mbuf *m, *n; n = ifq->ifq_head; while ((m = n) != 0) { n = m->m_act; m_freem(m); } ifq->ifq_head = 0; ifq->ifq_tail = 0; ifq->ifq_len = 0; } /* * Handle interface watchdog timer routines. Called * from softclock, we decrement timers (if set) and * call the appropriate interface routine on expiration. */ static void if_slowtimo(arg) void *arg; { register struct ifnet *ifp; int s = splimp(); IFNET_RLOCK(); TAILQ_FOREACH(ifp, &ifnet, if_link) { if (ifp->if_timer == 0 || --ifp->if_timer) continue; if (ifp->if_watchdog) (*ifp->if_watchdog)(ifp); } IFNET_RUNLOCK(); splx(s); timeout(if_slowtimo, (void *)0, hz / IFNET_SLOWHZ); } /* * Map interface name to * interface structure pointer. */ struct ifnet * ifunit(const char *name) { char namebuf[IFNAMSIZ + 1]; struct ifnet *ifp; dev_t dev; /* * Now search all the interfaces for this name/number */ /* * XXX * Devices should really be known as /dev/fooN, not /dev/net/fooN. */ snprintf(namebuf, IFNAMSIZ, "%s/%s", net_cdevsw.d_name, name); IFNET_RLOCK(); TAILQ_FOREACH(ifp, &ifnet, if_link) { dev = ifdev_byindex(ifp->if_index); if (strcmp(devtoname(dev), namebuf) == 0) break; if (dev_named(dev, name)) break; } IFNET_RUNLOCK(); return (ifp); } /* * Map interface name in a sockaddr_dl to * interface structure pointer. */ struct ifnet * if_withname(sa) struct sockaddr *sa; { char ifname[IFNAMSIZ+1]; struct sockaddr_dl *sdl = (struct sockaddr_dl *)sa; if ( (sa->sa_family != AF_LINK) || (sdl->sdl_nlen == 0) || (sdl->sdl_nlen > IFNAMSIZ) ) return NULL; /* * ifunit wants a NUL-terminated string. It may not be NUL-terminated * in the sockaddr, and we don't want to change the caller's sockaddr * (there might not be room to add the trailing NUL anyway), so we make * a local copy that we know we can NUL-terminate safely. */ bcopy(sdl->sdl_data, ifname, sdl->sdl_nlen); ifname[sdl->sdl_nlen] = '\0'; return ifunit(ifname); } /* * Hardware specific interface ioctls. */ static int ifhwioctl(u_long cmd, struct ifnet *ifp, caddr_t data, struct thread *td) { struct ifreq *ifr; struct ifstat *ifs; int error = 0; int new_flags; ifr = (struct ifreq *)data; switch (cmd) { case SIOCGIFINDEX: ifr->ifr_index = ifp->if_index; break; case SIOCGIFFLAGS: ifr->ifr_flags = ifp->if_flags & 0xffff; ifr->ifr_flagshigh = ifp->if_flags >> 16; break; case SIOCGIFCAP: ifr->ifr_reqcap = ifp->if_capabilities; ifr->ifr_curcap = ifp->if_capenable; break; #ifdef MAC case SIOCGIFMAC: error = mac_ioctl_ifnet_get(td->td_ucred, ifr, ifp); break; #endif case SIOCGIFMETRIC: ifr->ifr_metric = ifp->if_metric; break; case SIOCGIFMTU: ifr->ifr_mtu = ifp->if_mtu; break; case SIOCGIFPHYS: ifr->ifr_phys = ifp->if_physical; break; case SIOCSIFFLAGS: error = suser(td); if (error) return (error); new_flags = (ifr->ifr_flags & 0xffff) | (ifr->ifr_flagshigh << 16); if (ifp->if_flags & IFF_SMART) { /* Smart drivers twiddle their own routes */ } else if (ifp->if_flags & IFF_UP && (new_flags & IFF_UP) == 0) { int s = splimp(); if_down(ifp); splx(s); } else if (new_flags & IFF_UP && (ifp->if_flags & IFF_UP) == 0) { int s = splimp(); if_up(ifp); splx(s); } ifp->if_flags = (ifp->if_flags & IFF_CANTCHANGE) | (new_flags &~ IFF_CANTCHANGE); if (new_flags & IFF_PPROMISC) { /* Permanently promiscuous mode requested */ ifp->if_flags |= IFF_PROMISC; } else if (ifp->if_pcount == 0) { ifp->if_flags &= ~IFF_PROMISC; } if (ifp->if_ioctl) (void) (*ifp->if_ioctl)(ifp, cmd, data); getmicrotime(&ifp->if_lastchange); break; case SIOCSIFCAP: error = suser(td); if (error) return (error); if (ifr->ifr_reqcap & ~ifp->if_capabilities) return (EINVAL); (void) (*ifp->if_ioctl)(ifp, cmd, data); break; #ifdef MAC case SIOCSIFMAC: error = mac_ioctl_ifnet_set(td->td_ucred, ifr, ifp); break; #endif case SIOCSIFMETRIC: error = suser(td); if (error) return (error); ifp->if_metric = ifr->ifr_metric; getmicrotime(&ifp->if_lastchange); break; case SIOCSIFPHYS: error = suser(td); if (error) return error; if (!ifp->if_ioctl) return EOPNOTSUPP; error = (*ifp->if_ioctl)(ifp, cmd, data); if (error == 0) getmicrotime(&ifp->if_lastchange); return(error); case SIOCSIFMTU: { u_long oldmtu = ifp->if_mtu; error = suser(td); if (error) return (error); if (ifr->ifr_mtu < IF_MINMTU || ifr->ifr_mtu > IF_MAXMTU) return (EINVAL); if (ifp->if_ioctl == NULL) return (EOPNOTSUPP); error = (*ifp->if_ioctl)(ifp, cmd, data); if (error == 0) { getmicrotime(&ifp->if_lastchange); rt_ifmsg(ifp); } /* * If the link MTU changed, do network layer specific procedure. */ if (ifp->if_mtu != oldmtu) { #ifdef INET6 nd6_setmtu(ifp); #endif } break; } case SIOCADDMULTI: case SIOCDELMULTI: error = suser(td); if (error) return (error); /* Don't allow group membership on non-multicast interfaces. */ if ((ifp->if_flags & IFF_MULTICAST) == 0) return (EOPNOTSUPP); /* Don't let users screw up protocols' entries. */ if (ifr->ifr_addr.sa_family != AF_LINK) return (EINVAL); if (cmd == SIOCADDMULTI) { struct ifmultiaddr *ifma; error = if_addmulti(ifp, &ifr->ifr_addr, &ifma); } else { error = if_delmulti(ifp, &ifr->ifr_addr); } if (error == 0) getmicrotime(&ifp->if_lastchange); break; case SIOCSIFPHYADDR: case SIOCDIFPHYADDR: #ifdef INET6 case SIOCSIFPHYADDR_IN6: #endif case SIOCSLIFPHYADDR: case SIOCSIFMEDIA: case SIOCSIFGENERIC: error = suser(td); if (error) return (error); if (ifp->if_ioctl == NULL) return (EOPNOTSUPP); error = (*ifp->if_ioctl)(ifp, cmd, data); if (error == 0) getmicrotime(&ifp->if_lastchange); break; case SIOCGIFSTATUS: ifs = (struct ifstat *)data; ifs->ascii[0] = '\0'; case SIOCGIFPSRCADDR: case SIOCGIFPDSTADDR: case SIOCGLIFPHYADDR: case SIOCGIFMEDIA: case SIOCGIFGENERIC: if (ifp->if_ioctl == 0) return (EOPNOTSUPP); error = (*ifp->if_ioctl)(ifp, cmd, data); break; case SIOCSIFLLADDR: error = suser(td); if (error) return (error); error = if_setlladdr(ifp, ifr->ifr_addr.sa_data, ifr->ifr_addr.sa_len); break; default: error = ENOIOCTL; break; } return (error); } /* * Interface ioctls. */ int ifioctl(so, cmd, data, td) struct socket *so; u_long cmd; caddr_t data; struct thread *td; { struct ifnet *ifp; struct ifreq *ifr; int error; int oif_flags; switch (cmd) { case SIOCGIFCONF: case OSIOCGIFCONF: return (ifconf(cmd, data)); } ifr = (struct ifreq *)data; switch (cmd) { case SIOCIFCREATE: case SIOCIFDESTROY: if ((error = suser(td)) != 0) return (error); return ((cmd == SIOCIFCREATE) ? if_clone_create(ifr->ifr_name, sizeof(ifr->ifr_name)) : if_clone_destroy(ifr->ifr_name)); case SIOCIFGCLONERS: return (if_clone_list((struct if_clonereq *)data)); } ifp = ifunit(ifr->ifr_name); if (ifp == 0) return (ENXIO); error = ifhwioctl(cmd, ifp, data, td); if (error != ENOIOCTL) return (error); oif_flags = ifp->if_flags; if (so->so_proto == 0) return (EOPNOTSUPP); #ifndef COMPAT_43 error = ((*so->so_proto->pr_usrreqs->pru_control)(so, cmd, data, ifp, td)); #else { int ocmd = cmd; switch (cmd) { case SIOCSIFDSTADDR: case SIOCSIFADDR: case SIOCSIFBRDADDR: case SIOCSIFNETMASK: #if BYTE_ORDER != BIG_ENDIAN if (ifr->ifr_addr.sa_family == 0 && ifr->ifr_addr.sa_len < 16) { ifr->ifr_addr.sa_family = ifr->ifr_addr.sa_len; ifr->ifr_addr.sa_len = 16; } #else if (ifr->ifr_addr.sa_len == 0) ifr->ifr_addr.sa_len = 16; #endif break; case OSIOCGIFADDR: cmd = SIOCGIFADDR; break; case OSIOCGIFDSTADDR: cmd = SIOCGIFDSTADDR; break; case OSIOCGIFBRDADDR: cmd = SIOCGIFBRDADDR; break; case OSIOCGIFNETMASK: cmd = SIOCGIFNETMASK; } error = ((*so->so_proto->pr_usrreqs->pru_control)(so, cmd, data, ifp, td)); switch (ocmd) { case OSIOCGIFADDR: case OSIOCGIFDSTADDR: case OSIOCGIFBRDADDR: case OSIOCGIFNETMASK: *(u_short *)&ifr->ifr_addr = ifr->ifr_addr.sa_family; } } #endif /* COMPAT_43 */ if ((oif_flags ^ ifp->if_flags) & IFF_UP) { #ifdef INET6 DELAY(100);/* XXX: temporary workaround for fxp issue*/ if (ifp->if_flags & IFF_UP) { int s = splimp(); in6_if_up(ifp); splx(s); } #endif } return (error); } /* * Set/clear promiscuous mode on interface ifp based on the truth value * of pswitch. The calls are reference counted so that only the first * "on" request actually has an effect, as does the final "off" request. * Results are undefined if the "off" and "on" requests are not matched. */ int ifpromisc(ifp, pswitch) struct ifnet *ifp; int pswitch; { struct ifreq ifr; int error; int oldflags, oldpcount; oldpcount = ifp->if_pcount; oldflags = ifp->if_flags; if (ifp->if_flags & IFF_PPROMISC) { /* Do nothing if device is in permanently promiscuous mode */ ifp->if_pcount += pswitch ? 1 : -1; return (0); } if (pswitch) { /* * If the device is not configured up, we cannot put it in * promiscuous mode. */ if ((ifp->if_flags & IFF_UP) == 0) return (ENETDOWN); if (ifp->if_pcount++ != 0) return (0); ifp->if_flags |= IFF_PROMISC; } else { if (--ifp->if_pcount > 0) return (0); ifp->if_flags &= ~IFF_PROMISC; } ifr.ifr_flags = ifp->if_flags & 0xffff; ifr.ifr_flagshigh = ifp->if_flags >> 16; error = (*ifp->if_ioctl)(ifp, SIOCSIFFLAGS, (caddr_t)&ifr); if (error == 0) { log(LOG_INFO, "%s%d: promiscuous mode %s\n", ifp->if_name, ifp->if_unit, (ifp->if_flags & IFF_PROMISC) ? "enabled" : "disabled"); rt_ifmsg(ifp); } else { ifp->if_pcount = oldpcount; ifp->if_flags = oldflags; } return error; } /* * Return interface configuration * of system. List may be used * in later ioctl's (above) to get * other information. */ /*ARGSUSED*/ static int ifconf(cmd, data) u_long cmd; caddr_t data; { struct ifconf *ifc = (struct ifconf *)data; struct ifnet *ifp; struct ifaddr *ifa; struct ifreq ifr, *ifrp; int space = ifc->ifc_len, error = 0; ifrp = ifc->ifc_req; IFNET_RLOCK(); /* could sleep XXX */ TAILQ_FOREACH(ifp, &ifnet, if_link) { char workbuf[64]; int ifnlen, addrs; if (space < sizeof(ifr)) break; ifnlen = snprintf(workbuf, sizeof(workbuf), "%s%d", ifp->if_name, ifp->if_unit); if(ifnlen + 1 > sizeof ifr.ifr_name) { error = ENAMETOOLONG; break; } else { strcpy(ifr.ifr_name, workbuf); } addrs = 0; TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { struct sockaddr *sa = ifa->ifa_addr; if (space < sizeof(ifr)) break; if (jailed(curthread->td_ucred) && prison_if(curthread->td_ucred, sa)) continue; addrs++; #ifdef COMPAT_43 if (cmd == OSIOCGIFCONF) { struct osockaddr *osa = (struct osockaddr *)&ifr.ifr_addr; ifr.ifr_addr = *sa; osa->sa_family = sa->sa_family; error = copyout((caddr_t)&ifr, (caddr_t)ifrp, sizeof (ifr)); ifrp++; } else #endif if (sa->sa_len <= sizeof(*sa)) { ifr.ifr_addr = *sa; error = copyout((caddr_t)&ifr, (caddr_t)ifrp, sizeof (ifr)); ifrp++; } else { if (space < sizeof (ifr) + sa->sa_len - sizeof(*sa)) break; space -= sa->sa_len - sizeof(*sa); error = copyout((caddr_t)&ifr, (caddr_t)ifrp, sizeof (ifr.ifr_name)); if (error == 0) error = copyout((caddr_t)sa, (caddr_t)&ifrp->ifr_addr, sa->sa_len); ifrp = (struct ifreq *) (sa->sa_len + (caddr_t)&ifrp->ifr_addr); } if (error) break; space -= sizeof (ifr); } if (error) break; if (!addrs) { bzero((caddr_t)&ifr.ifr_addr, sizeof(ifr.ifr_addr)); error = copyout((caddr_t)&ifr, (caddr_t)ifrp, sizeof (ifr)); if (error) break; space -= sizeof (ifr); ifrp++; } } IFNET_RUNLOCK(); ifc->ifc_len -= space; return (error); } /* * Just like if_promisc(), but for all-multicast-reception mode. */ int if_allmulti(ifp, onswitch) struct ifnet *ifp; int onswitch; { int error = 0; int s = splimp(); struct ifreq ifr; if (onswitch) { if (ifp->if_amcount++ == 0) { ifp->if_flags |= IFF_ALLMULTI; ifr.ifr_flags = ifp->if_flags & 0xffff; ifr.ifr_flagshigh = ifp->if_flags >> 16; error = ifp->if_ioctl(ifp, SIOCSIFFLAGS, (caddr_t)&ifr); } } else { if (ifp->if_amcount > 1) { ifp->if_amcount--; } else { ifp->if_amcount = 0; ifp->if_flags &= ~IFF_ALLMULTI; ifr.ifr_flags = ifp->if_flags & 0xffff;; ifr.ifr_flagshigh = ifp->if_flags >> 16; error = ifp->if_ioctl(ifp, SIOCSIFFLAGS, (caddr_t)&ifr); } } splx(s); if (error == 0) rt_ifmsg(ifp); return error; } /* * Add a multicast listenership to the interface in question. * The link layer provides a routine which converts */ int if_addmulti(ifp, sa, retifma) struct ifnet *ifp; /* interface to manipulate */ struct sockaddr *sa; /* address to add */ struct ifmultiaddr **retifma; { struct sockaddr *llsa, *dupsa; int error, s; struct ifmultiaddr *ifma; /* * If the matching multicast address already exists * then don't add a new one, just add a reference */ TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) { if (equal(sa, ifma->ifma_addr)) { ifma->ifma_refcount++; if (retifma) *retifma = ifma; return 0; } } /* * Give the link layer a chance to accept/reject it, and also * find out which AF_LINK address this maps to, if it isn't one * already. */ if (ifp->if_resolvemulti) { error = ifp->if_resolvemulti(ifp, &llsa, sa); if (error) return error; } else { llsa = 0; } MALLOC(ifma, struct ifmultiaddr *, sizeof *ifma, M_IFMADDR, M_WAITOK); MALLOC(dupsa, struct sockaddr *, sa->sa_len, M_IFMADDR, M_WAITOK); bcopy(sa, dupsa, sa->sa_len); ifma->ifma_addr = dupsa; ifma->ifma_lladdr = llsa; ifma->ifma_ifp = ifp; ifma->ifma_refcount = 1; ifma->ifma_protospec = 0; rt_newmaddrmsg(RTM_NEWMADDR, ifma); /* * Some network interfaces can scan the address list at * interrupt time; lock them out. */ s = splimp(); TAILQ_INSERT_HEAD(&ifp->if_multiaddrs, ifma, ifma_link); splx(s); if (retifma != NULL) *retifma = ifma; if (llsa != 0) { TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) { if (equal(ifma->ifma_addr, llsa)) break; } if (ifma) { ifma->ifma_refcount++; } else { MALLOC(ifma, struct ifmultiaddr *, sizeof *ifma, M_IFMADDR, M_WAITOK); MALLOC(dupsa, struct sockaddr *, llsa->sa_len, M_IFMADDR, M_WAITOK); bcopy(llsa, dupsa, llsa->sa_len); ifma->ifma_addr = dupsa; ifma->ifma_lladdr = NULL; ifma->ifma_ifp = ifp; ifma->ifma_refcount = 1; ifma->ifma_protospec = 0; s = splimp(); TAILQ_INSERT_HEAD(&ifp->if_multiaddrs, ifma, ifma_link); splx(s); } } /* * We are certain we have added something, so call down to the * interface to let them know about it. */ s = splimp(); ifp->if_ioctl(ifp, SIOCADDMULTI, 0); splx(s); return 0; } /* * Remove a reference to a multicast address on this interface. Yell * if the request does not match an existing membership. */ int if_delmulti(ifp, sa) struct ifnet *ifp; struct sockaddr *sa; { struct ifmultiaddr *ifma; int s; TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) if (equal(sa, ifma->ifma_addr)) break; if (ifma == 0) return ENOENT; if (ifma->ifma_refcount > 1) { ifma->ifma_refcount--; return 0; } rt_newmaddrmsg(RTM_DELMADDR, ifma); sa = ifma->ifma_lladdr; s = splimp(); TAILQ_REMOVE(&ifp->if_multiaddrs, ifma, ifma_link); /* * Make sure the interface driver is notified * in the case of a link layer mcast group being left. */ if (ifma->ifma_addr->sa_family == AF_LINK && sa == 0) ifp->if_ioctl(ifp, SIOCDELMULTI, 0); splx(s); free(ifma->ifma_addr, M_IFMADDR); free(ifma, M_IFMADDR); if (sa == 0) return 0; /* * Now look for the link-layer address which corresponds to * this network address. It had been squirreled away in * ifma->ifma_lladdr for this purpose (so we don't have * to call ifp->if_resolvemulti() again), and we saved that * value in sa above. If some nasty deleted the * link-layer address out from underneath us, we can deal because * the address we stored was is not the same as the one which was * in the record for the link-layer address. (So we don't complain * in that case.) */ TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) if (equal(sa, ifma->ifma_addr)) break; if (ifma == 0) return 0; if (ifma->ifma_refcount > 1) { ifma->ifma_refcount--; return 0; } s = splimp(); TAILQ_REMOVE(&ifp->if_multiaddrs, ifma, ifma_link); ifp->if_ioctl(ifp, SIOCDELMULTI, 0); splx(s); free(ifma->ifma_addr, M_IFMADDR); free(sa, M_IFMADDR); free(ifma, M_IFMADDR); return 0; } /* * Set the link layer address on an interface. * * At this time we only support certain types of interfaces, * and we don't allow the length of the address to change. */ int if_setlladdr(struct ifnet *ifp, const u_char *lladdr, int len) { struct sockaddr_dl *sdl; struct ifaddr *ifa; struct ifreq ifr; ifa = ifaddr_byindex(ifp->if_index); if (ifa == NULL) return (EINVAL); sdl = (struct sockaddr_dl *)ifa->ifa_addr; if (sdl == NULL) return (EINVAL); if (len != sdl->sdl_alen) /* don't allow length to change */ return (EINVAL); switch (ifp->if_type) { case IFT_ETHER: /* these types use struct arpcom */ case IFT_FDDI: case IFT_XETHER: case IFT_ISO88025: case IFT_L2VLAN: bcopy(lladdr, ((struct arpcom *)ifp->if_softc)->ac_enaddr, len); /* FALLTHROUGH */ case IFT_ARCNET: bcopy(lladdr, LLADDR(sdl), len); break; default: return (ENODEV); } /* * If the interface is already up, we need * to re-init it in order to reprogram its * address filter. */ if ((ifp->if_flags & IFF_UP) != 0) { ifp->if_flags &= ~IFF_UP; ifr.ifr_flags = ifp->if_flags & 0xffff; ifr.ifr_flagshigh = ifp->if_flags >> 16; (*ifp->if_ioctl)(ifp, SIOCSIFFLAGS, (caddr_t)&ifr); ifp->if_flags |= IFF_UP; ifr.ifr_flags = ifp->if_flags & 0xffff; ifr.ifr_flagshigh = ifp->if_flags >> 16; (*ifp->if_ioctl)(ifp, SIOCSIFFLAGS, (caddr_t)&ifr); #ifdef INET /* * Also send gratuitous ARPs to notify other nodes about * the address change. */ TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { if (ifa->ifa_addr != NULL && ifa->ifa_addr->sa_family == AF_INET) arp_ifinit(ifp, ifa); } #endif } return (0); } struct ifmultiaddr * ifmaof_ifpforaddr(sa, ifp) struct sockaddr *sa; struct ifnet *ifp; { struct ifmultiaddr *ifma; TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) if (equal(ifma->ifma_addr, sa)) break; return ifma; } int if_printf(struct ifnet *ifp, const char * fmt, ...) { va_list ap; int retval; retval = printf("%s%d: ", ifp->if_name, ifp->if_unit); va_start(ap, fmt); retval += vprintf(fmt, ap); va_end(ap); return (retval); } SYSCTL_NODE(_net, PF_LINK, link, CTLFLAG_RW, 0, "Link layers"); SYSCTL_NODE(_net_link, 0, generic, CTLFLAG_RW, 0, "Generic link-management"); Index: head/sys/net/if_disc.c =================================================================== --- head/sys/net/if_disc.c (revision 120726) +++ head/sys/net/if_disc.c (revision 120727) @@ -1,254 +1,256 @@ /* * Copyright (c) 1982, 1986, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * From: @(#)if_loop.c 8.1 (Berkeley) 6/10/93 * $FreeBSD$ */ /* * Discard interface driver for protocol testing and timing. * (Based on the loopback.) */ #include #include #include #include #include #include #include #include #include #include #include #include #include "opt_inet.h" #include "opt_inet6.h" #ifdef TINY_DSMTU #define DSMTU (1024+512) #else #define DSMTU 65532 #endif #define DISCNAME "disc" struct disc_softc { struct ifnet sc_if; /* must be first */ LIST_ENTRY(disc_softc) sc_list; }; static int discoutput(struct ifnet *, struct mbuf *, struct sockaddr *, struct rtentry *); static void discrtrequest(int, struct rtentry *, struct rt_addrinfo *); static int discioctl(struct ifnet *, u_long, caddr_t); static int disc_clone_create(struct if_clone *, int); static void disc_clone_destroy(struct ifnet *); static MALLOC_DEFINE(M_DISC, DISCNAME, "Discard interface"); static LIST_HEAD(, disc_softc) disc_softc_list; static struct if_clone disc_cloner = IF_CLONE_INITIALIZER(DISCNAME, disc_clone_create, disc_clone_destroy, 0, IF_MAXUNIT); static int disc_clone_create(struct if_clone *ifc, int unit) { struct ifnet *ifp; struct disc_softc *sc; sc = malloc(sizeof(struct disc_softc), M_DISC, M_WAITOK); bzero(sc, sizeof(struct disc_softc)); ifp = &sc->sc_if; ifp->if_softc = sc; ifp->if_name = DISCNAME; ifp->if_unit = unit; ifp->if_mtu = DSMTU; ifp->if_flags = IFF_LOOPBACK | IFF_MULTICAST; ifp->if_ioctl = discioctl; ifp->if_output = discoutput; ifp->if_type = IFT_LOOP; ifp->if_hdrlen = 0; ifp->if_addrlen = 0; ifp->if_snd.ifq_maxlen = 20; if_attach(ifp); bpfattach(ifp, DLT_NULL, sizeof(u_int)); LIST_INSERT_HEAD(&disc_softc_list, sc, sc_list); return (0); } static void disc_clone_destroy(struct ifnet *ifp) { struct disc_softc *sc; sc = ifp->if_softc; LIST_REMOVE(sc, sc_list); bpfdetach(ifp); if_detach(ifp); free(sc, M_DISC); } static int disc_modevent(module_t mod, int type, void *data) { switch (type) { case MOD_LOAD: LIST_INIT(&disc_softc_list); if_clone_attach(&disc_cloner); break; case MOD_UNLOAD: if_clone_detach(&disc_cloner); while (!LIST_EMPTY(&disc_softc_list)) disc_clone_destroy( &LIST_FIRST(&disc_softc_list)->sc_if); break; } return 0; } static moduledata_t disc_mod = { "if_disc", disc_modevent, NULL }; DECLARE_MODULE(if_disc, disc_mod, SI_SUB_PSEUDO, SI_ORDER_ANY); static int discoutput(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dst, struct rtentry *rt) { M_ASSERTPKTHDR(m); /* BPF write needs to be handled specially */ if (dst->sa_family == AF_UNSPEC) { dst->sa_family = *(mtod(m, int *)); m->m_len -= sizeof(int); m->m_pkthdr.len -= sizeof(int); m->m_data += sizeof(int); } if (ifp->if_bpf) { /* * We need to prepend the address family as * a four byte field. Cons up a dummy header * to pacify bpf. This is safe because bpf * will only read from the mbuf (i.e., it won't * try to free it or keep a pointer a to it). */ struct mbuf m0; u_int af = dst->sa_family; m0.m_next = m; m0.m_len = 4; m0.m_data = (char *)⁡ BPF_MTAP(ifp, &m0); } m->m_pkthdr.rcvif = ifp; ifp->if_opackets++; ifp->if_obytes += m->m_pkthdr.len; m_freem(m); return 0; } /* ARGSUSED */ static void discrtrequest(int cmd, struct rtentry *rt, struct rt_addrinfo *info) { + RT_LOCK_ASSERT(rt); + if (rt) rt->rt_rmx.rmx_mtu = DSMTU; } /* * Process an ioctl request. */ static int discioctl(struct ifnet *ifp, u_long cmd, caddr_t data) { struct ifaddr *ifa; struct ifreq *ifr = (struct ifreq *)data; int error = 0; switch (cmd) { case SIOCSIFADDR: ifp->if_flags |= IFF_UP; ifa = (struct ifaddr *)data; if (ifa != 0) ifa->ifa_rtrequest = discrtrequest; /* * Everything else is done at a higher level. */ break; case SIOCADDMULTI: case SIOCDELMULTI: if (ifr == 0) { error = EAFNOSUPPORT; /* XXX */ break; } switch (ifr->ifr_addr.sa_family) { #ifdef INET case AF_INET: break; #endif #ifdef INET6 case AF_INET6: break; #endif default: error = EAFNOSUPPORT; break; } break; case SIOCSIFMTU: ifp->if_mtu = ifr->ifr_mtu; break; default: error = EINVAL; } return (error); } Index: head/sys/net/if_faith.c =================================================================== --- head/sys/net/if_faith.c (revision 120726) +++ head/sys/net/if_faith.c (revision 120727) @@ -1,377 +1,379 @@ /* $KAME: if_faith.c,v 1.23 2001/12/17 13:55:29 sumikawa Exp $ */ /* * Copyright (c) 1982, 1986, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ /* * derived from * @(#)if_loop.c 8.1 (Berkeley) 6/10/93 * Id: if_loop.c,v 1.22 1996/06/19 16:24:10 wollman Exp */ /* * Loopback interface driver for protocol testing and timing. */ #include "opt_inet.h" #include "opt_inet6.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef INET #include #include #include #include #endif #ifdef INET6 #ifndef INET #include #endif #include #include #include #endif #include #define FAITHNAME "faith" struct faith_softc { struct ifnet sc_if; /* must be first */ LIST_ENTRY(faith_softc) sc_list; }; static int faithioctl(struct ifnet *, u_long, caddr_t); int faithoutput(struct ifnet *, struct mbuf *, struct sockaddr *, struct rtentry *); static void faithrtrequest(int, struct rtentry *, struct rt_addrinfo *); #ifdef INET6 static int faithprefix(struct in6_addr *); #endif static int faithmodevent(module_t, int, void *); static MALLOC_DEFINE(M_FAITH, FAITHNAME, "Firewall Assisted Tunnel Interface"); static LIST_HEAD(, faith_softc) faith_softc_list; int faith_clone_create(struct if_clone *, int); void faith_clone_destroy(struct ifnet *); struct if_clone faith_cloner = IF_CLONE_INITIALIZER(FAITHNAME, faith_clone_create, faith_clone_destroy, 0, IF_MAXUNIT); #define FAITHMTU 1500 static int faithmodevent(mod, type, data) module_t mod; int type; void *data; { switch (type) { case MOD_LOAD: LIST_INIT(&faith_softc_list); if_clone_attach(&faith_cloner); #ifdef INET6 faithprefix_p = faithprefix; #endif break; case MOD_UNLOAD: #ifdef INET6 faithprefix_p = NULL; #endif if_clone_detach(&faith_cloner); while (!LIST_EMPTY(&faith_softc_list)) faith_clone_destroy( &LIST_FIRST(&faith_softc_list)->sc_if); break; } return 0; } static moduledata_t faith_mod = { "if_faith", faithmodevent, 0 }; DECLARE_MODULE(if_faith, faith_mod, SI_SUB_PSEUDO, SI_ORDER_ANY); MODULE_VERSION(if_faith, 1); int faith_clone_create(ifc, unit) struct if_clone *ifc; int unit; { struct faith_softc *sc; sc = malloc(sizeof(struct faith_softc), M_FAITH, M_WAITOK); bzero(sc, sizeof(struct faith_softc)); sc->sc_if.if_softc = sc; sc->sc_if.if_name = FAITHNAME; sc->sc_if.if_unit = unit; sc->sc_if.if_mtu = FAITHMTU; /* Change to BROADCAST experimentaly to announce its prefix. */ sc->sc_if.if_flags = /* IFF_LOOPBACK */ IFF_BROADCAST | IFF_MULTICAST; sc->sc_if.if_ioctl = faithioctl; sc->sc_if.if_output = faithoutput; sc->sc_if.if_type = IFT_FAITH; sc->sc_if.if_hdrlen = 0; sc->sc_if.if_addrlen = 0; sc->sc_if.if_snd.ifq_maxlen = ifqmaxlen; if_attach(&sc->sc_if); bpfattach(&sc->sc_if, DLT_NULL, sizeof(u_int)); LIST_INSERT_HEAD(&faith_softc_list, sc, sc_list); return (0); } void faith_clone_destroy(ifp) struct ifnet *ifp; { struct faith_softc *sc = (void *) ifp; LIST_REMOVE(sc, sc_list); bpfdetach(ifp); if_detach(ifp); free(sc, M_FAITH); } int faithoutput(ifp, m, dst, rt) struct ifnet *ifp; struct mbuf *m; struct sockaddr *dst; struct rtentry *rt; { int isr; M_ASSERTPKTHDR(m); /* BPF write needs to be handled specially */ if (dst->sa_family == AF_UNSPEC) { dst->sa_family = *(mtod(m, int *)); m->m_len -= sizeof(int); m->m_pkthdr.len -= sizeof(int); m->m_data += sizeof(int); } if (ifp->if_bpf) { /* * We need to prepend the address family as * a four byte field. Cons up a faith header * to pacify bpf. This is safe because bpf * will only read from the mbuf (i.e., it won't * try to free it or keep a pointer a to it). */ struct mbuf m0; u_int32_t af = dst->sa_family; m0.m_next = m; m0.m_len = 4; m0.m_data = (char *)⁡ BPF_MTAP(ifp, &m0); } if (rt && rt->rt_flags & (RTF_REJECT|RTF_BLACKHOLE)) { m_freem(m); return (rt->rt_flags & RTF_BLACKHOLE ? 0 : rt->rt_flags & RTF_HOST ? EHOSTUNREACH : ENETUNREACH); } ifp->if_opackets++; ifp->if_obytes += m->m_pkthdr.len; switch (dst->sa_family) { #ifdef INET case AF_INET: isr = NETISR_IP; break; #endif #ifdef INET6 case AF_INET6: isr = NETISR_IPV6; break; #endif default: m_freem(m); return EAFNOSUPPORT; } /* XXX do we need more sanity checks? */ m->m_pkthdr.rcvif = ifp; ifp->if_ipackets++; ifp->if_ibytes += m->m_pkthdr.len; netisr_dispatch(isr, m); return (0); } /* ARGSUSED */ static void faithrtrequest(cmd, rt, info) int cmd; struct rtentry *rt; struct rt_addrinfo *info; { + RT_LOCK_ASSERT(rt); + if (rt) { rt->rt_rmx.rmx_mtu = rt->rt_ifp->if_mtu; /* for ISO */ /* * For optimal performance, the send and receive buffers * should be at least twice the MTU plus a little more for * overhead. */ rt->rt_rmx.rmx_recvpipe = rt->rt_rmx.rmx_sendpipe = 3 * FAITHMTU; } } /* * Process an ioctl request. */ /* ARGSUSED */ static int faithioctl(ifp, cmd, data) struct ifnet *ifp; u_long cmd; caddr_t data; { struct ifaddr *ifa; struct ifreq *ifr = (struct ifreq *)data; int error = 0; switch (cmd) { case SIOCSIFADDR: ifp->if_flags |= IFF_UP | IFF_RUNNING; ifa = (struct ifaddr *)data; ifa->ifa_rtrequest = faithrtrequest; /* * Everything else is done at a higher level. */ break; case SIOCADDMULTI: case SIOCDELMULTI: if (ifr == 0) { error = EAFNOSUPPORT; /* XXX */ break; } switch (ifr->ifr_addr.sa_family) { #ifdef INET case AF_INET: break; #endif #ifdef INET6 case AF_INET6: break; #endif default: error = EAFNOSUPPORT; break; } break; #ifdef SIOCSIFMTU case SIOCSIFMTU: ifp->if_mtu = ifr->ifr_mtu; break; #endif case SIOCSIFFLAGS: break; default: error = EINVAL; } return (error); } #ifdef INET6 /* * XXX could be slow * XXX could be layer violation to call sys/net from sys/netinet6 */ static int faithprefix(in6) struct in6_addr *in6; { struct rtentry *rt; struct sockaddr_in6 sin6; int ret; if (ip6_keepfaith == 0) return 0; bzero(&sin6, sizeof(sin6)); sin6.sin6_family = AF_INET6; sin6.sin6_len = sizeof(struct sockaddr_in6); sin6.sin6_addr = *in6; rt = rtalloc1((struct sockaddr *)&sin6, 0, 0UL); if (rt && rt->rt_ifp && rt->rt_ifp->if_type == IFT_FAITH && (rt->rt_ifp->if_flags & IFF_UP) != 0) ret = 1; else ret = 0; if (rt) - RTFREE(rt); + RTFREE_LOCKED(rt); return ret; } #endif Index: head/sys/net/if_loop.c =================================================================== --- head/sys/net/if_loop.c (revision 120726) +++ head/sys/net/if_loop.c (revision 120727) @@ -1,429 +1,431 @@ /* * Copyright (c) 1982, 1986, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)if_loop.c 8.2 (Berkeley) 1/9/95 * $FreeBSD$ */ /* * Loopback interface driver for protocol testing and timing. */ #include "opt_atalk.h" #include "opt_inet.h" #include "opt_inet6.h" #include "opt_ipx.h" #include "opt_mac.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef INET #include #include #endif #ifdef IPX #include #include #endif #ifdef INET6 #ifndef INET #include #endif #include #include #endif #ifdef NETATALK #include #include #endif #ifdef TINY_LOMTU #define LOMTU (1024+512) #elif defined(LARGE_LOMTU) #define LOMTU 131072 #else #define LOMTU 16384 #endif #define LONAME "lo" struct lo_softc { struct ifnet sc_if; /* network-visible interface */ LIST_ENTRY(lo_softc) sc_next; }; int loioctl(struct ifnet *, u_long, caddr_t); static void lortrequest(int, struct rtentry *, struct rt_addrinfo *); int looutput(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dst, struct rtentry *rt); int lo_clone_create(struct if_clone *, int); void lo_clone_destroy(struct ifnet *); struct ifnet *loif = NULL; /* Used externally */ static MALLOC_DEFINE(M_LO, LONAME, "Loopback Interface"); static LIST_HEAD(lo_list, lo_softc) lo_list; struct if_clone lo_cloner = IF_CLONE_INITIALIZER(LONAME, lo_clone_create, lo_clone_destroy, 1, IF_MAXUNIT); void lo_clone_destroy(ifp) struct ifnet *ifp; { struct lo_softc *sc; sc = ifp->if_softc; /* XXX: destroying lo0 will lead to panics. */ KASSERT(loif != ifp, ("%s: destroying lo0", __func__)); bpfdetach(ifp); if_detach(ifp); LIST_REMOVE(sc, sc_next); free(sc, M_LO); } int lo_clone_create(ifc, unit) struct if_clone *ifc; int unit; { struct lo_softc *sc; MALLOC(sc, struct lo_softc *, sizeof(*sc), M_LO, M_WAITOK | M_ZERO); sc->sc_if.if_name = LONAME; sc->sc_if.if_unit = unit; sc->sc_if.if_mtu = LOMTU; sc->sc_if.if_flags = IFF_LOOPBACK | IFF_MULTICAST; sc->sc_if.if_ioctl = loioctl; sc->sc_if.if_output = looutput; sc->sc_if.if_type = IFT_LOOP; sc->sc_if.if_snd.ifq_maxlen = ifqmaxlen; sc->sc_if.if_softc = sc; if_attach(&sc->sc_if); bpfattach(&sc->sc_if, DLT_NULL, sizeof(u_int)); LIST_INSERT_HEAD(&lo_list, sc, sc_next); if (loif == NULL) loif = &sc->sc_if; return (0); } static int loop_modevent(module_t mod, int type, void *data) { switch (type) { case MOD_LOAD: LIST_INIT(&lo_list); if_clone_attach(&lo_cloner); break; case MOD_UNLOAD: printf("loop module unload - not possible for this module type\n"); return EINVAL; } return 0; } static moduledata_t loop_mod = { "loop", loop_modevent, 0 }; DECLARE_MODULE(loop, loop_mod, SI_SUB_PSEUDO, SI_ORDER_ANY); int looutput(ifp, m, dst, rt) struct ifnet *ifp; register struct mbuf *m; struct sockaddr *dst; register struct rtentry *rt; { #ifdef INET6 struct mbuf *n; #endif M_ASSERTPKTHDR(m); /* check if we have the packet header */ if (rt && rt->rt_flags & (RTF_REJECT|RTF_BLACKHOLE)) { m_freem(m); return (rt->rt_flags & RTF_BLACKHOLE ? 0 : rt->rt_flags & RTF_HOST ? EHOSTUNREACH : ENETUNREACH); } #ifdef INET6 /* * KAME requires that the packet to be contiguous on the * mbuf. We need to make that sure. * this kind of code should be avoided. * * XXX: KAME may no longer need contiguous packets. Once * that has been verified, the following code _should_ be * removed. */ if (m && m->m_next != NULL) { n = m_defrag(m, M_DONTWAIT); if (n == NULL) { m_freem(m); return (ENOBUFS); } else { m = n; } } #endif ifp->if_opackets++; ifp->if_obytes += m->m_pkthdr.len; #if 1 /* XXX */ switch (dst->sa_family) { case AF_INET: case AF_INET6: case AF_IPX: case AF_APPLETALK: break; default: printf("looutput: af=%d unexpected\n", dst->sa_family); m_freem(m); return (EAFNOSUPPORT); } #endif return(if_simloop(ifp, m, dst->sa_family, 0)); } /* * if_simloop() * * This function is to support software emulation of hardware loopback, * i.e., for interfaces with the IFF_SIMPLEX attribute. Since they can't * hear their own broadcasts, we create a copy of the packet that we * would normally receive via a hardware loopback. * * This function expects the packet to include the media header of length hlen. */ int if_simloop(ifp, m, af, hlen) struct ifnet *ifp; struct mbuf *m; int af; int hlen; { int isr; M_ASSERTPKTHDR(m); m->m_pkthdr.rcvif = ifp; /* BPF write needs to be handled specially */ if (af == AF_UNSPEC) { KASSERT(m->m_len >= sizeof(int), ("if_simloop: m_len")); af = *(mtod(m, int *)); m->m_len -= sizeof(int); m->m_pkthdr.len -= sizeof(int); m->m_data += sizeof(int); } /* Let BPF see incoming packet */ if (ifp->if_bpf) { struct mbuf m0, *n = m; if (ifp->if_bpf->bif_dlt == DLT_NULL) { /* * We need to prepend the address family as * a four byte field. Cons up a dummy header * to pacify bpf. This is safe because bpf * will only read from the mbuf (i.e., it won't * try to free it or keep a pointer a to it). */ m0.m_next = m; m0.m_len = 4; m0.m_data = (char *)⁡ n = &m0; } BPF_MTAP(ifp, n); } /* Strip away media header */ if (hlen > 0) { m_adj(m, hlen); #if defined(__alpha__) || defined(__ia64__) || defined(__sparc64__) /* The alpha doesn't like unaligned data. * We move data down in the first mbuf */ if (mtod(m, vm_offset_t) & 3) { KASSERT(hlen >= 3, ("if_simloop: hlen too small")); bcopy(m->m_data, (char *)(mtod(m, vm_offset_t) - (mtod(m, vm_offset_t) & 3)), m->m_len); mtod(m,vm_offset_t) -= (mtod(m, vm_offset_t) & 3); } #endif } /* Deliver to upper layer protocol */ switch (af) { #ifdef INET case AF_INET: isr = NETISR_IP; break; #endif #ifdef INET6 case AF_INET6: m->m_flags |= M_LOOP; isr = NETISR_IPV6; break; #endif #ifdef IPX case AF_IPX: isr = NETISR_IPX; break; #endif #ifdef NETATALK case AF_APPLETALK: isr = NETISR_ATALK2; break; #endif default: printf("if_simloop: can't handle af=%d\n", af); m_freem(m); return (EAFNOSUPPORT); } ifp->if_ipackets++; ifp->if_ibytes += m->m_pkthdr.len; netisr_dispatch(isr, m); return (0); } /* ARGSUSED */ static void lortrequest(cmd, rt, info) int cmd; struct rtentry *rt; struct rt_addrinfo *info; { + RT_LOCK_ASSERT(rt); + if (rt) { rt->rt_rmx.rmx_mtu = rt->rt_ifp->if_mtu; /* for ISO */ /* * For optimal performance, the send and receive buffers * should be at least twice the MTU plus a little more for * overhead. */ rt->rt_rmx.rmx_recvpipe = rt->rt_rmx.rmx_sendpipe = 3 * LOMTU; } } /* * Process an ioctl request. */ /* ARGSUSED */ int loioctl(ifp, cmd, data) register struct ifnet *ifp; u_long cmd; caddr_t data; { register struct ifaddr *ifa; register struct ifreq *ifr = (struct ifreq *)data; register int error = 0; switch (cmd) { case SIOCSIFADDR: ifp->if_flags |= IFF_UP | IFF_RUNNING; ifa = (struct ifaddr *)data; ifa->ifa_rtrequest = lortrequest; /* * Everything else is done at a higher level. */ break; case SIOCADDMULTI: case SIOCDELMULTI: if (ifr == 0) { error = EAFNOSUPPORT; /* XXX */ break; } switch (ifr->ifr_addr.sa_family) { #ifdef INET case AF_INET: break; #endif #ifdef INET6 case AF_INET6: break; #endif default: error = EAFNOSUPPORT; break; } break; case SIOCSIFMTU: ifp->if_mtu = ifr->ifr_mtu; break; case SIOCSIFFLAGS: break; default: error = EINVAL; } return (error); } Index: head/sys/net/if_stf.c =================================================================== --- head/sys/net/if_stf.c (revision 120726) +++ head/sys/net/if_stf.c (revision 120727) @@ -1,776 +1,777 @@ /* $FreeBSD$ */ /* $KAME: if_stf.c,v 1.73 2001/12/03 11:08:30 keiichi Exp $ */ /* * Copyright (C) 2000 WIDE Project. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the project nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ /* * 6to4 interface, based on RFC3056. * * 6to4 interface is NOT capable of link-layer (I mean, IPv4) multicasting. * There is no address mapping defined from IPv6 multicast address to IPv4 * address. Therefore, we do not have IFF_MULTICAST on the interface. * * Due to the lack of address mapping for link-local addresses, we cannot * throw packets toward link-local addresses (fe80::x). Also, we cannot throw * packets to link-local multicast addresses (ff02::x). * * Here are interesting symptoms due to the lack of link-local address: * * Unicast routing exchange: * - RIPng: Impossible. Uses link-local multicast packet toward ff02::9, * and link-local addresses as nexthop. * - OSPFv6: Impossible. OSPFv6 assumes that there's link-local address * assigned to the link, and makes use of them. Also, HELLO packets use * link-local multicast addresses (ff02::5 and ff02::6). * - BGP4+: Maybe. You can only use global address as nexthop, and global * address as TCP endpoint address. * * Multicast routing protocols: * - PIM: Hello packet cannot be used to discover adjacent PIM routers. * Adjacent PIM routers must be configured manually (is it really spec-wise * correct thing to do?). * * ICMPv6: * - Redirects cannot be used due to the lack of link-local address. * * stf interface does not have, and will not need, a link-local address. * It seems to have no real benefit and does not help the above symptoms much. * Even if we assign link-locals to interface, we cannot really * use link-local unicast/multicast on top of 6to4 cloud (since there's no * encapsulation defined for link-local address), and the above analysis does * not change. RFC3056 does not mandate the assignment of link-local address * either. * * 6to4 interface has security issues. Refer to * http://playground.iijlab.net/i-d/draft-itojun-ipv6-transition-abuse-00.txt * for details. The code tries to filter out some of malicious packets. * Note that there is no way to be 100% secure. */ #include "opt_inet.h" #include "opt_inet6.h" #include "opt_mac.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #define STFNAME "stf" #define IN6_IS_ADDR_6TO4(x) (ntohs((x)->s6_addr16[0]) == 0x2002) /* * XXX: Return a pointer with 16-bit aligned. Don't cast it to * struct in_addr *; use bcopy() instead. */ #define GET_V4(x) ((caddr_t)(&(x)->s6_addr16[1])) struct stf_softc { struct ifnet sc_if; /* common area */ union { struct route __sc_ro4; struct route_in6 __sc_ro6; /* just for safety */ } __sc_ro46; #define sc_ro __sc_ro46.__sc_ro4 const struct encaptab *encap_cookie; LIST_ENTRY(stf_softc) sc_list; /* all stf's are linked */ }; static LIST_HEAD(, stf_softc) stf_softc_list; static MALLOC_DEFINE(M_STF, STFNAME, "6to4 Tunnel Interface"); static int ip_stf_ttl = 40; extern struct domain inetdomain; struct protosw in_stf_protosw = { SOCK_RAW, &inetdomain, IPPROTO_IPV6, PR_ATOMIC|PR_ADDR, in_stf_input, (pr_output_t*)rip_output, 0, rip_ctloutput, 0, 0, 0, 0, 0, &rip_usrreqs }; static int stfmodevent(module_t, int, void *); static int stf_encapcheck(const struct mbuf *, int, int, void *); static struct in6_ifaddr *stf_getsrcifa6(struct ifnet *); static int stf_output(struct ifnet *, struct mbuf *, struct sockaddr *, struct rtentry *); static int isrfc1918addr(struct in_addr *); static int stf_checkaddr4(struct stf_softc *, struct in_addr *, struct ifnet *); static int stf_checkaddr6(struct stf_softc *, struct in6_addr *, struct ifnet *); static void stf_rtrequest(int, struct rtentry *, struct rt_addrinfo *); static int stf_ioctl(struct ifnet *, u_long, caddr_t); int stf_clone_create(struct if_clone *, int); void stf_clone_destroy(struct ifnet *); /* only one clone is currently allowed */ struct if_clone stf_cloner = IF_CLONE_INITIALIZER(STFNAME, stf_clone_create, stf_clone_destroy, 0, 0); int stf_clone_create(ifc, unit) struct if_clone *ifc; int unit; { struct stf_softc *sc; sc = malloc(sizeof(struct stf_softc), M_STF, M_WAITOK | M_ZERO); sc->sc_if.if_name = STFNAME; sc->sc_if.if_unit = unit; sc->encap_cookie = encap_attach_func(AF_INET, IPPROTO_IPV6, stf_encapcheck, &in_stf_protosw, sc); if (sc->encap_cookie == NULL) { printf("%s: attach failed\n", if_name(&sc->sc_if)); free(sc, M_STF); return (ENOMEM); } sc->sc_if.if_mtu = IPV6_MMTU; sc->sc_if.if_ioctl = stf_ioctl; sc->sc_if.if_output = stf_output; sc->sc_if.if_type = IFT_STF; sc->sc_if.if_snd.ifq_maxlen = IFQ_MAXLEN; if_attach(&sc->sc_if); bpfattach(&sc->sc_if, DLT_NULL, sizeof(u_int)); LIST_INSERT_HEAD(&stf_softc_list, sc, sc_list); return (0); } void stf_clone_destroy(ifp) struct ifnet *ifp; { int err; struct stf_softc *sc = (void *) ifp; LIST_REMOVE(sc, sc_list); err = encap_detach(sc->encap_cookie); KASSERT(err == 0, ("Unexpected error detaching encap_cookie")); bpfdetach(ifp); if_detach(ifp); free(sc, M_STF); } static int stfmodevent(mod, type, data) module_t mod; int type; void *data; { switch (type) { case MOD_LOAD: LIST_INIT(&stf_softc_list); if_clone_attach(&stf_cloner); break; case MOD_UNLOAD: if_clone_detach(&stf_cloner); while (!LIST_EMPTY(&stf_softc_list)) stf_clone_destroy(&LIST_FIRST(&stf_softc_list)->sc_if); break; } return (0); } static moduledata_t stf_mod = { "if_stf", stfmodevent, 0 }; DECLARE_MODULE(if_stf, stf_mod, SI_SUB_PSEUDO, SI_ORDER_ANY); static int stf_encapcheck(m, off, proto, arg) const struct mbuf *m; int off; int proto; void *arg; { struct ip ip; struct in6_ifaddr *ia6; struct stf_softc *sc; struct in_addr a, b, mask; sc = (struct stf_softc *)arg; if (sc == NULL) return 0; if ((sc->sc_if.if_flags & IFF_UP) == 0) return 0; /* IFF_LINK0 means "no decapsulation" */ if ((sc->sc_if.if_flags & IFF_LINK0) != 0) return 0; if (proto != IPPROTO_IPV6) return 0; /* LINTED const cast */ m_copydata((struct mbuf *)(uintptr_t)m, 0, sizeof(ip), (caddr_t)&ip); if (ip.ip_v != 4) return 0; ia6 = stf_getsrcifa6(&sc->sc_if); if (ia6 == NULL) return 0; /* * check if IPv4 dst matches the IPv4 address derived from the * local 6to4 address. * success on: dst = 10.1.1.1, ia6->ia_addr = 2002:0a01:0101:... */ if (bcmp(GET_V4(&ia6->ia_addr.sin6_addr), &ip.ip_dst, sizeof(ip.ip_dst)) != 0) return 0; /* * check if IPv4 src matches the IPv4 address derived from the * local 6to4 address masked by prefixmask. * success on: src = 10.1.1.1, ia6->ia_addr = 2002:0a00:.../24 * fail on: src = 10.1.1.1, ia6->ia_addr = 2002:0b00:.../24 */ bzero(&a, sizeof(a)); bcopy(GET_V4(&ia6->ia_addr.sin6_addr), &a, sizeof(a)); bcopy(GET_V4(&ia6->ia_prefixmask.sin6_addr), &mask, sizeof(mask)); a.s_addr &= mask.s_addr; b = ip.ip_src; b.s_addr &= mask.s_addr; if (a.s_addr != b.s_addr) return 0; /* stf interface makes single side match only */ return 32; } static struct in6_ifaddr * stf_getsrcifa6(ifp) struct ifnet *ifp; { struct ifaddr *ia; struct in_ifaddr *ia4; struct sockaddr_in6 *sin6; struct in_addr in; for (ia = TAILQ_FIRST(&ifp->if_addrlist); ia; ia = TAILQ_NEXT(ia, ifa_list)) { if (ia->ifa_addr == NULL) continue; if (ia->ifa_addr->sa_family != AF_INET6) continue; sin6 = (struct sockaddr_in6 *)ia->ifa_addr; if (!IN6_IS_ADDR_6TO4(&sin6->sin6_addr)) continue; bcopy(GET_V4(&sin6->sin6_addr), &in, sizeof(in)); LIST_FOREACH(ia4, INADDR_HASH(in.s_addr), ia_hash) if (ia4->ia_addr.sin_addr.s_addr == in.s_addr) break; if (ia4 == NULL) continue; return (struct in6_ifaddr *)ia; } return NULL; } static int stf_output(ifp, m, dst, rt) struct ifnet *ifp; struct mbuf *m; struct sockaddr *dst; struct rtentry *rt; { struct stf_softc *sc; struct sockaddr_in6 *dst6; struct in_addr in4; caddr_t ptr; struct sockaddr_in *dst4; u_int8_t tos; struct ip *ip; struct ip6_hdr *ip6; struct in6_ifaddr *ia6; #ifdef MAC int error; error = mac_check_ifnet_transmit(ifp, m); if (error) { m_freem(m); return (error); } #endif sc = (struct stf_softc*)ifp; dst6 = (struct sockaddr_in6 *)dst; /* just in case */ if ((ifp->if_flags & IFF_UP) == 0) { m_freem(m); ifp->if_oerrors++; return ENETDOWN; } /* * If we don't have an ip4 address that match my inner ip6 address, * we shouldn't generate output. Without this check, we'll end up * using wrong IPv4 source. */ ia6 = stf_getsrcifa6(ifp); if (ia6 == NULL) { m_freem(m); ifp->if_oerrors++; return ENETDOWN; } if (m->m_len < sizeof(*ip6)) { m = m_pullup(m, sizeof(*ip6)); if (!m) { ifp->if_oerrors++; return ENOBUFS; } } ip6 = mtod(m, struct ip6_hdr *); tos = (ntohl(ip6->ip6_flow) >> 20) & 0xff; /* * Pickup the right outer dst addr from the list of candidates. * ip6_dst has priority as it may be able to give us shorter IPv4 hops. */ ptr = NULL; if (IN6_IS_ADDR_6TO4(&ip6->ip6_dst)) ptr = GET_V4(&ip6->ip6_dst); else if (IN6_IS_ADDR_6TO4(&dst6->sin6_addr)) ptr = GET_V4(&dst6->sin6_addr); else { m_freem(m); ifp->if_oerrors++; return ENETUNREACH; } bcopy(ptr, &in4, sizeof(in4)); #if NBPFILTER > 0 if (ifp->if_bpf) { /* * We need to prepend the address family as * a four byte field. Cons up a dummy header * to pacify bpf. This is safe because bpf * will only read from the mbuf (i.e., it won't * try to free it or keep a pointer a to it). */ struct mbuf m0; u_int32_t af = AF_INET6; m0.m_next = m; m0.m_len = 4; m0.m_data = (char *)⁡ #ifdef HAVE_OLD_BPF BPF_MTAP(ifp, &m0); #else bpf_mtap(ifp->if_bpf, &m0); #endif } #endif /*NBPFILTER > 0*/ M_PREPEND(m, sizeof(struct ip), M_DONTWAIT); if (m && m->m_len < sizeof(struct ip)) m = m_pullup(m, sizeof(struct ip)); if (m == NULL) { ifp->if_oerrors++; return ENOBUFS; } ip = mtod(m, struct ip *); bzero(ip, sizeof(*ip)); bcopy(GET_V4(&((struct sockaddr_in6 *)&ia6->ia_addr)->sin6_addr), &ip->ip_src, sizeof(ip->ip_src)); bcopy(&in4, &ip->ip_dst, sizeof(ip->ip_dst)); ip->ip_p = IPPROTO_IPV6; ip->ip_ttl = ip_stf_ttl; ip->ip_len = m->m_pkthdr.len; /*host order*/ if (ifp->if_flags & IFF_LINK1) ip_ecn_ingress(ECN_ALLOWED, &ip->ip_tos, &tos); else ip_ecn_ingress(ECN_NOCARE, &ip->ip_tos, &tos); dst4 = (struct sockaddr_in *)&sc->sc_ro.ro_dst; if (dst4->sin_family != AF_INET || bcmp(&dst4->sin_addr, &ip->ip_dst, sizeof(ip->ip_dst)) != 0) { /* cache route doesn't match */ dst4->sin_family = AF_INET; dst4->sin_len = sizeof(struct sockaddr_in); bcopy(&ip->ip_dst, &dst4->sin_addr, sizeof(dst4->sin_addr)); if (sc->sc_ro.ro_rt) { RTFREE(sc->sc_ro.ro_rt); sc->sc_ro.ro_rt = NULL; } } if (sc->sc_ro.ro_rt == NULL) { rtalloc(&sc->sc_ro); if (sc->sc_ro.ro_rt == NULL) { m_freem(m); ifp->if_oerrors++; return ENETUNREACH; } } ifp->if_opackets++; return ip_output(m, NULL, &sc->sc_ro, 0, NULL, NULL); } static int isrfc1918addr(in) struct in_addr *in; { /* * returns 1 if private address range: * 10.0.0.0/8 172.16.0.0/12 192.168.0.0/16 */ if ((ntohl(in->s_addr) & 0xff000000) >> 24 == 10 || (ntohl(in->s_addr) & 0xfff00000) >> 16 == 172 * 256 + 16 || (ntohl(in->s_addr) & 0xffff0000) >> 16 == 192 * 256 + 168) return 1; return 0; } static int stf_checkaddr4(sc, in, inifp) struct stf_softc *sc; struct in_addr *in; struct ifnet *inifp; /* incoming interface */ { struct in_ifaddr *ia4; /* * reject packets with the following address: * 224.0.0.0/4 0.0.0.0/8 127.0.0.0/8 255.0.0.0/8 */ if (IN_MULTICAST(ntohl(in->s_addr))) return -1; switch ((ntohl(in->s_addr) & 0xff000000) >> 24) { case 0: case 127: case 255: return -1; } /* * reject packets with private address range. * (requirement from RFC3056 section 2 1st paragraph) */ if (isrfc1918addr(in)) return -1; /* * reject packets with broadcast */ for (ia4 = TAILQ_FIRST(&in_ifaddrhead); ia4; ia4 = TAILQ_NEXT(ia4, ia_link)) { if ((ia4->ia_ifa.ifa_ifp->if_flags & IFF_BROADCAST) == 0) continue; if (in->s_addr == ia4->ia_broadaddr.sin_addr.s_addr) return -1; } /* * perform ingress filter */ if (sc && (sc->sc_if.if_flags & IFF_LINK2) == 0 && inifp) { struct sockaddr_in sin; struct rtentry *rt; bzero(&sin, sizeof(sin)); sin.sin_family = AF_INET; sin.sin_len = sizeof(struct sockaddr_in); sin.sin_addr = *in; rt = rtalloc1((struct sockaddr *)&sin, 0, 0UL); if (!rt || rt->rt_ifp != inifp) { #if 0 log(LOG_WARNING, "%s: packet from 0x%x dropped " "due to ingress filter\n", if_name(&sc->sc_if), (u_int32_t)ntohl(sin.sin_addr.s_addr)); #endif if (rt) rtfree(rt); return -1; } rtfree(rt); } return 0; } static int stf_checkaddr6(sc, in6, inifp) struct stf_softc *sc; struct in6_addr *in6; struct ifnet *inifp; /* incoming interface */ { /* * check 6to4 addresses */ if (IN6_IS_ADDR_6TO4(in6)) { struct in_addr in4; bcopy(GET_V4(in6), &in4, sizeof(in4)); return stf_checkaddr4(sc, &in4, inifp); } /* * reject anything that look suspicious. the test is implemented * in ip6_input too, but we check here as well to * (1) reject bad packets earlier, and * (2) to be safe against future ip6_input change. */ if (IN6_IS_ADDR_V4COMPAT(in6) || IN6_IS_ADDR_V4MAPPED(in6)) return -1; return 0; } void in_stf_input(m, off) struct mbuf *m; int off; { int proto; struct stf_softc *sc; struct ip *ip; struct ip6_hdr *ip6; u_int8_t otos, itos; struct ifnet *ifp; proto = mtod(m, struct ip *)->ip_p; if (proto != IPPROTO_IPV6) { m_freem(m); return; } ip = mtod(m, struct ip *); sc = (struct stf_softc *)encap_getarg(m); if (sc == NULL || (sc->sc_if.if_flags & IFF_UP) == 0) { m_freem(m); return; } ifp = &sc->sc_if; #ifdef MAC mac_create_mbuf_from_ifnet(ifp, m); #endif /* * perform sanity check against outer src/dst. * for source, perform ingress filter as well. */ if (stf_checkaddr4(sc, &ip->ip_dst, NULL) < 0 || stf_checkaddr4(sc, &ip->ip_src, m->m_pkthdr.rcvif) < 0) { m_freem(m); return; } otos = ip->ip_tos; m_adj(m, off); if (m->m_len < sizeof(*ip6)) { m = m_pullup(m, sizeof(*ip6)); if (!m) return; } ip6 = mtod(m, struct ip6_hdr *); /* * perform sanity check against inner src/dst. * for source, perform ingress filter as well. */ if (stf_checkaddr6(sc, &ip6->ip6_dst, NULL) < 0 || stf_checkaddr6(sc, &ip6->ip6_src, m->m_pkthdr.rcvif) < 0) { m_freem(m); return; } itos = (ntohl(ip6->ip6_flow) >> 20) & 0xff; if ((ifp->if_flags & IFF_LINK1) != 0) ip_ecn_egress(ECN_ALLOWED, &otos, &itos); else ip_ecn_egress(ECN_NOCARE, &otos, &itos); ip6->ip6_flow &= ~htonl(0xff << 20); ip6->ip6_flow |= htonl((u_int32_t)itos << 20); m->m_pkthdr.rcvif = ifp; if (ifp->if_bpf) { /* * We need to prepend the address family as * a four byte field. Cons up a dummy header * to pacify bpf. This is safe because bpf * will only read from the mbuf (i.e., it won't * try to free it or keep a pointer a to it). */ struct mbuf m0; u_int32_t af = AF_INET6; m0.m_next = m; m0.m_len = 4; m0.m_data = (char *)⁡ #ifdef HAVE_OLD_BPF BPF_MTAP(ifp, &m0); #else bpf_mtap(ifp->if_bpf, &m0); #endif } /* * Put the packet to the network layer input queue according to the * specified address family. * See net/if_gif.c for possible issues with packet processing * reorder due to extra queueing. */ ifp->if_ipackets++; ifp->if_ibytes += m->m_pkthdr.len; netisr_dispatch(NETISR_IPV6, m); } /* ARGSUSED */ static void stf_rtrequest(cmd, rt, info) int cmd; struct rtentry *rt; struct rt_addrinfo *info; { + RT_LOCK_ASSERT(rt); if (rt) rt->rt_rmx.rmx_mtu = IPV6_MMTU; } static int stf_ioctl(ifp, cmd, data) struct ifnet *ifp; u_long cmd; caddr_t data; { struct ifaddr *ifa; struct ifreq *ifr; struct sockaddr_in6 *sin6; struct in_addr addr; int error; error = 0; switch (cmd) { case SIOCSIFADDR: ifa = (struct ifaddr *)data; if (ifa == NULL || ifa->ifa_addr->sa_family != AF_INET6) { error = EAFNOSUPPORT; break; } sin6 = (struct sockaddr_in6 *)ifa->ifa_addr; if (!IN6_IS_ADDR_6TO4(&sin6->sin6_addr)) { error = EINVAL; break; } bcopy(GET_V4(&sin6->sin6_addr), &addr, sizeof(addr)); if (isrfc1918addr(&addr)) { error = EINVAL; break; } ifa->ifa_rtrequest = stf_rtrequest; ifp->if_flags |= IFF_UP; break; case SIOCADDMULTI: case SIOCDELMULTI: ifr = (struct ifreq *)data; if (ifr && ifr->ifr_addr.sa_family == AF_INET6) ; else error = EAFNOSUPPORT; break; default: error = EINVAL; break; } return error; } Index: head/sys/net/route.c =================================================================== --- head/sys/net/route.c (revision 120726) +++ head/sys/net/route.c (revision 120727) @@ -1,1187 +1,1220 @@ /* * Copyright (c) 1980, 1986, 1991, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)route.c 8.3.1.1 (Berkeley) 2/23/95 * $FreeBSD$ */ #include "opt_inet.h" #include "opt_mrouting.h" #include #include #include #include #include #include #include #include #include #include #include #define SA(p) ((struct sockaddr *)(p)) static struct rtstat rtstat; struct radix_node_head *rt_tables[AF_MAX+1]; static int rttrash; /* routes not in table but not freed */ static void rt_maskedcopy(struct sockaddr *, struct sockaddr *, struct sockaddr *); static void rtable_init(void **); static void -rtable_init(table) - void **table; +rtable_init(void **table) { struct domain *dom; for (dom = domains; dom; dom = dom->dom_next) if (dom->dom_rtattach) dom->dom_rtattach(&table[dom->dom_family], dom->dom_rtoffset); } void route_init() { rn_init(); /* initialize all zeroes, all ones, mask table */ rtable_init((void **)rt_tables); } /* * Packet routing routines. */ void -rtalloc(ro) - register struct route *ro; +rtalloc(struct route *ro) { rtalloc_ign(ro, 0UL); } void -rtalloc_ign(ro, ignore) - register struct route *ro; - u_long ignore; +rtalloc_ign(struct route *ro, u_long ignore) { struct rtentry *rt; - int s; if ((rt = ro->ro_rt) != NULL) { if (rt->rt_ifp != NULL && rt->rt_flags & RTF_UP) return; - /* XXX - We are probably always at splnet here already. */ - s = splnet(); RTFREE(rt); ro->ro_rt = NULL; - splx(s); } ro->ro_rt = rtalloc1(&ro->ro_dst, 1, ignore); + if (ro->ro_rt) + RT_UNLOCK(ro->ro_rt); } /* * Look up the route that matches the address given * Or, at least try.. Create a cloned route if needed. + * + * The returned route, if any, is locked. */ struct rtentry * -rtalloc1(dst, report, ignflags) - register struct sockaddr *dst; - int report; - u_long ignflags; +rtalloc1(struct sockaddr *dst, int report, u_long ignflags) { - register struct radix_node_head *rnh = rt_tables[dst->sa_family]; - register struct rtentry *rt; - register struct radix_node *rn; - struct rtentry *newrt = 0; + struct radix_node_head *rnh = rt_tables[dst->sa_family]; + struct rtentry *rt; + struct radix_node *rn; + struct rtentry *newrt; struct rt_addrinfo info; u_long nflags; - int s = splnet(), err = 0, msgtype = RTM_MISS; + int err = 0, msgtype = RTM_MISS; + newrt = 0; /* * Look up the address in the table for that Address Family */ if (rnh == NULL) { rtstat.rts_unreach++; goto miss2; } + bzero(&info, sizeof(info)); RADIX_NODE_HEAD_LOCK(rnh); - if ((rn = rnh->rnh_matchaddr((caddr_t)dst, rnh)) && - ((rn->rn_flags & RNF_ROOT) == 0)) { + if ((rn = rnh->rnh_matchaddr(dst, rnh)) && + (rn->rn_flags & RNF_ROOT) == 0) { /* * If we find it and it's not the root node, then * get a refernce on the rtentry associated. */ newrt = rt = (struct rtentry *)rn; nflags = rt->rt_flags & ~ignflags; if (report && (nflags & (RTF_CLONING | RTF_PRCLONING))) { /* * We are apparently adding (report = 0 in delete). * If it requires that it be cloned, do so. * (This implies it wasn't a HOST route.) */ err = rtrequest(RTM_RESOLVE, dst, SA(0), SA(0), 0, &newrt); if (err) { /* * If the cloning didn't succeed, maybe * what we have will do. Return that. */ - newrt = rt; - rt->rt_refcnt++; + newrt = rt; /* existing route */ + RT_LOCK(newrt); + newrt->rt_refcnt++; goto miss; } - if ((rt = newrt) && (rt->rt_flags & RTF_XRESOLVE)) { + KASSERT(newrt, ("no route and no error")); + RT_LOCK(newrt); + if (newrt->rt_flags & RTF_XRESOLVE) { /* * If the new route specifies it be * externally resolved, then go do that. */ msgtype = RTM_RESOLVE; goto miss; } /* Inform listeners of the new route. */ - bzero(&info, sizeof(info)); - info.rti_info[RTAX_DST] = rt_key(rt); - info.rti_info[RTAX_NETMASK] = rt_mask(rt); - info.rti_info[RTAX_GATEWAY] = rt->rt_gateway; - if (rt->rt_ifp != NULL) { + info.rti_info[RTAX_DST] = rt_key(newrt); + info.rti_info[RTAX_NETMASK] = rt_mask(newrt); + info.rti_info[RTAX_GATEWAY] = newrt->rt_gateway; + if (newrt->rt_ifp != NULL) { info.rti_info[RTAX_IFP] = - TAILQ_FIRST(&rt->rt_ifp->if_addrhead)->ifa_addr; - info.rti_info[RTAX_IFA] = rt->rt_ifa->ifa_addr; + TAILQ_FIRST(&newrt->rt_ifp->if_addrhead)->ifa_addr; + info.rti_info[RTAX_IFA] = newrt->rt_ifa->ifa_addr; } - rt_missmsg(RTM_ADD, &info, rt->rt_flags, 0); - } else - rt->rt_refcnt++; + rt_missmsg(RTM_ADD, &info, newrt->rt_flags, 0); + } else { + KASSERT(rt == newrt, ("locking wrong route")); + RT_LOCK(newrt); + newrt->rt_refcnt++; + } RADIX_NODE_HEAD_UNLOCK(rnh); } else { /* * Either we hit the root or couldn't find any match, * Which basically means * "caint get there frm here" */ rtstat.rts_unreach++; miss: RADIX_NODE_HEAD_UNLOCK(rnh); miss2: if (report) { /* * If required, report the failure to the supervising * Authorities. * For a delete, this is not an error. (report == 0) */ - bzero((caddr_t)&info, sizeof(info)); info.rti_info[RTAX_DST] = dst; rt_missmsg(msgtype, &info, 0, err); } } - splx(s); + if (newrt) + RT_LOCK_ASSERT(newrt); return (newrt); } /* * Remove a reference count from an rtentry. * If the count gets low enough, take it out of the routing table */ void -rtfree(rt) - register struct rtentry *rt; +rtfree(struct rtentry *rt) { /* * find the tree for that address family */ struct radix_node_head *rnh = rt_tables[rt_key(rt)->sa_family]; if (rt == 0 || rnh == 0) panic("rtfree"); + RT_LOCK_ASSERT(rt); + /* * decrement the reference count by one and if it reaches 0, * and there is a close function defined, call the close function */ - rt->rt_refcnt--; - if (rnh->rnh_close && rt->rt_refcnt == 0) { + if (--rt->rt_refcnt > 0) + goto done; + /* XXX refcount==0? */ + if (rt->rt_refcnt == 0 && rnh->rnh_close) rnh->rnh_close((struct radix_node *)rt, rnh); - } /* * If we are no longer "up" (and ref == 0) * then we can free the resources associated * with the route. */ - if (rt->rt_refcnt <= 0 && (rt->rt_flags & RTF_UP) == 0) { + if ((rt->rt_flags & RTF_UP) == 0) { if (rt->rt_nodes->rn_flags & (RNF_ACTIVE | RNF_ROOT)) panic ("rtfree 2"); /* * the rtentry must have been removed from the routing table * so it is represented in rttrash.. remove that now. */ rttrash--; - #ifdef DIAGNOSTIC if (rt->rt_refcnt < 0) { printf("rtfree: %p not freed (neg refs)\n", rt); - return; + goto done; } #endif - /* * release references on items we hold them on.. * e.g other routes and ifaddrs. */ if (rt->rt_ifa) IFAFREE(rt->rt_ifa); - if (rt->rt_parent) - RTFREE(rt->rt_parent); + rt->rt_parent = NULL; /* NB: no refcnt on parent */ /* * The key is separatly alloc'd so free it (see rt_setgate()). * This also frees the gateway, as they are always malloc'd * together. */ Free(rt_key(rt)); /* * and the rtentry itself of course */ + RT_LOCK_DESTROY(rt); Free(rt); + return; } +done: + RT_UNLOCK(rt); } /* compare two sockaddr structures */ #define sa_equal(a1, a2) (bcmp((a1), (a2), (a1)->sa_len) == 0) /* * Force a routing table entry to the specified * destination to go through the given gateway. * Normally called as a result of a routing redirect * message from the network layer. - * - * N.B.: must be called at splnet - * */ void -rtredirect(dst, gateway, netmask, flags, src, rtp) - struct sockaddr *dst, *gateway, *netmask, *src; - int flags; - struct rtentry **rtp; +rtredirect(struct sockaddr *dst, + struct sockaddr *gateway, + struct sockaddr *netmask, + int flags, + struct sockaddr *src) { struct rtentry *rt; int error = 0; short *stat = 0; struct rt_addrinfo info; struct ifaddr *ifa; /* verify the gateway is directly reachable */ if ((ifa = ifa_ifwithnet(gateway)) == 0) { error = ENETUNREACH; goto out; } - rt = rtalloc1(dst, 0, 0UL); + rt = rtalloc1(dst, 0, 0UL); /* NB: rt is locked */ /* * If the redirect isn't from our current router for this dst, * it's either old or wrong. If it redirects us to ourselves, * we have a routing loop, perhaps as a result of an interface * going down recently. */ if (!(flags & RTF_DONE) && rt && (!sa_equal(src, rt->rt_gateway) || rt->rt_ifa != ifa)) error = EINVAL; else if (ifa_ifwithaddr(gateway)) error = EHOSTUNREACH; if (error) goto done; /* * Create a new entry if we just got back a wildcard entry * or the the lookup failed. This is necessary for hosts * which use routing redirects generated by smart gateways * to dynamically build the routing tables. */ - if ((rt == 0) || (rt_mask(rt) && rt_mask(rt)->sa_len < 2)) + if (rt == 0 || (rt_mask(rt) && rt_mask(rt)->sa_len < 2)) goto create; /* * Don't listen to the redirect if it's * for a route to an interface. */ if (rt->rt_flags & RTF_GATEWAY) { if (((rt->rt_flags & RTF_HOST) == 0) && (flags & RTF_HOST)) { /* * Changing from route to net => route to host. * Create new route, rather than smashing route to net. */ create: if (rt) rtfree(rt); flags |= RTF_GATEWAY | RTF_DYNAMIC; bzero((caddr_t)&info, sizeof(info)); info.rti_info[RTAX_DST] = dst; info.rti_info[RTAX_GATEWAY] = gateway; info.rti_info[RTAX_NETMASK] = netmask; info.rti_ifa = ifa; info.rti_flags = flags; rt = NULL; error = rtrequest1(RTM_ADD, &info, &rt); - if (rt != NULL) + if (rt != NULL) { + RT_UNLOCK(rt); flags = rt->rt_flags; + } stat = &rtstat.rts_dynamic; } else { /* * Smash the current notion of the gateway to * this destination. Should check about netmask!!! */ rt->rt_flags |= RTF_MODIFIED; flags |= RTF_MODIFIED; stat = &rtstat.rts_newgateway; /* * add the key and gateway (in one malloc'd chunk). */ rt_setgate(rt, rt_key(rt), gateway); } } else error = EHOSTUNREACH; done: - if (rt) { - if (rtp && !error) - *rtp = rt; - else - rtfree(rt); - } + if (rt) + rtfree(rt); out: if (error) rtstat.rts_badredirect++; else if (stat != NULL) (*stat)++; bzero((caddr_t)&info, sizeof(info)); info.rti_info[RTAX_DST] = dst; info.rti_info[RTAX_GATEWAY] = gateway; info.rti_info[RTAX_NETMASK] = netmask; info.rti_info[RTAX_AUTHOR] = src; rt_missmsg(RTM_REDIRECT, &info, flags, error); } /* * Routing table ioctl interface. */ int -rtioctl(req, data) - u_long req; - caddr_t data; +rtioctl(u_long req, caddr_t data) { #ifdef INET /* Multicast goop, grrr... */ return mrt_ioctl ? mrt_ioctl(req, data) : EOPNOTSUPP; #else /* INET */ return ENXIO; #endif /* INET */ } struct ifaddr * -ifa_ifwithroute(flags, dst, gateway) - int flags; - struct sockaddr *dst, *gateway; +ifa_ifwithroute(int flags, struct sockaddr *dst, struct sockaddr *gateway) { register struct ifaddr *ifa; + if ((flags & RTF_GATEWAY) == 0) { /* * If we are adding a route to an interface, * and the interface is a pt to pt link * we should search for the destination * as our clue to the interface. Otherwise * we can use the local address. */ ifa = 0; if (flags & RTF_HOST) { ifa = ifa_ifwithdstaddr(dst); } if (ifa == 0) ifa = ifa_ifwithaddr(gateway); } else { /* * If we are adding a route to a remote net * or host, the gateway may still be on the * other end of a pt to pt link. */ ifa = ifa_ifwithdstaddr(gateway); } if (ifa == 0) ifa = ifa_ifwithnet(gateway); if (ifa == 0) { struct rtentry *rt = rtalloc1(gateway, 0, 0UL); if (rt == 0) return (0); --rt->rt_refcnt; + RT_UNLOCK(rt); if ((ifa = rt->rt_ifa) == 0) return (0); } if (ifa->ifa_addr->sa_family != dst->sa_family) { struct ifaddr *oifa = ifa; ifa = ifaof_ifpforaddr(dst, ifa->ifa_ifp); if (ifa == 0) ifa = oifa; } return (ifa); } static int rt_fixdelete(struct radix_node *, void *); static int rt_fixchange(struct radix_node *, void *); struct rtfc_arg { struct rtentry *rt0; struct radix_node_head *rnh; }; /* * Do appropriate manipulations of a routing tree given * all the bits of info needed */ int -rtrequest(req, dst, gateway, netmask, flags, ret_nrt) - int req, flags; - struct sockaddr *dst, *gateway, *netmask; - struct rtentry **ret_nrt; +rtrequest(int req, + struct sockaddr *dst, + struct sockaddr *gateway, + struct sockaddr *netmask, + int flags, + struct rtentry **ret_nrt) { struct rt_addrinfo info; bzero((caddr_t)&info, sizeof(info)); info.rti_flags = flags; info.rti_info[RTAX_DST] = dst; info.rti_info[RTAX_GATEWAY] = gateway; info.rti_info[RTAX_NETMASK] = netmask; return rtrequest1(req, &info, ret_nrt); } /* * These (questionable) definitions of apparent local variables apply * to the next two functions. XXXXXX!!! */ #define dst info->rti_info[RTAX_DST] #define gateway info->rti_info[RTAX_GATEWAY] #define netmask info->rti_info[RTAX_NETMASK] #define ifaaddr info->rti_info[RTAX_IFA] #define ifpaddr info->rti_info[RTAX_IFP] #define flags info->rti_flags int -rt_getifa(info) - struct rt_addrinfo *info; +rt_getifa(struct rt_addrinfo *info) { struct ifaddr *ifa; int error = 0; /* * ifp may be specified by sockaddr_dl * when protocol address is ambiguous. */ if (info->rti_ifp == NULL && ifpaddr != NULL && ifpaddr->sa_family == AF_LINK && (ifa = ifa_ifwithnet(ifpaddr)) != NULL) info->rti_ifp = ifa->ifa_ifp; if (info->rti_ifa == NULL && ifaaddr != NULL) info->rti_ifa = ifa_ifwithaddr(ifaaddr); if (info->rti_ifa == NULL) { struct sockaddr *sa; sa = ifaaddr != NULL ? ifaaddr : (gateway != NULL ? gateway : dst); if (sa != NULL && info->rti_ifp != NULL) info->rti_ifa = ifaof_ifpforaddr(sa, info->rti_ifp); else if (dst != NULL && gateway != NULL) info->rti_ifa = ifa_ifwithroute(flags, dst, gateway); else if (sa != NULL) info->rti_ifa = ifa_ifwithroute(flags, sa, sa); } if ((ifa = info->rti_ifa) != NULL) { if (info->rti_ifp == NULL) info->rti_ifp = ifa->ifa_ifp; } else error = ENETUNREACH; return (error); } int -rtrequest1(req, info, ret_nrt) - int req; - struct rt_addrinfo *info; - struct rtentry **ret_nrt; +rtrequest1(int req, struct rt_addrinfo *info, struct rtentry **ret_nrt) { - int s = splnet(); int error = 0; + int error = 0; register struct rtentry *rt; register struct radix_node *rn; register struct radix_node_head *rnh; struct ifaddr *ifa; struct sockaddr *ndst; #define senderr(x) { error = x ; goto bad; } /* * Find the correct routing tree to use for this Address Family */ - if ((rnh = rt_tables[dst->sa_family]) == 0) { - splx(s); + rnh = rt_tables[dst->sa_family]; + if (rnh == 0) return (EAFNOSUPPORT); - } RADIX_NODE_HEAD_LOCK(rnh); /* * If we are adding a host route then we don't want to put * a netmask in the tree, nor do we want to clone it. */ if (flags & RTF_HOST) { netmask = 0; flags &= ~(RTF_CLONING | RTF_PRCLONING); } switch (req) { case RTM_DELETE: /* * Remove the item from the tree and return it. * Complain if it is not there and do no more processing. */ - if ((rn = rnh->rnh_deladdr(dst, netmask, rnh)) == 0) + rn = rnh->rnh_deladdr(dst, netmask, rnh); + if (rn == 0) senderr(ESRCH); if (rn->rn_flags & (RNF_ACTIVE | RNF_ROOT)) panic ("rtrequest delete"); rt = (struct rtentry *)rn; + RT_LOCK(rt); rt->rt_refcnt++; rt->rt_flags &= ~RTF_UP; /* * Now search what's left of the subtree for any cloned * routes which might have been formed from this node. */ if ((rt->rt_flags & (RTF_CLONING | RTF_PRCLONING)) && rt_mask(rt)) { rnh->rnh_walktree_from(rnh, dst, rt_mask(rt), rt_fixdelete, rt); } /* * Remove any external references we may have. * This might result in another rtentry being freed if * we held its last reference. */ if (rt->rt_gwroute) { - rt = rt->rt_gwroute; - RTFREE(rt); - (rt = (struct rtentry *)rn)->rt_gwroute = 0; + struct rtentry *gwrt = rt->rt_gwroute; + RTFREE(gwrt); + rt->rt_gwroute = 0; } /* * give the protocol a chance to keep things in sync. */ if ((ifa = rt->rt_ifa) && ifa->ifa_rtrequest) ifa->ifa_rtrequest(RTM_DELETE, rt, info); /* * one more rtentry floating around that is not * linked to the routing table. */ rttrash++; /* * If the caller wants it, then it can have it, * but it's up to it to free the rtentry as we won't be * doing it. */ - if (ret_nrt) + if (ret_nrt) { *ret_nrt = rt; - else - RTFREE(rt); + RT_UNLOCK(rt); + } else + RTFREE_LOCKED(rt); break; case RTM_RESOLVE: if (ret_nrt == 0 || (rt = *ret_nrt) == 0) senderr(EINVAL); ifa = rt->rt_ifa; + /* XXX locking? */ flags = rt->rt_flags & ~(RTF_CLONING | RTF_PRCLONING | RTF_STATIC); flags |= RTF_WASCLONED; gateway = rt->rt_gateway; if ((netmask = rt->rt_genmask) == 0) flags |= RTF_HOST; goto makeroute; case RTM_ADD: if ((flags & RTF_GATEWAY) && !gateway) panic("rtrequest: GATEWAY but no gateway"); if (info->rti_ifa == NULL && (error = rt_getifa(info))) senderr(error); ifa = info->rti_ifa; makeroute: - R_Malloc(rt, struct rtentry *, sizeof(*rt)); + R_Zalloc(rt, struct rtentry *, sizeof(*rt)); if (rt == 0) senderr(ENOBUFS); - Bzero(rt, sizeof(*rt)); + RT_LOCK_INIT(rt); rt->rt_flags = RTF_UP | flags; /* * Add the gateway. Possibly re-malloc-ing the storage for it * also add the rt_gwroute if possible. */ + RT_LOCK(rt); if ((error = rt_setgate(rt, dst, gateway)) != 0) { + RT_LOCK_DESTROY(rt); Free(rt); senderr(error); } /* * point to the (possibly newly malloc'd) dest address. */ - ndst = rt_key(rt); + ndst = (struct sockaddr *)rt_key(rt); /* * make sure it contains the value we want (masked if needed). */ if (netmask) { rt_maskedcopy(dst, ndst, netmask); } else Bcopy(dst, ndst, dst->sa_len); /* * Note that we now have a reference to the ifa. * This moved from below so that rnh->rnh_addaddr() can * examine the ifa and ifa->ifa_ifp if it so desires. */ IFAREF(ifa); rt->rt_ifa = ifa; rt->rt_ifp = ifa->ifa_ifp; - /* XXX mtu manipulation will be done in rnh_addaddr -- itojun */ - rn = rnh->rnh_addaddr((caddr_t)ndst, (caddr_t)netmask, - rnh, rt->rt_nodes); + /* XXX mtu manipulation will be done in rnh_addaddr -- itojun */ + rn = rnh->rnh_addaddr(ndst, netmask, rnh, rt->rt_nodes); if (rn == 0) { struct rtentry *rt2; /* * Uh-oh, we already have one of these in the tree. * We do a special hack: if the route that's already * there was generated by the protocol-cloning * mechanism, then we just blow it away and retry * the insertion of the new one. */ rt2 = rtalloc1(dst, 0, RTF_PRCLONING); if (rt2 && rt2->rt_parent) { rtrequest(RTM_DELETE, - (struct sockaddr *)rt_key(rt2), + rt_key(rt2), rt2->rt_gateway, rt_mask(rt2), rt2->rt_flags, 0); - RTFREE(rt2); - rn = rnh->rnh_addaddr((caddr_t)ndst, - (caddr_t)netmask, + RTFREE_LOCKED(rt2); + rn = rnh->rnh_addaddr(ndst, netmask, rnh, rt->rt_nodes); } else if (rt2) { /* undo the extra ref we got */ - RTFREE(rt2); + RTFREE_LOCKED(rt2); } } /* * If it still failed to go into the tree, * then un-make it (this should be a function) */ if (rn == 0) { if (rt->rt_gwroute) - rtfree(rt->rt_gwroute); - if (rt->rt_ifa) { + RTFREE(rt->rt_gwroute); + if (rt->rt_ifa) IFAFREE(rt->rt_ifa); - } Free(rt_key(rt)); + RT_LOCK_DESTROY(rt); Free(rt); senderr(EEXIST); } rt->rt_parent = 0; /* * If we got here from RESOLVE, then we are cloning * so clone the rest, and note that we * are a clone (and increment the parent's references) */ if (req == RTM_RESOLVE) { + KASSERT(ret_nrt && *ret_nrt, + ("no route to clone from")); rt->rt_rmx = (*ret_nrt)->rt_rmx; /* copy metrics */ rt->rt_rmx.rmx_pksent = 0; /* reset packet counter */ if ((*ret_nrt)->rt_flags & (RTF_CLONING | RTF_PRCLONING)) { - rt->rt_parent = (*ret_nrt); - (*ret_nrt)->rt_refcnt++; + /* + * NB: We do not bump the refcnt on the parent + * entry under the assumption that it will + * remain so long as we do. This is + * important when deleting the parent route + * as this operation requires traversing + * the tree to delete all clones and futzing + * with refcnts requires us to double-lock + * parent through this back reference. + */ + rt->rt_parent = *ret_nrt; } } /* * if this protocol has something to add to this then * allow it to do that as well. */ if (ifa->ifa_rtrequest) ifa->ifa_rtrequest(req, rt, info); /* * We repeat the same procedure from rt_setgate() here because * it doesn't fire when we call it there because the node * hasn't been added to the tree yet. */ if (req == RTM_ADD && !(rt->rt_flags & RTF_HOST) && rt_mask(rt) != 0) { struct rtfc_arg arg; arg.rnh = rnh; arg.rt0 = rt; rnh->rnh_walktree_from(rnh, rt_key(rt), rt_mask(rt), rt_fixchange, &arg); } /* * actually return a resultant rtentry and * give the caller a single reference. */ if (ret_nrt) { *ret_nrt = rt; rt->rt_refcnt++; } + RT_UNLOCK(rt); break; default: error = EOPNOTSUPP; } bad: RADIX_NODE_HEAD_UNLOCK(rnh); - splx(s); return (error); +#undef senderr +} + #undef dst #undef gateway #undef netmask #undef ifaaddr #undef ifpaddr #undef flags -} /* * Called from rtrequest(RTM_DELETE, ...) to fix up the route's ``family'' * (i.e., the routes related to it by the operation of cloning). This * routine is iterated over all potential former-child-routes by way of * rnh->rnh_walktree_from() above, and those that actually are children of * the late parent (passed in as VP here) are themselves deleted. */ static int -rt_fixdelete(rn, vp) - struct radix_node *rn; - void *vp; +rt_fixdelete(struct radix_node *rn, void *vp) { struct rtentry *rt = (struct rtentry *)rn; struct rtentry *rt0 = vp; if (rt->rt_parent == rt0 && !(rt->rt_flags & (RTF_PINNED | RTF_CLONING | RTF_PRCLONING))) { return rtrequest(RTM_DELETE, rt_key(rt), (struct sockaddr *)0, rt_mask(rt), rt->rt_flags, (struct rtentry **)0); } return 0; } /* * This routine is called from rt_setgate() to do the analogous thing for * adds and changes. There is the added complication in this case of a * middle insert; i.e., insertion of a new network route between an older * network route and (cloned) host routes. For this reason, a simple check * of rt->rt_parent is insufficient; each candidate route must be tested * against the (mask, value) of the new route (passed as before in vp) * to see if the new route matches it. * * XXX - it may be possible to do fixdelete() for changes and reserve this * routine just for adds. I'm not sure why I thought it was necessary to do * changes this way. */ #ifdef DEBUG static int rtfcdebug = 0; #endif static int -rt_fixchange(rn, vp) - struct radix_node *rn; - void *vp; +rt_fixchange(struct radix_node *rn, void *vp) { struct rtentry *rt = (struct rtentry *)rn; struct rtfc_arg *ap = vp; struct rtentry *rt0 = ap->rt0; struct radix_node_head *rnh = ap->rnh; u_char *xk1, *xm1, *xk2, *xmp; int i, len, mlen; #ifdef DEBUG if (rtfcdebug) printf("rt_fixchange: rt %p, rt0 %p\n", rt, rt0); #endif if (!rt->rt_parent || (rt->rt_flags & (RTF_PINNED | RTF_CLONING | RTF_PRCLONING))) { #ifdef DEBUG if(rtfcdebug) printf("no parent, pinned or cloning\n"); #endif return 0; } if (rt->rt_parent == rt0) { #ifdef DEBUG if(rtfcdebug) printf("parent match\n"); #endif return rtrequest(RTM_DELETE, rt_key(rt), (struct sockaddr *)0, rt_mask(rt), rt->rt_flags, (struct rtentry **)0); } /* * There probably is a function somewhere which does this... * if not, there should be. */ - len = imin(((struct sockaddr *)rt_key(rt0))->sa_len, - ((struct sockaddr *)rt_key(rt))->sa_len); + len = imin(rt_key(rt0)->sa_len, rt_key(rt)->sa_len); xk1 = (u_char *)rt_key(rt0); xm1 = (u_char *)rt_mask(rt0); xk2 = (u_char *)rt_key(rt); /* avoid applying a less specific route */ xmp = (u_char *)rt_mask(rt->rt_parent); - mlen = ((struct sockaddr *)rt_key(rt->rt_parent))->sa_len; - if (mlen > ((struct sockaddr *)rt_key(rt0))->sa_len) { + mlen = rt_key(rt->rt_parent)->sa_len; + if (mlen > rt_key(rt0)->sa_len) { #ifdef DEBUG if (rtfcdebug) printf("rt_fixchange: inserting a less " "specific route\n"); #endif return 0; } for (i = rnh->rnh_treetop->rn_offset; i < mlen; i++) { if ((xmp[i] & ~(xmp[i] ^ xm1[i])) != xmp[i]) { #ifdef DEBUG if (rtfcdebug) printf("rt_fixchange: inserting a less " "specific route\n"); #endif return 0; } } for (i = rnh->rnh_treetop->rn_offset; i < len; i++) { if ((xk2[i] & xm1[i]) != xk1[i]) { #ifdef DEBUG if(rtfcdebug) printf("no match\n"); #endif return 0; } } /* * OK, this node is a clone, and matches the node currently being * changed/added under the node's mask. So, get rid of it. */ #ifdef DEBUG if(rtfcdebug) printf("deleting\n"); #endif return rtrequest(RTM_DELETE, rt_key(rt), (struct sockaddr *)0, rt_mask(rt), rt->rt_flags, (struct rtentry **)0); } #define ROUNDUP(a) (a>0 ? (1 + (((a) - 1) | (sizeof(long) - 1))) : sizeof(long)) int -rt_setgate(rt0, dst, gate) - struct rtentry *rt0; - struct sockaddr *dst, *gate; +rt_setgate(struct rtentry *rt, struct sockaddr *dst, struct sockaddr *gate) { + /* XXX dst may be overwritten, can we move this to below */ + struct radix_node_head *rnh = rt_tables[dst->sa_family]; caddr_t new, old; int dlen = ROUNDUP(dst->sa_len), glen = ROUNDUP(gate->sa_len); - register struct rtentry *rt = rt0; - struct radix_node_head *rnh = rt_tables[dst->sa_family]; + RT_LOCK_ASSERT(rt); + /* * A host route with the destination equal to the gateway * will interfere with keeping LLINFO in the routing * table, so disallow it. */ - if (((rt0->rt_flags & (RTF_HOST|RTF_GATEWAY|RTF_LLINFO)) == + if (((rt->rt_flags & (RTF_HOST|RTF_GATEWAY|RTF_LLINFO)) == (RTF_HOST|RTF_GATEWAY)) && - (dst->sa_len == gate->sa_len) && - (bcmp(dst, gate, dst->sa_len) == 0)) { + dst->sa_len == gate->sa_len && + bcmp(dst, gate, dst->sa_len) == 0) { /* * The route might already exist if this is an RTM_CHANGE * or a routing redirect, so try to delete it. */ - if (rt_key(rt0)) - rtrequest(RTM_DELETE, (struct sockaddr *)rt_key(rt0), - rt0->rt_gateway, rt_mask(rt0), rt0->rt_flags, 0); + if (rt_key(rt)) + rtrequest(RTM_DELETE, rt_key(rt), + rt->rt_gateway, rt_mask(rt), rt->rt_flags, 0); return EADDRNOTAVAIL; } /* * Both dst and gateway are stored in the same malloc'd chunk * (If I ever get my hands on....) * if we need to malloc a new chunk, then keep the old one around * till we don't need it any more. */ if (rt->rt_gateway == 0 || glen > ROUNDUP(rt->rt_gateway->sa_len)) { old = (caddr_t)rt_key(rt); R_Malloc(new, caddr_t, dlen + glen); if (new == 0) return ENOBUFS; - rt->rt_nodes->rn_key = new; + rt_key(rt) = new; } else { /* * otherwise just overwrite the old one */ - new = rt->rt_nodes->rn_key; + new = (caddr_t)rt_key(rt); old = 0; } /* * copy the new gateway value into the memory chunk */ Bcopy(gate, (rt->rt_gateway = (struct sockaddr *)(new + dlen)), glen); /* * if we are replacing the chunk (or it's new) we need to * replace the dst as well */ if (old) { Bcopy(dst, new, dlen); Free(old); + dst = gate = 0; /* XXX??? */ } /* * If there is already a gwroute, it's now almost definitly wrong * so drop it. */ if (rt->rt_gwroute != NULL) { RTFREE(rt->rt_gwroute); rt->rt_gwroute = NULL; } /* * Cloning loop avoidance: * In the presence of protocol-cloning and bad configuration, * it is possible to get stuck in bottomless mutual recursion * (rtrequest rt_setgate rtalloc1). We avoid this by not allowing * protocol-cloning to operate for gateways (which is probably the * correct choice anyway), and avoid the resulting reference loops * by disallowing any route to run through itself as a gateway. * This is obviously mandatory when we get rt->rt_output(). */ if (rt->rt_flags & RTF_GATEWAY) { rt->rt_gwroute = rtalloc1(gate, 1, RTF_PRCLONING); if (rt->rt_gwroute == rt) { - RTFREE(rt->rt_gwroute); + RTFREE_LOCKED(rt->rt_gwroute); rt->rt_gwroute = 0; return EDQUOT; /* failure */ } + RT_UNLOCK(rt->rt_gwroute); } /* * This isn't going to do anything useful for host routes, so * don't bother. Also make sure we have a reasonable mask * (we don't yet have one during adds). */ if (!(rt->rt_flags & RTF_HOST) && rt_mask(rt) != 0) { struct rtfc_arg arg; + arg.rnh = rnh; arg.rt0 = rt; RADIX_NODE_HEAD_LOCK(rnh); rnh->rnh_walktree_from(rnh, rt_key(rt), rt_mask(rt), rt_fixchange, &arg); RADIX_NODE_HEAD_UNLOCK(rnh); } return 0; } static void -rt_maskedcopy(src, dst, netmask) - struct sockaddr *src, *dst, *netmask; +rt_maskedcopy(struct sockaddr *src, struct sockaddr *dst, struct sockaddr *netmask) { register u_char *cp1 = (u_char *)src; register u_char *cp2 = (u_char *)dst; register u_char *cp3 = (u_char *)netmask; u_char *cplim = cp2 + *cp3; u_char *cplim2 = cp2 + *cp1; *cp2++ = *cp1++; *cp2++ = *cp1++; /* copies sa_len & sa_family */ cp3 += 2; if (cplim > cplim2) cplim = cplim2; while (cp2 < cplim) *cp2++ = *cp1++ & *cp3++; if (cp2 < cplim2) bzero((caddr_t)cp2, (unsigned)(cplim2 - cp2)); } /* * Set up a routing table entry, normally * for an interface. */ int -rtinit(ifa, cmd, flags) - register struct ifaddr *ifa; - int cmd, flags; +rtinit(struct ifaddr *ifa, int cmd, int flags) { register struct rtentry *rt; register struct sockaddr *dst; register struct sockaddr *deldst; struct sockaddr *netmask; struct mbuf *m = 0; struct rtentry *nrt = 0; struct radix_node_head *rnh; struct radix_node *rn; int error; struct rt_addrinfo info; if (flags & RTF_HOST) { dst = ifa->ifa_dstaddr; netmask = NULL; } else { dst = ifa->ifa_addr; netmask = ifa->ifa_netmask; } /* * If it's a delete, check that if it exists, it's on the correct * interface or we might scrub a route to another ifa which would * be confusing at best and possibly worse. */ if (cmd == RTM_DELETE) { /* * It's a delete, so it should already exist.. * If it's a net, mask off the host bits * (Assuming we have a mask) */ if (netmask != NULL) { m = m_get(M_DONTWAIT, MT_SONAME); if (m == NULL) return(ENOBUFS); deldst = mtod(m, struct sockaddr *); rt_maskedcopy(dst, deldst, netmask); dst = deldst; } /* * Look up an rtentry that is in the routing tree and * contains the correct info. */ if ((rnh = rt_tables[dst->sa_family]) == NULL) goto bad; RADIX_NODE_HEAD_LOCK(rnh); error = ((rn = rnh->rnh_lookup(dst, netmask, rnh)) == NULL || (rn->rn_flags & RNF_ROOT) || ((struct rtentry *)rn)->rt_ifa != ifa || !sa_equal(SA(rn->rn_key), dst)); RADIX_NODE_HEAD_UNLOCK(rnh); if (error) { bad: if (m) (void) m_free(m); return (flags & RTF_HOST ? EHOSTUNREACH : ENETUNREACH); } } /* * Do the actual request */ bzero((caddr_t)&info, sizeof(info)); info.rti_ifa = ifa; info.rti_flags = flags | ifa->ifa_flags; info.rti_info[RTAX_DST] = dst; info.rti_info[RTAX_GATEWAY] = ifa->ifa_addr; info.rti_info[RTAX_NETMASK] = netmask; error = rtrequest1(cmd, &info, &nrt); if (error == 0 && (rt = nrt) != NULL) { /* * notify any listening routing agents of the change */ + RT_LOCK(rt); rt_newaddrmsg(cmd, ifa, error, rt); if (cmd == RTM_DELETE) { /* * If we are deleting, and we found an entry, then * it's been removed from the tree.. now throw it away. */ - RTFREE(rt); - } else if (cmd == RTM_ADD) { - /* - * We just wanted to add it.. we don't actually - * need a reference. - */ - rt->rt_refcnt--; + RTFREE_LOCKED(rt); + } else { + if (cmd == RTM_ADD) { + /* + * We just wanted to add it.. we don't actually + * need a reference. + */ + rt->rt_refcnt--; + } + RT_UNLOCK(rt); } } if (m) (void) m_free(m); return (error); } +/* + * Validate the route rt0 to the specified destination. If the + * route is marked down try to find a new route. If the route + * to the gateway is gone, try to setup a new route. Otherwise, + * if the route is marked for packets to be rejected, enforce that. + * + * On return lrt contains the route to the destination and lrt0 + * contains the route to the next hop. Their values are meaningul + * ONLY if no error is returned. + * + * This routine is invoked on each layer 2 output path, prior to + * encapsulating outbound packets. + */ int -rt_check(lrt, lrt0, dst) - struct rtentry **lrt; - struct rtentry **lrt0; - struct sockaddr *dst; +rt_check(struct rtentry **lrt, struct rtentry **lrt0, struct sockaddr *dst) { +#define senderr(x) { error = x ; goto bad; } struct rtentry *rt; struct rtentry *rt0; int error; - rt = *lrt; rt0 = *lrt0; - error = 0; - rt = rt0; - - if (rt != NULL) { + if (rt) { + /* NB: the locking here is tortuous... */ + RT_LOCK(rt); if ((rt->rt_flags & RTF_UP) == 0) { - rt0 = rt = rtalloc1(dst, 1, 0UL); - if (rt0 != NULL) + RT_UNLOCK(rt); + rt = rtalloc1(dst, 1, 0UL); + if (rt != NULL) { rt->rt_refcnt--; - else + RT_UNLOCK(rt); + } else senderr(EHOSTUNREACH); + rt0 = rt; } + /* XXX BSD/OS checks dst->sa_family != AF_NS */ if (rt->rt_flags & RTF_GATEWAY) { - if (rt->rt_gwroute == NULL) + if (rt->rt_gwroute == 0) goto lookup; - rt = rt->rt_gwroute; + RT_LOCK(rt); /* NB: gwroute */ if ((rt->rt_flags & RTF_UP) == 0) { - rtfree(rt); + rtfree(rt); /* unlock gwroute */ rt = rt0; lookup: - rt->rt_gwroute = rtalloc1(rt->rt_gateway, 1, 0UL); - rt = rt->rt_gwroute; - if (rt == NULL) + RT_UNLOCK(rt0); + rt = rtalloc1(rt->rt_gateway, 1, 0UL); + RT_LOCK(rt0); + rt0->rt_gwroute = rt; + if (rt == 0) { + RT_UNLOCK(rt0); senderr(EHOSTUNREACH); + } } + RT_UNLOCK(rt0); } - if (rt->rt_flags & RTF_REJECT) - if (rt->rt_rmx.rmx_expire == 0 || - time_second < rt->rt_rmx.rmx_expire) - senderr(rt == rt0 ? EHOSTDOWN : EHOSTUNREACH); + /* XXX why are we inspecting rmx_expire? */ + error = (rt->rt_flags & RTF_REJECT) && + (rt->rt_rmx.rmx_expire == 0 || + time_second < rt->rt_rmx.rmx_expire); + RT_UNLOCK(rt); + if (error) + senderr(rt == rt0 ? EHOSTDOWN : EHOSTUNREACH); } - -bad: - *lrt = rt; + *lrt = rt; /* NB: return unlocked */ *lrt0 = rt0; + return (0); +bad: + /* NB: lrt and lrt0 should not be interpreted if error is non-zero */ return (error); +#undef senderr } /* This must be before ip6_init2(), which is now SI_ORDER_MIDDLE */ SYSINIT(route, SI_SUB_PROTO_DOMAIN, SI_ORDER_THIRD, route_init, 0); Index: head/sys/net/route.h =================================================================== --- head/sys/net/route.h (revision 120726) +++ head/sys/net/route.h (revision 120727) @@ -1,300 +1,311 @@ /* * Copyright (c) 1980, 1986, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)route.h 8.4 (Berkeley) 1/9/95 * $FreeBSD$ */ #ifndef _NET_ROUTE_H_ #define _NET_ROUTE_H_ /* * Kernel resident routing tables. * * The routing tables are initialized when interface addresses * are set by making entries for all directly connected interfaces. */ /* * A route consists of a destination address and a reference * to a routing entry. These are often held by protocols * in their control blocks, e.g. inpcb. */ struct route { struct rtentry *ro_rt; struct sockaddr ro_dst; }; /* * These numbers are used by reliable protocols for determining * retransmission behavior and are included in the routing structure. */ struct rt_metrics { u_long rmx_locks; /* Kernel must leave these values alone */ u_long rmx_mtu; /* MTU for this path */ u_long rmx_hopcount; /* max hops expected */ u_long rmx_expire; /* lifetime for route, e.g. redirect */ u_long rmx_recvpipe; /* inbound delay-bandwidth product */ u_long rmx_sendpipe; /* outbound delay-bandwidth product */ u_long rmx_ssthresh; /* outbound gateway buffer limit */ u_long rmx_rtt; /* estimated round trip time */ u_long rmx_rttvar; /* estimated rtt variance */ u_long rmx_pksent; /* packets sent using this route */ u_long rmx_filler[4]; /* will be used for T/TCP later */ }; /* * rmx_rtt and rmx_rttvar are stored as microseconds; * RTTTOPRHZ(rtt) converts to a value suitable for use * by a protocol slowtimo counter. */ #define RTM_RTTUNIT 1000000 /* units for rtt, rttvar, as units per sec */ #define RTTTOPRHZ(r) ((r) / (RTM_RTTUNIT / PR_SLOWHZ)) /* * XXX kernel function pointer `rt_output' is visible to applications. */ struct mbuf; /* * We distinguish between routes to hosts and routes to networks, * preferring the former if available. For each route we infer * the interface to use from the gateway address supplied when * the route was entered. Routes that forward packets through * gateways are marked so that the output routines know to address the * gateway rather than the ultimate destination. */ #ifndef RNF_NORMAL #include #endif struct rtentry { struct radix_node rt_nodes[2]; /* tree glue, and other values */ #define rt_key(r) ((struct sockaddr *)((r)->rt_nodes->rn_key)) #define rt_mask(r) ((struct sockaddr *)((r)->rt_nodes->rn_mask)) struct sockaddr *rt_gateway; /* value */ long rt_refcnt; /* # held references */ u_long rt_flags; /* up/down?, host/net */ struct ifnet *rt_ifp; /* the answer: interface to use */ struct ifaddr *rt_ifa; /* the answer: interface to use */ struct sockaddr *rt_genmask; /* for generation of cloned routes */ caddr_t rt_llinfo; /* pointer to link level info cache */ struct rt_metrics rt_rmx; /* metrics used by rx'ing protocols */ struct rtentry *rt_gwroute; /* implied entry for gatewayed routes */ int (*rt_output)(struct ifnet *, struct mbuf *, struct sockaddr *, struct rtentry *); /* output routine for this (rt,if) */ struct rtentry *rt_parent; /* cloning parent of this route */ - struct mtx *rt_mtx; /* mutex for routing entry */ +#ifdef _KERNEL + /* XXX ugly, user apps use this definition but don't have a mtx def */ + struct mtx rt_mtx; /* mutex for routing entry */ +#endif }; /* * Following structure necessary for 4.3 compatibility; * We should eventually move it to a compat file. */ struct ortentry { u_long rt_hash; /* to speed lookups */ struct sockaddr rt_dst; /* key */ struct sockaddr rt_gateway; /* value */ short rt_flags; /* up/down?, host/net */ short rt_refcnt; /* # held references */ u_long rt_use; /* raw # packets forwarded */ struct ifnet *rt_ifp; /* the answer: interface to use */ }; #define rt_use rt_rmx.rmx_pksent #define RTF_UP 0x1 /* route usable */ #define RTF_GATEWAY 0x2 /* destination is a gateway */ #define RTF_HOST 0x4 /* host entry (net otherwise) */ #define RTF_REJECT 0x8 /* host or net unreachable */ #define RTF_DYNAMIC 0x10 /* created dynamically (by redirect) */ #define RTF_MODIFIED 0x20 /* modified dynamically (by redirect) */ #define RTF_DONE 0x40 /* message confirmed */ /* 0x80 unused, was RTF_DELCLONE */ #define RTF_CLONING 0x100 /* generate new routes on use */ #define RTF_XRESOLVE 0x200 /* external daemon resolves name */ #define RTF_LLINFO 0x400 /* generated by link layer (e.g. ARP) */ #define RTF_STATIC 0x800 /* manually added */ #define RTF_BLACKHOLE 0x1000 /* just discard pkts (during updates) */ #define RTF_PROTO2 0x4000 /* protocol specific routing flag */ #define RTF_PROTO1 0x8000 /* protocol specific routing flag */ #define RTF_PRCLONING 0x10000 /* protocol requires cloning */ #define RTF_WASCLONED 0x20000 /* route generated through cloning */ #define RTF_PROTO3 0x40000 /* protocol specific routing flag */ /* 0x80000 unused */ #define RTF_PINNED 0x100000 /* future use */ #define RTF_LOCAL 0x200000 /* route represents a local address */ #define RTF_BROADCAST 0x400000 /* route represents a bcast address */ #define RTF_MULTICAST 0x800000 /* route represents a mcast address */ /* 0x1000000 and up unassigned */ /* * Routing statistics. */ struct rtstat { short rts_badredirect; /* bogus redirect calls */ short rts_dynamic; /* routes created by redirects */ short rts_newgateway; /* routes modified by redirects */ short rts_unreach; /* lookups which failed */ short rts_wildcard; /* lookups satisfied by a wildcard */ }; /* * Structures for routing messages. */ struct rt_msghdr { u_short rtm_msglen; /* to skip over non-understood messages */ u_char rtm_version; /* future binary compatibility */ u_char rtm_type; /* message type */ u_short rtm_index; /* index for associated ifp */ int rtm_flags; /* flags, incl. kern & message, e.g. DONE */ int rtm_addrs; /* bitmask identifying sockaddrs in msg */ pid_t rtm_pid; /* identify sender */ int rtm_seq; /* for sender to identify action */ int rtm_errno; /* why failed */ int rtm_use; /* from rtentry */ u_long rtm_inits; /* which metrics we are initializing */ struct rt_metrics rtm_rmx; /* metrics themselves */ }; #define RTM_VERSION 5 /* Up the ante and ignore older versions */ /* * Message types. */ #define RTM_ADD 0x1 /* Add Route */ #define RTM_DELETE 0x2 /* Delete Route */ #define RTM_CHANGE 0x3 /* Change Metrics or flags */ #define RTM_GET 0x4 /* Report Metrics */ #define RTM_LOSING 0x5 /* Kernel Suspects Partitioning */ #define RTM_REDIRECT 0x6 /* Told to use different route */ #define RTM_MISS 0x7 /* Lookup failed on this address */ #define RTM_LOCK 0x8 /* fix specified metrics */ #define RTM_OLDADD 0x9 /* caused by SIOCADDRT */ #define RTM_OLDDEL 0xa /* caused by SIOCDELRT */ #define RTM_RESOLVE 0xb /* req to resolve dst to LL addr */ #define RTM_NEWADDR 0xc /* address being added to iface */ #define RTM_DELADDR 0xd /* address being removed from iface */ #define RTM_IFINFO 0xe /* iface going up/down etc. */ #define RTM_NEWMADDR 0xf /* mcast group membership being added to if */ #define RTM_DELMADDR 0x10 /* mcast group membership being deleted */ #define RTM_IFANNOUNCE 0x11 /* iface arrival/departure */ /* * Bitmask values for rtm_inits and rmx_locks. */ #define RTV_MTU 0x1 /* init or lock _mtu */ #define RTV_HOPCOUNT 0x2 /* init or lock _hopcount */ #define RTV_EXPIRE 0x4 /* init or lock _expire */ #define RTV_RPIPE 0x8 /* init or lock _recvpipe */ #define RTV_SPIPE 0x10 /* init or lock _sendpipe */ #define RTV_SSTHRESH 0x20 /* init or lock _ssthresh */ #define RTV_RTT 0x40 /* init or lock _rtt */ #define RTV_RTTVAR 0x80 /* init or lock _rttvar */ /* * Bitmask values for rtm_addrs. */ #define RTA_DST 0x1 /* destination sockaddr present */ #define RTA_GATEWAY 0x2 /* gateway sockaddr present */ #define RTA_NETMASK 0x4 /* netmask sockaddr present */ #define RTA_GENMASK 0x8 /* cloning mask sockaddr present */ #define RTA_IFP 0x10 /* interface name sockaddr present */ #define RTA_IFA 0x20 /* interface addr sockaddr present */ #define RTA_AUTHOR 0x40 /* sockaddr for author of redirect */ #define RTA_BRD 0x80 /* for NEWADDR, broadcast or p-p dest addr */ /* * Index offsets for sockaddr array for alternate internal encoding. */ #define RTAX_DST 0 /* destination sockaddr present */ #define RTAX_GATEWAY 1 /* gateway sockaddr present */ #define RTAX_NETMASK 2 /* netmask sockaddr present */ #define RTAX_GENMASK 3 /* cloning mask sockaddr present */ #define RTAX_IFP 4 /* interface name sockaddr present */ #define RTAX_IFA 5 /* interface addr sockaddr present */ #define RTAX_AUTHOR 6 /* sockaddr for author of redirect */ #define RTAX_BRD 7 /* for NEWADDR, broadcast or p-p dest addr */ #define RTAX_MAX 8 /* size of array to allocate */ struct rt_addrinfo { int rti_addrs; struct sockaddr *rti_info[RTAX_MAX]; int rti_flags; struct ifaddr *rti_ifa; struct ifnet *rti_ifp; }; #ifdef _KERNEL -#define RT_LOCK_INIT(rt) \ - mtx_init((rt)->rt_mtx, "rtentry", NULL, MTX_DEF | MTX_DUPOK) -#define RT_LOCK(rt) mtx_lock((rt)->rt_mtx) -#define RT_UNLOCK(rt) mtx_unlock((rt)->rt_mtx) -#define RT_LOCK_DESTROY(rt) mtx_destroy((rt)->rt_mtx) +#define RT_LOCK_INIT(_rt) \ + mtx_init(&(_rt)->rt_mtx, "rtentry", NULL, MTX_DEF | MTX_DUPOK) +#define RT_LOCK(_rt) mtx_lock(&(_rt)->rt_mtx) +#define RT_UNLOCK(_rt) mtx_unlock(&(_rt)->rt_mtx) +#define RT_LOCK_DESTROY(_rt) mtx_destroy(&(_rt)->rt_mtx) +#define RT_LOCK_ASSERT(_rt) mtx_assert(&(_rt)->rt_mtx, MA_OWNED) -#define RTFREE(rt) \ - do { \ - if ((rt)->rt_refcnt <= 1) \ - rtfree(rt); \ - else \ - (rt)->rt_refcnt--; \ +#define RTFREE_LOCKED(_rt) do { \ + if ((_rt)->rt_refcnt <= 1) \ + rtfree(_rt); \ + else { \ + (_rt)->rt_refcnt--; \ + RT_UNLOCK(_rt); \ + } \ + /* guard against invalid refs */ \ + _rt = 0; \ } while (0) +#define RTFREE(_rt) do { \ + RT_LOCK(_rt); \ + RTFREE_LOCKED(_rt); \ + } while (0) extern struct radix_node_head *rt_tables[AF_MAX+1]; struct ifmultiaddr; void route_init(void); int rt_getifa(struct rt_addrinfo *); void rt_ifannouncemsg(struct ifnet *, int); void rt_ifmsg(struct ifnet *); void rt_missmsg(int, struct rt_addrinfo *, int, int); void rt_newaddrmsg(int, struct ifaddr *, int, struct rtentry *); void rt_newmaddrmsg(int, struct ifmultiaddr *); -int rt_setgate(struct rtentry *, struct sockaddr *, struct sockaddr *); void rtalloc(struct route *); +int rt_setgate(struct rtentry *, struct sockaddr *, struct sockaddr *); void rtalloc_ign(struct route *, u_long); -struct rtentry * - rtalloc1(struct sockaddr *, int, u_long); +/* NB: the rtentry is returned locked */ +struct rtentry *rtalloc1(struct sockaddr *, int, u_long); void rtfree(struct rtentry *); int rtinit(struct ifaddr *, int, int); int rtioctl(u_long, caddr_t); void rtredirect(struct sockaddr *, struct sockaddr *, - struct sockaddr *, int, struct sockaddr *, struct rtentry **); + struct sockaddr *, int, struct sockaddr *); int rtrequest(int, struct sockaddr *, struct sockaddr *, struct sockaddr *, int, struct rtentry **); int rtrequest1(int, struct rt_addrinfo *, struct rtentry **); int rt_check(struct rtentry **, struct rtentry **, struct sockaddr *); #endif #endif Index: head/sys/net/rtsock.c =================================================================== --- head/sys/net/rtsock.c (revision 120726) +++ head/sys/net/rtsock.c (revision 120727) @@ -1,1089 +1,1098 @@ /* * Copyright (c) 1988, 1991, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)rtsock.c 8.7 (Berkeley) 10/12/95 * $FreeBSD$ */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include MALLOC_DEFINE(M_RTABLE, "routetbl", "routing tables"); /* NB: these are not modified */ static struct sockaddr route_dst = { 2, PF_ROUTE, }; static struct sockaddr route_src = { 2, PF_ROUTE, }; static struct sockaddr sa_zero = { sizeof(sa_zero), AF_INET, }; static struct { int ip_count; /* attacked w/ AF_INET */ int ip6_count; /* attached w/ AF_INET6 */ int ipx_count; /* attached w/ AF_IPX */ int any_count; /* total attached */ } route_cb; struct mtx rtsock_mtx; MTX_SYSINIT(rtsock, &rtsock_mtx, "rtsock route_cb lock", MTX_DEF); #define RTSOCK_LOCK() mtx_lock(&rtsock_mtx) #define RTSOCK_UNLOCK() mtx_unlock(&rtsock_mtx) #define RTSOCK_LOCK_ASSERT() mtx_assert(&rtsock_mtx, MA_OWNED) struct walkarg { int w_tmemsize; int w_op, w_arg; caddr_t w_tmem; struct sysctl_req *w_req; }; static struct mbuf *rt_msg1(int, struct rt_addrinfo *); static int rt_msg2(int, struct rt_addrinfo *, caddr_t, struct walkarg *); static int rt_xaddrs(caddr_t, caddr_t, struct rt_addrinfo *); static int sysctl_dumpentry(struct radix_node *rn, void *vw); static int sysctl_iflist(int af, struct walkarg *w); static int route_output(struct mbuf *, struct socket *); static void rt_setmetrics(u_long, struct rt_metrics *, struct rt_metrics *); static void rt_dispatch(struct mbuf *, struct sockaddr *); /* * It really doesn't make any sense at all for this code to share much * with raw_usrreq.c, since its functionality is so restricted. XXX */ static int rts_abort(struct socket *so) { int s, error; s = splnet(); error = raw_usrreqs.pru_abort(so); splx(s); return error; } /* pru_accept is EOPNOTSUPP */ static int rts_attach(struct socket *so, int proto, struct thread *td) { struct rawcb *rp; int s, error; if (sotorawcb(so) != 0) return EISCONN; /* XXX panic? */ /* XXX */ MALLOC(rp, struct rawcb *, sizeof *rp, M_PCB, M_WAITOK | M_ZERO); if (rp == 0) return ENOBUFS; /* * The splnet() is necessary to block protocols from sending * error notifications (like RTM_REDIRECT or RTM_LOSING) while * this PCB is extant but incompletely initialized. * Probably we should try to do more of this work beforehand and * eliminate the spl. */ s = splnet(); so->so_pcb = (caddr_t)rp; error = raw_attach(so, proto); rp = sotorawcb(so); if (error) { splx(s); so->so_pcb = NULL; free(rp, M_PCB); return error; } RTSOCK_LOCK(); switch(rp->rcb_proto.sp_protocol) { case AF_INET: route_cb.ip_count++; break; case AF_INET6: route_cb.ip6_count++; break; case AF_IPX: route_cb.ipx_count++; break; } rp->rcb_faddr = &route_src; route_cb.any_count++; RTSOCK_UNLOCK(); soisconnected(so); so->so_options |= SO_USELOOPBACK; splx(s); return 0; } static int rts_bind(struct socket *so, struct sockaddr *nam, struct thread *td) { int s, error; s = splnet(); error = raw_usrreqs.pru_bind(so, nam, td); /* xxx just EINVAL */ splx(s); return error; } static int rts_connect(struct socket *so, struct sockaddr *nam, struct thread *td) { int s, error; s = splnet(); error = raw_usrreqs.pru_connect(so, nam, td); /* XXX just EINVAL */ splx(s); return error; } /* pru_connect2 is EOPNOTSUPP */ /* pru_control is EOPNOTSUPP */ static int rts_detach(struct socket *so) { struct rawcb *rp = sotorawcb(so); int s, error; s = splnet(); if (rp != 0) { RTSOCK_LOCK(); switch(rp->rcb_proto.sp_protocol) { case AF_INET: route_cb.ip_count--; break; case AF_INET6: route_cb.ip6_count--; break; case AF_IPX: route_cb.ipx_count--; break; } route_cb.any_count--; RTSOCK_UNLOCK(); } error = raw_usrreqs.pru_detach(so); splx(s); return error; } static int rts_disconnect(struct socket *so) { int s, error; s = splnet(); error = raw_usrreqs.pru_disconnect(so); splx(s); return error; } /* pru_listen is EOPNOTSUPP */ static int rts_peeraddr(struct socket *so, struct sockaddr **nam) { int s, error; s = splnet(); error = raw_usrreqs.pru_peeraddr(so, nam); splx(s); return error; } /* pru_rcvd is EOPNOTSUPP */ /* pru_rcvoob is EOPNOTSUPP */ static int rts_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *nam, struct mbuf *control, struct thread *td) { int s, error; s = splnet(); error = raw_usrreqs.pru_send(so, flags, m, nam, control, td); splx(s); return error; } /* pru_sense is null */ static int rts_shutdown(struct socket *so) { int s, error; s = splnet(); error = raw_usrreqs.pru_shutdown(so); splx(s); return error; } static int rts_sockaddr(struct socket *so, struct sockaddr **nam) { int s, error; s = splnet(); error = raw_usrreqs.pru_sockaddr(so, nam); splx(s); return error; } static struct pr_usrreqs route_usrreqs = { rts_abort, pru_accept_notsupp, rts_attach, rts_bind, rts_connect, pru_connect2_notsupp, pru_control_notsupp, rts_detach, rts_disconnect, pru_listen_notsupp, rts_peeraddr, pru_rcvd_notsupp, pru_rcvoob_notsupp, rts_send, pru_sense_null, rts_shutdown, rts_sockaddr, sosend, soreceive, sopoll }; /*ARGSUSED*/ static int route_output(m, so) register struct mbuf *m; struct socket *so; { #define sa_equal(a1, a2) (bcmp((a1), (a2), (a1)->sa_len) == 0) register struct rt_msghdr *rtm = 0; register struct rtentry *rt = 0; struct radix_node_head *rnh; struct rt_addrinfo info; int len, error = 0; struct ifnet *ifp = 0; struct ifaddr *ifa = 0; #define senderr(e) { error = e; goto flush;} if (m == 0 || ((m->m_len < sizeof(long)) && (m = m_pullup(m, sizeof(long))) == 0)) return (ENOBUFS); if ((m->m_flags & M_PKTHDR) == 0) panic("route_output"); len = m->m_pkthdr.len; if (len < sizeof(*rtm) || len != mtod(m, struct rt_msghdr *)->rtm_msglen) { info.rti_info[RTAX_DST] = 0; senderr(EINVAL); } R_Malloc(rtm, struct rt_msghdr *, len); if (rtm == 0) { info.rti_info[RTAX_DST] = 0; senderr(ENOBUFS); } m_copydata(m, 0, len, (caddr_t)rtm); if (rtm->rtm_version != RTM_VERSION) { info.rti_info[RTAX_DST] = 0; senderr(EPROTONOSUPPORT); } rtm->rtm_pid = curproc->p_pid; bzero(&info, sizeof(info)); info.rti_addrs = rtm->rtm_addrs; if (rt_xaddrs((caddr_t)(rtm + 1), len + (caddr_t)rtm, &info)) { info.rti_info[RTAX_DST] = 0; senderr(EINVAL); } info.rti_flags = rtm->rtm_flags; if (info.rti_info[RTAX_DST] == 0 || info.rti_info[RTAX_DST]->sa_family >= AF_MAX || (info.rti_info[RTAX_GATEWAY] != 0 && info.rti_info[RTAX_GATEWAY]->sa_family >= AF_MAX)) senderr(EINVAL); if (info.rti_info[RTAX_GENMASK]) { struct radix_node *t; t = rn_addmask((caddr_t) info.rti_info[RTAX_GENMASK], 0, 1); if (t && Bcmp((caddr_t *) info.rti_info[RTAX_GENMASK] + 1, (caddr_t *)t->rn_key + 1, *(u_char *)t->rn_key - 1) == 0) info.rti_info[RTAX_GENMASK] = (struct sockaddr *)(t->rn_key); else senderr(ENOBUFS); } /* * Verify that the caller has the appropriate privilege; RTM_GET * is the only operation the non-superuser is allowed. */ if (rtm->rtm_type != RTM_GET && (error = suser(curthread)) != 0) senderr(error); switch (rtm->rtm_type) { struct rtentry *saved_nrt; case RTM_ADD: if (info.rti_info[RTAX_GATEWAY] == 0) senderr(EINVAL); saved_nrt = 0; error = rtrequest1(RTM_ADD, &info, &saved_nrt); if (error == 0 && saved_nrt) { + RT_LOCK(saved_nrt); rt_setmetrics(rtm->rtm_inits, &rtm->rtm_rmx, &saved_nrt->rt_rmx); saved_nrt->rt_rmx.rmx_locks &= ~(rtm->rtm_inits); saved_nrt->rt_rmx.rmx_locks |= (rtm->rtm_inits & rtm->rtm_rmx.rmx_locks); saved_nrt->rt_refcnt--; saved_nrt->rt_genmask = info.rti_info[RTAX_GENMASK]; + RT_UNLOCK(saved_nrt); } break; case RTM_DELETE: saved_nrt = 0; error = rtrequest1(RTM_DELETE, &info, &saved_nrt); if (error == 0) { + RT_LOCK(saved_nrt); rt = saved_nrt; goto report; } break; case RTM_GET: case RTM_CHANGE: case RTM_LOCK: rnh = rt_tables[info.rti_info[RTAX_DST]->sa_family]; if (rnh == 0) senderr(EAFNOSUPPORT); RADIX_NODE_HEAD_LOCK(rnh); rt = (struct rtentry *) rnh->rnh_lookup(info.rti_info[RTAX_DST], info.rti_info[RTAX_NETMASK], rnh); RADIX_NODE_HEAD_UNLOCK(rnh); if (rt == NULL) /* XXX looks bogus */ senderr(ESRCH); + RT_LOCK(rt); rt->rt_refcnt++; switch(rtm->rtm_type) { case RTM_GET: report: + RT_LOCK_ASSERT(rt); info.rti_info[RTAX_DST] = rt_key(rt); info.rti_info[RTAX_GATEWAY] = rt->rt_gateway; info.rti_info[RTAX_NETMASK] = rt_mask(rt); info.rti_info[RTAX_GENMASK] = rt->rt_genmask; if (rtm->rtm_addrs & (RTA_IFP | RTA_IFA)) { ifp = rt->rt_ifp; if (ifp) { info.rti_info[RTAX_IFP] = TAILQ_FIRST(&ifp->if_addrhead)->ifa_addr; info.rti_info[RTAX_IFA] = rt->rt_ifa->ifa_addr; if (ifp->if_flags & IFF_POINTOPOINT) info.rti_info[RTAX_BRD] = rt->rt_ifa->ifa_dstaddr; rtm->rtm_index = ifp->if_index; } else { info.rti_info[RTAX_IFP] = 0; info.rti_info[RTAX_IFA] = 0; } } len = rt_msg2(rtm->rtm_type, &info, (caddr_t)0, (struct walkarg *)0); if (len > rtm->rtm_msglen) { struct rt_msghdr *new_rtm; R_Malloc(new_rtm, struct rt_msghdr *, len); if (new_rtm == 0) { + RT_UNLOCK(rt); senderr(ENOBUFS); } Bcopy(rtm, new_rtm, rtm->rtm_msglen); Free(rtm); rtm = new_rtm; } (void)rt_msg2(rtm->rtm_type, &info, (caddr_t)rtm, (struct walkarg *)0); rtm->rtm_flags = rt->rt_flags; rtm->rtm_rmx = rt->rt_rmx; rtm->rtm_addrs = info.rti_addrs; break; case RTM_CHANGE: /* * New gateway could require new ifaddr, ifp; * flags may also be different; ifp may be specified * by ll sockaddr when protocol address is ambiguous */ if (((rt->rt_flags & RTF_GATEWAY) && info.rti_info[RTAX_GATEWAY] != NULL) || info.rti_info[RTAX_IFP] != NULL || (info.rti_info[RTAX_IFA] != NULL && !sa_equal(info.rti_info[RTAX_IFA], rt->rt_ifa->ifa_addr))) { if ((error = rt_getifa(&info)) != 0) { + RT_UNLOCK(rt); senderr(error); } } if (info.rti_info[RTAX_GATEWAY] != NULL && (error = rt_setgate(rt, rt_key(rt), info.rti_info[RTAX_GATEWAY])) != 0) { + RT_UNLOCK(rt); senderr(error); } if ((ifa = info.rti_ifa) != NULL) { struct ifaddr *oifa = rt->rt_ifa; if (oifa != ifa) { if (oifa) { if (oifa->ifa_rtrequest) oifa->ifa_rtrequest( RTM_DELETE, rt, &info); IFAFREE(oifa); } IFAREF(ifa); rt->rt_ifa = ifa; rt->rt_ifp = info.rti_ifp; } } rt_setmetrics(rtm->rtm_inits, &rtm->rtm_rmx, &rt->rt_rmx); if (rt->rt_ifa && rt->rt_ifa->ifa_rtrequest) rt->rt_ifa->ifa_rtrequest(RTM_ADD, rt, &info); if (info.rti_info[RTAX_GENMASK]) rt->rt_genmask = info.rti_info[RTAX_GENMASK]; /* FALLTHROUGH */ case RTM_LOCK: rt->rt_rmx.rmx_locks &= ~(rtm->rtm_inits); rt->rt_rmx.rmx_locks |= (rtm->rtm_inits & rtm->rtm_rmx.rmx_locks); break; } + RT_UNLOCK(rt); break; default: senderr(EOPNOTSUPP); } flush: if (rtm) { if (error) rtm->rtm_errno = error; else rtm->rtm_flags |= RTF_DONE; } if (rt) /* XXX can this be true? */ RTFREE(rt); { register struct rawcb *rp = 0; /* * Check to see if we don't want our own messages. */ if ((so->so_options & SO_USELOOPBACK) == 0) { if (route_cb.any_count <= 1) { if (rtm) Free(rtm); m_freem(m); return (error); } /* There is another listener, so construct message */ rp = sotorawcb(so); } if (rtm) { m_copyback(m, 0, rtm->rtm_msglen, (caddr_t)rtm); if (m->m_pkthdr.len < rtm->rtm_msglen) { m_freem(m); m = NULL; } else if (m->m_pkthdr.len > rtm->rtm_msglen) m_adj(m, rtm->rtm_msglen - m->m_pkthdr.len); Free(rtm); } if (m) { if (rp) { /* * XXX insure we don't get a copy by * invalidating our protocol */ unsigned short family = rp->rcb_proto.sp_family; rp->rcb_proto.sp_family = 0; rt_dispatch(m, info.rti_info[RTAX_DST]); rp->rcb_proto.sp_family = family; } else rt_dispatch(m, info.rti_info[RTAX_DST]); } } return (error); #undef sa_equal } static void rt_setmetrics(u_long which, struct rt_metrics *in, struct rt_metrics *out) { #define metric(f, e) if (which & (f)) out->e = in->e; metric(RTV_RPIPE, rmx_recvpipe); metric(RTV_SPIPE, rmx_sendpipe); metric(RTV_SSTHRESH, rmx_ssthresh); metric(RTV_RTT, rmx_rtt); metric(RTV_RTTVAR, rmx_rttvar); metric(RTV_HOPCOUNT, rmx_hopcount); metric(RTV_MTU, rmx_mtu); metric(RTV_EXPIRE, rmx_expire); #undef metric } #define ROUNDUP(a) \ ((a) > 0 ? (1 + (((a) - 1) | (sizeof(long) - 1))) : sizeof(long)) /* * Extract the addresses of the passed sockaddrs. * Do a little sanity checking so as to avoid bad memory references. * This data is derived straight from userland. */ static int rt_xaddrs(caddr_t cp, caddr_t cplim, struct rt_addrinfo *rtinfo) { #define ADVANCE(x, n) (x += ROUNDUP((n)->sa_len)) register struct sockaddr *sa; register int i; for (i = 0; i < RTAX_MAX && cp < cplim; i++) { if ((rtinfo->rti_addrs & (1 << i)) == 0) continue; sa = (struct sockaddr *)cp; /* * It won't fit. */ if (cp + sa->sa_len > cplim) return (EINVAL); /* * there are no more.. quit now * If there are more bits, they are in error. * I've seen this. route(1) can evidently generate these. * This causes kernel to core dump. * for compatibility, If we see this, point to a safe address. */ if (sa->sa_len == 0) { rtinfo->rti_info[i] = &sa_zero; return (0); /* should be EINVAL but for compat */ } /* accept it */ rtinfo->rti_info[i] = sa; ADVANCE(cp, sa); } return (0); #undef ADVANCE } static struct mbuf * rt_msg1(int type, struct rt_addrinfo *rtinfo) { register struct rt_msghdr *rtm; register struct mbuf *m; register int i; register struct sockaddr *sa; int len, dlen; switch (type) { case RTM_DELADDR: case RTM_NEWADDR: len = sizeof(struct ifa_msghdr); break; case RTM_DELMADDR: case RTM_NEWMADDR: len = sizeof(struct ifma_msghdr); break; case RTM_IFINFO: len = sizeof(struct if_msghdr); break; case RTM_IFANNOUNCE: len = sizeof(struct if_announcemsghdr); break; default: len = sizeof(struct rt_msghdr); } if (len > MCLBYTES) panic("rt_msg1"); m = m_gethdr(M_DONTWAIT, MT_DATA); if (m && len > MHLEN) { MCLGET(m, M_DONTWAIT); if ((m->m_flags & M_EXT) == 0) { m_free(m); m = NULL; } } if (m == 0) return (m); m->m_pkthdr.len = m->m_len = len; m->m_pkthdr.rcvif = 0; rtm = mtod(m, struct rt_msghdr *); bzero((caddr_t)rtm, len); for (i = 0; i < RTAX_MAX; i++) { if ((sa = rtinfo->rti_info[i]) == NULL) continue; rtinfo->rti_addrs |= (1 << i); dlen = ROUNDUP(sa->sa_len); m_copyback(m, len, dlen, (caddr_t)sa); len += dlen; } if (m->m_pkthdr.len != len) { m_freem(m); return (NULL); } rtm->rtm_msglen = len; rtm->rtm_version = RTM_VERSION; rtm->rtm_type = type; return (m); } static int rt_msg2(int type, struct rt_addrinfo *rtinfo, caddr_t cp, struct walkarg *w) { register int i; int len, dlen, second_time = 0; caddr_t cp0; rtinfo->rti_addrs = 0; again: switch (type) { case RTM_DELADDR: case RTM_NEWADDR: len = sizeof(struct ifa_msghdr); break; case RTM_IFINFO: len = sizeof(struct if_msghdr); break; default: len = sizeof(struct rt_msghdr); } cp0 = cp; if (cp0) cp += len; for (i = 0; i < RTAX_MAX; i++) { register struct sockaddr *sa; if ((sa = rtinfo->rti_info[i]) == 0) continue; rtinfo->rti_addrs |= (1 << i); dlen = ROUNDUP(sa->sa_len); if (cp) { bcopy((caddr_t)sa, cp, (unsigned)dlen); cp += dlen; } len += dlen; } len = ALIGN(len); if (cp == 0 && w != NULL && !second_time) { register struct walkarg *rw = w; if (rw->w_req) { if (rw->w_tmemsize < len) { if (rw->w_tmem) free(rw->w_tmem, M_RTABLE); rw->w_tmem = (caddr_t) malloc(len, M_RTABLE, M_NOWAIT); if (rw->w_tmem) rw->w_tmemsize = len; } if (rw->w_tmem) { cp = rw->w_tmem; second_time = 1; goto again; } } } if (cp) { register struct rt_msghdr *rtm = (struct rt_msghdr *)cp0; rtm->rtm_version = RTM_VERSION; rtm->rtm_type = type; rtm->rtm_msglen = len; } return (len); } /* * This routine is called to generate a message from the routing * socket indicating that a redirect has occured, a routing lookup * has failed, or that a protocol has detected timeouts to a particular * destination. */ void rt_missmsg(int type, struct rt_addrinfo *rtinfo, int flags, int error) { struct rt_msghdr *rtm; struct mbuf *m; struct sockaddr *sa = rtinfo->rti_info[RTAX_DST]; if (route_cb.any_count == 0) return; m = rt_msg1(type, rtinfo); if (m == 0) return; rtm = mtod(m, struct rt_msghdr *); rtm->rtm_flags = RTF_DONE | flags; rtm->rtm_errno = error; rtm->rtm_addrs = rtinfo->rti_addrs; rt_dispatch(m, sa); } /* * This routine is called to generate a message from the routing * socket indicating that the status of a network interface has changed. */ void rt_ifmsg(struct ifnet *ifp) { struct if_msghdr *ifm; struct mbuf *m; struct rt_addrinfo info; if (route_cb.any_count == 0) return; bzero((caddr_t)&info, sizeof(info)); m = rt_msg1(RTM_IFINFO, &info); if (m == 0) return; ifm = mtod(m, struct if_msghdr *); ifm->ifm_index = ifp->if_index; ifm->ifm_flags = ifp->if_flags; ifm->ifm_data = ifp->if_data; ifm->ifm_addrs = 0; rt_dispatch(m, NULL); } /* * This is called to generate messages from the routing socket * indicating a network interface has had addresses associated with it. * if we ever reverse the logic and replace messages TO the routing * socket indicate a request to configure interfaces, then it will * be unnecessary as the routing socket will automatically generate * copies of it. */ void rt_newaddrmsg(int cmd, struct ifaddr *ifa, int error, struct rtentry *rt) { struct rt_addrinfo info; struct sockaddr *sa = 0; int pass; struct mbuf *m = 0; struct ifnet *ifp = ifa->ifa_ifp; if (route_cb.any_count == 0) return; for (pass = 1; pass < 3; pass++) { bzero((caddr_t)&info, sizeof(info)); if ((cmd == RTM_ADD && pass == 1) || (cmd == RTM_DELETE && pass == 2)) { register struct ifa_msghdr *ifam; int ncmd = cmd == RTM_ADD ? RTM_NEWADDR : RTM_DELADDR; info.rti_info[RTAX_IFA] = sa = ifa->ifa_addr; info.rti_info[RTAX_IFP] = TAILQ_FIRST(&ifp->if_addrhead)->ifa_addr; info.rti_info[RTAX_NETMASK] = ifa->ifa_netmask; info.rti_info[RTAX_BRD] = ifa->ifa_dstaddr; if ((m = rt_msg1(ncmd, &info)) == NULL) continue; ifam = mtod(m, struct ifa_msghdr *); ifam->ifam_index = ifp->if_index; ifam->ifam_metric = ifa->ifa_metric; ifam->ifam_flags = ifa->ifa_flags; ifam->ifam_addrs = info.rti_addrs; } if ((cmd == RTM_ADD && pass == 2) || (cmd == RTM_DELETE && pass == 1)) { register struct rt_msghdr *rtm; if (rt == 0) continue; info.rti_info[RTAX_NETMASK] = rt_mask(rt); info.rti_info[RTAX_DST] = sa = rt_key(rt); info.rti_info[RTAX_GATEWAY] = rt->rt_gateway; if ((m = rt_msg1(cmd, &info)) == NULL) continue; rtm = mtod(m, struct rt_msghdr *); rtm->rtm_index = ifp->if_index; rtm->rtm_flags |= rt->rt_flags; rtm->rtm_errno = error; rtm->rtm_addrs = info.rti_addrs; } rt_dispatch(m, sa); } } /* * This is the analogue to the rt_newaddrmsg which performs the same * function but for multicast group memberhips. This is easier since * there is no route state to worry about. */ void rt_newmaddrmsg(int cmd, struct ifmultiaddr *ifma) { struct rt_addrinfo info; struct mbuf *m = 0; struct ifnet *ifp = ifma->ifma_ifp; struct ifma_msghdr *ifmam; if (route_cb.any_count == 0) return; bzero((caddr_t)&info, sizeof(info)); info.rti_info[RTAX_IFA] = ifma->ifma_addr; if (ifp && TAILQ_FIRST(&ifp->if_addrhead)) info.rti_info[RTAX_IFP] = TAILQ_FIRST(&ifp->if_addrhead)->ifa_addr; else info.rti_info[RTAX_IFP] = NULL; /* * If a link-layer address is present, present it as a ``gateway'' * (similarly to how ARP entries, e.g., are presented). */ info.rti_info[RTAX_GATEWAY] = ifma->ifma_lladdr; m = rt_msg1(cmd, &info); if (m == NULL) return; ifmam = mtod(m, struct ifma_msghdr *); ifmam->ifmam_index = ifp->if_index; ifmam->ifmam_addrs = info.rti_addrs; rt_dispatch(m, ifma->ifma_addr); } /* * This is called to generate routing socket messages indicating * network interface arrival and departure. */ void rt_ifannouncemsg(struct ifnet *ifp, int what) { struct if_announcemsghdr *ifan; struct mbuf *m; struct rt_addrinfo info; if (route_cb.any_count == 0) return; bzero((caddr_t)&info, sizeof(info)); m = rt_msg1(RTM_IFANNOUNCE, &info); if (m == NULL) return; ifan = mtod(m, struct if_announcemsghdr *); ifan->ifan_index = ifp->if_index; snprintf(ifan->ifan_name, sizeof(ifan->ifan_name), "%s%d", ifp->if_name, ifp->if_unit); ifan->ifan_what = what; rt_dispatch(m, NULL); } static void rt_dispatch(struct mbuf *m, struct sockaddr *sa) { struct sockproto route_proto; route_proto.sp_family = PF_ROUTE; route_proto.sp_protocol = sa ? sa->sa_family : 0; raw_input(m, &route_proto, &route_src, &route_dst); } /* * This is used in dumping the kernel table via sysctl(). */ static int sysctl_dumpentry(struct radix_node *rn, void *vw) { struct walkarg *w = vw; struct rtentry *rt = (struct rtentry *)rn; int error = 0, size; struct rt_addrinfo info; if (w->w_op == NET_RT_FLAGS && !(rt->rt_flags & w->w_arg)) return 0; bzero((caddr_t)&info, sizeof(info)); info.rti_info[RTAX_DST] = rt_key(rt); info.rti_info[RTAX_GATEWAY] = rt->rt_gateway; info.rti_info[RTAX_NETMASK] = rt_mask(rt); info.rti_info[RTAX_GENMASK] = rt->rt_genmask; if (rt->rt_ifp) { info.rti_info[RTAX_IFP] = TAILQ_FIRST(&rt->rt_ifp->if_addrhead)->ifa_addr; info.rti_info[RTAX_IFA] = rt->rt_ifa->ifa_addr; if (rt->rt_ifp->if_flags & IFF_POINTOPOINT) info.rti_info[RTAX_BRD] = rt->rt_ifa->ifa_dstaddr; } size = rt_msg2(RTM_GET, &info, 0, w); if (w->w_req && w->w_tmem) { struct rt_msghdr *rtm = (struct rt_msghdr *)w->w_tmem; rtm->rtm_flags = rt->rt_flags; rtm->rtm_use = rt->rt_use; rtm->rtm_rmx = rt->rt_rmx; rtm->rtm_index = rt->rt_ifp->if_index; rtm->rtm_errno = rtm->rtm_pid = rtm->rtm_seq = 0; rtm->rtm_addrs = info.rti_addrs; error = SYSCTL_OUT(w->w_req, (caddr_t)rtm, size); return (error); } return (error); } static int sysctl_iflist(int af, struct walkarg *w) { struct ifnet *ifp; struct ifaddr *ifa; struct rt_addrinfo info; int len, error = 0; bzero((caddr_t)&info, sizeof(info)); /* IFNET_RLOCK(); */ /* could sleep XXX */ TAILQ_FOREACH(ifp, &ifnet, if_link) { if (w->w_arg && w->w_arg != ifp->if_index) continue; ifa = TAILQ_FIRST(&ifp->if_addrhead); info.rti_info[RTAX_IFP] = ifa->ifa_addr; len = rt_msg2(RTM_IFINFO, &info, (caddr_t)0, w); info.rti_info[RTAX_IFP] = 0; if (w->w_req && w->w_tmem) { struct if_msghdr *ifm; ifm = (struct if_msghdr *)w->w_tmem; ifm->ifm_index = ifp->if_index; ifm->ifm_flags = ifp->if_flags; ifm->ifm_data = ifp->if_data; ifm->ifm_addrs = info.rti_addrs; error = SYSCTL_OUT(w->w_req,(caddr_t)ifm, len); if (error) goto done; } while ((ifa = TAILQ_NEXT(ifa, ifa_link)) != 0) { if (af && af != ifa->ifa_addr->sa_family) continue; if (jailed(curthread->td_ucred) && prison_if(curthread->td_ucred, ifa->ifa_addr)) continue; info.rti_info[RTAX_IFA] = ifa->ifa_addr; info.rti_info[RTAX_NETMASK] = ifa->ifa_netmask; info.rti_info[RTAX_BRD] = ifa->ifa_dstaddr; len = rt_msg2(RTM_NEWADDR, &info, 0, w); if (w->w_req && w->w_tmem) { struct ifa_msghdr *ifam; ifam = (struct ifa_msghdr *)w->w_tmem; ifam->ifam_index = ifa->ifa_ifp->if_index; ifam->ifam_flags = ifa->ifa_flags; ifam->ifam_metric = ifa->ifa_metric; ifam->ifam_addrs = info.rti_addrs; error = SYSCTL_OUT(w->w_req, w->w_tmem, len); if (error) goto done; } } info.rti_info[RTAX_IFA] = info.rti_info[RTAX_NETMASK] = info.rti_info[RTAX_BRD] = 0; } done: /* IFNET_RUNLOCK(); */ /* XXX */ return (error); } static int sysctl_rtsock(SYSCTL_HANDLER_ARGS) { int *name = (int *)arg1; u_int namelen = arg2; struct radix_node_head *rnh; int i, s, error = EINVAL; u_char af; struct walkarg w; name ++; namelen--; if (req->newptr) return (EPERM); if (namelen != 3) return ((namelen < 3) ? EISDIR : ENOTDIR); af = name[0]; if (af > AF_MAX) return (EINVAL); Bzero(&w, sizeof(w)); w.w_op = name[1]; w.w_arg = name[2]; w.w_req = req; s = splnet(); switch (w.w_op) { case NET_RT_DUMP: case NET_RT_FLAGS: if (af != 0) { if ((rnh = rt_tables[af]) != NULL) { /* RADIX_NODE_HEAD_LOCK(rnh); */ error = rnh->rnh_walktree(rnh, sysctl_dumpentry, &w);/* could sleep XXX */ /* RADIX_NODE_HEAD_UNLOCK(rnh); */ } else error = EAFNOSUPPORT; } else { for (i = 1; i <= AF_MAX; i++) if ((rnh = rt_tables[i]) != NULL) { /* RADIX_NODE_HEAD_LOCK(rnh); */ error = rnh->rnh_walktree(rnh, sysctl_dumpentry, &w); /* RADIX_NODE_HEAD_UNLOCK(rnh); */ if (error) break; } } break; case NET_RT_IFLIST: error = sysctl_iflist(af, &w); } splx(s); if (w.w_tmem) free(w.w_tmem, M_RTABLE); return (error); } SYSCTL_NODE(_net, PF_ROUTE, routetable, CTLFLAG_RD, sysctl_rtsock, ""); /* * Definitions of protocols supported in the ROUTE domain. */ extern struct domain routedomain; /* or at least forward */ static struct protosw routesw[] = { { SOCK_RAW, &routedomain, 0, PR_ATOMIC|PR_ADDR, 0, route_output, raw_ctlinput, 0, 0, raw_init, 0, 0, 0, &route_usrreqs } }; static struct domain routedomain = { PF_ROUTE, "route", 0, 0, 0, routesw, &routesw[sizeof(routesw)/sizeof(routesw[0])] }; DOMAIN_SET(route); Index: head/sys/netinet/if_atm.c =================================================================== --- head/sys/netinet/if_atm.c (revision 120726) +++ head/sys/netinet/if_atm.c (revision 120727) @@ -1,358 +1,360 @@ /* $NetBSD: if_atm.c,v 1.6 1996/10/13 02:03:01 christos Exp $ */ /* * * Copyright (c) 1996 Charles D. Cranor and Washington University. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by Charles D. Cranor and * Washington University. * 4. The name of the author may not be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); /* * IP <=> ATM address resolution. */ #include "opt_inet.h" #include "opt_inet6.h" #include "opt_natm.h" #if defined(INET) || defined(INET6) #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef NATM #include #endif #define SDL(s) ((struct sockaddr_dl *)s) #define GET3BYTE(V, A, L) do { \ (V) = ((A)[0] << 16) | ((A)[1] << 8) | (A)[2]; \ (A) += 3; \ (L) -= 3; \ } while (0) #define GET2BYTE(V, A, L) do { \ (V) = ((A)[0] << 8) | (A)[1]; \ (A) += 2; \ (L) -= 2; \ } while (0) #define GET1BYTE(V, A, L) do { \ (V) = *(A)++; \ (L)--; \ } while (0) /* * atm_rtrequest: handle ATM rt request (in support of generic code) * inputs: "req" = request code * "rt" = route entry * "info" = rt_addrinfo */ void atm_rtrequest(int req, struct rtentry *rt, struct rt_addrinfo *info) { struct sockaddr *gate = rt->rt_gateway; struct atmio_openvcc op; struct atmio_closevcc cl; u_char *addr; u_int alen; #ifdef NATM struct sockaddr_in *sin; struct natmpcb *npcb = NULL; #endif static struct sockaddr_dl null_sdl = {sizeof(null_sdl), AF_LINK}; if (rt->rt_flags & RTF_GATEWAY) /* link level requests only */ return; switch (req) { case RTM_RESOLVE: /* resolve: only happens when cloning */ printf("atm_rtrequest: RTM_RESOLVE request detected?\n"); break; case RTM_ADD: /* * route added by a command (e.g. ifconfig, route, arp...). * * first check to see if this is not a host route, in which * case we are being called via "ifconfig" to set the address. */ if ((rt->rt_flags & RTF_HOST) == 0) { rt_setgate(rt,rt_key(rt),(struct sockaddr *)&null_sdl); gate = rt->rt_gateway; SDL(gate)->sdl_type = rt->rt_ifp->if_type; SDL(gate)->sdl_index = rt->rt_ifp->if_index; break; } if ((rt->rt_flags & RTF_CLONING) != 0) { printf("atm_rtrequest: cloning route detected?\n"); break; } if (gate->sa_family != AF_LINK || gate->sa_len < sizeof(null_sdl)) { log(LOG_DEBUG, "atm_rtrequest: bad gateway value"); break; } KASSERT(rt->rt_ifp->if_ioctl != NULL, ("atm_rtrequest: null ioctl")); /* * Parse and verify the link level address as * an open request */ bzero(&op, sizeof(op)); addr = LLADDR(SDL(gate)); alen = SDL(gate)->sdl_alen; if (alen < 4) { printf("%s: bad link-level address\n", __func__); goto failed; } if (alen == 4) { /* old type address */ GET1BYTE(op.param.flags, addr, alen); GET1BYTE(op.param.vpi, addr, alen); GET2BYTE(op.param.vci, addr, alen); op.param.traffic = ATMIO_TRAFFIC_UBR; op.param.aal = (op.param.flags & ATM_PH_AAL5) ? ATMIO_AAL_5 : ATMIO_AAL_0; } else { /* new address */ op.param.aal = ATMIO_AAL_5; GET1BYTE(op.param.flags, addr, alen); op.param.flags &= ATM_PH_LLCSNAP; GET1BYTE(op.param.vpi, addr, alen); GET2BYTE(op.param.vci, addr, alen); GET1BYTE(op.param.traffic, addr, alen); switch (op.param.traffic) { case ATMIO_TRAFFIC_UBR: if (alen >= 3) GET3BYTE(op.param.tparam.pcr, addr, alen); break; case ATMIO_TRAFFIC_CBR: if (alen < 3) goto bad_param; GET3BYTE(op.param.tparam.pcr, addr, alen); break; case ATMIO_TRAFFIC_VBR: if (alen < 3 * 3) goto bad_param; GET3BYTE(op.param.tparam.pcr, addr, alen); GET3BYTE(op.param.tparam.scr, addr, alen); GET3BYTE(op.param.tparam.mbs, addr, alen); break; case ATMIO_TRAFFIC_ABR: if (alen < 4 * 3 + 2 + 1 * 2 + 3) goto bad_param; GET3BYTE(op.param.tparam.pcr, addr, alen); GET3BYTE(op.param.tparam.mcr, addr, alen); GET3BYTE(op.param.tparam.icr, addr, alen); GET3BYTE(op.param.tparam.tbe, addr, alen); GET1BYTE(op.param.tparam.nrm, addr, alen); GET1BYTE(op.param.tparam.trm, addr, alen); GET2BYTE(op.param.tparam.adtf, addr, alen); GET1BYTE(op.param.tparam.rif, addr, alen); GET1BYTE(op.param.tparam.rdf, addr, alen); GET1BYTE(op.param.tparam.cdf, addr, alen); break; default: bad_param: printf("%s: bad traffic params\n", __func__); goto failed; } } op.param.rmtu = op.param.tmtu = rt->rt_ifp->if_mtu; #ifdef NATM /* * let native ATM know we are using this VCI/VPI * (i.e. reserve it) */ sin = (struct sockaddr_in *) rt_key(rt); if (sin->sin_family != AF_INET) goto failed; npcb = npcb_add(NULL, rt->rt_ifp, op.param.vci, op.param.vpi); if (npcb == NULL) goto failed; npcb->npcb_flags |= NPCB_IP; npcb->ipaddr.s_addr = sin->sin_addr.s_addr; /* XXX: move npcb to llinfo when ATM ARP is ready */ rt->rt_llinfo = (caddr_t) npcb; rt->rt_flags |= RTF_LLINFO; #endif /* * let the lower level know this circuit is active */ op.rxhand = NULL; op.param.flags |= ATMIO_FLAG_ASYNC; if (rt->rt_ifp->if_ioctl(rt->rt_ifp, SIOCATMOPENVCC, (caddr_t)&op) != 0) { printf("atm: couldn't add VC\n"); goto failed; } SDL(gate)->sdl_type = rt->rt_ifp->if_type; SDL(gate)->sdl_index = rt->rt_ifp->if_index; break; failed: #ifdef NATM if (npcb) { npcb_free(npcb, NPCB_DESTROY); rt->rt_llinfo = NULL; rt->rt_flags &= ~RTF_LLINFO; } #endif /* mark as invalid. We cannot RTM_DELETE the route from * here, because the recursive call to rtrequest1 does * not really work. */ rt->rt_flags |= RTF_REJECT; break; case RTM_DELETE: #ifdef NATM /* * tell native ATM we are done with this VC */ if (rt->rt_flags & RTF_LLINFO) { npcb_free((struct natmpcb *)rt->rt_llinfo, NPCB_DESTROY); rt->rt_llinfo = NULL; rt->rt_flags &= ~RTF_LLINFO; } #endif /* * tell the lower layer to disable this circuit */ bzero(&op, sizeof(op)); addr = LLADDR(SDL(gate)); addr++; cl.vpi = *addr++; cl.vci = *addr++ << 8; cl.vci |= *addr++; (void)rt->rt_ifp->if_ioctl(rt->rt_ifp, SIOCATMCLOSEVCC, (caddr_t)&cl); break; } } /* * atmresolve: * inputs: * [1] "rt" = the link level route to use (or null if need to look one up) * [2] "m" = mbuf containing the data to be sent * [3] "dst" = sockaddr_in (IP) address of dest. * output: * [4] "desten" = ATM pseudo header which we will fill in VPI/VCI info * return: * 0 == resolve FAILED; note that "m" gets m_freem'd in this case * 1 == resolve OK; desten contains result * * XXX: will need more work if we wish to support ATMARP in the kernel, * but this is enough for PVCs entered via the "route" command. */ int atmresolve(struct rtentry *rt, struct mbuf *m, struct sockaddr *dst, struct atm_pseudohdr *desten) { struct sockaddr_dl *sdl; if (m->m_flags & (M_BCAST | M_MCAST)) { log(LOG_INFO, "atmresolve: BCAST/MCAST packet detected/dumped"); goto bad; } if (rt == NULL) { rt = RTALLOC1(dst, 0); if (rt == NULL) goto bad; /* failed */ rt->rt_refcnt--; /* don't keep LL references */ if ((rt->rt_flags & RTF_GATEWAY) != 0 || (rt->rt_flags & RTF_LLINFO) == 0 || /* XXX: are we using LLINFO? */ rt->rt_gateway->sa_family != AF_LINK) { + RT_UNLOCK(rt); goto bad; } + RT_UNLOCK(rt); } /* * note that rt_gateway is a sockaddr_dl which contains the * atm_pseudohdr data structure for this route. we currently * don't need any rt_llinfo info (but will if we want to support * ATM ARP [c.f. if_ether.c]). */ sdl = SDL(rt->rt_gateway); /* * Check the address family and length is valid, the address * is resolved; otherwise, try to resolve. */ if (sdl->sdl_family == AF_LINK && sdl->sdl_alen >= sizeof(*desten)) { bcopy(LLADDR(sdl), desten, sizeof(*desten)); return (1); /* ok, go for it! */ } /* * we got an entry, but it doesn't have valid link address * info in it (it is prob. the interface route, which has * sdl_alen == 0). dump packet. (fall through to "bad"). */ bad: m_freem(m); return (0); } #endif /* INET */ Index: head/sys/netinet/if_ether.c =================================================================== --- head/sys/netinet/if_ether.c (revision 120726) +++ head/sys/netinet/if_ether.c (revision 120727) @@ -1,970 +1,991 @@ /* * Copyright (c) 1982, 1986, 1988, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)if_ether.c 8.1 (Berkeley) 6/10/93 * $FreeBSD$ */ /* * Ethernet address resolution protocol. * TODO: * add "inuse/lock" bit (or ref. count) along with valid bit */ #include "opt_inet.h" #include "opt_bdg.h" #include "opt_mac.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef BRIDGE #include #include #endif #include #include #include #include #include #define SIN(s) ((struct sockaddr_in *)s) #define SDL(s) ((struct sockaddr_dl *)s) SYSCTL_DECL(_net_link_ether); SYSCTL_NODE(_net_link_ether, PF_INET, inet, CTLFLAG_RW, 0, ""); /* timer values */ static int arpt_prune = (5*60*1); /* walk list every 5 minutes */ static int arpt_keep = (20*60); /* once resolved, good for 20 more minutes */ static int arpt_down = 20; /* once declared down, don't send for 20 sec */ SYSCTL_INT(_net_link_ether_inet, OID_AUTO, prune_intvl, CTLFLAG_RW, &arpt_prune, 0, ""); SYSCTL_INT(_net_link_ether_inet, OID_AUTO, max_age, CTLFLAG_RW, &arpt_keep, 0, ""); SYSCTL_INT(_net_link_ether_inet, OID_AUTO, host_down_time, CTLFLAG_RW, &arpt_down, 0, ""); #define rt_expire rt_rmx.rmx_expire struct llinfo_arp { LIST_ENTRY(llinfo_arp) la_le; struct rtentry *la_rt; struct mbuf *la_hold; /* last packet until resolved/timeout */ u_short la_preempt; /* countdown for pre-expiry arps */ u_short la_asked; /* #times we QUERIED following expiration */ #define la_timer la_rt->rt_rmx.rmx_expire /* deletion time in seconds */ }; static LIST_HEAD(, llinfo_arp) llinfo_arp; static struct ifqueue arpintrq; -static int arp_inuse, arp_allocated, arpinit_done; +static int arp_allocated; +static int arpinit_done; static int arp_maxtries = 5; static int useloopback = 1; /* use loopback interface for local traffic */ static int arp_proxyall = 0; +static struct callout arp_callout; SYSCTL_INT(_net_link_ether_inet, OID_AUTO, maxtries, CTLFLAG_RW, &arp_maxtries, 0, ""); SYSCTL_INT(_net_link_ether_inet, OID_AUTO, useloopback, CTLFLAG_RW, &useloopback, 0, ""); SYSCTL_INT(_net_link_ether_inet, OID_AUTO, proxyall, CTLFLAG_RW, &arp_proxyall, 0, ""); static void arp_init(void); static void arp_rtrequest(int, struct rtentry *, struct rt_addrinfo *); static void arprequest(struct ifnet *, struct in_addr *, struct in_addr *, u_char *); static void arpintr(struct mbuf *); static void arptfree(struct llinfo_arp *); static void arptimer(void *); static struct llinfo_arp *arplookup(u_long, int, int); #ifdef INET static void in_arpinput(struct mbuf *); #endif /* * Timeout routine. Age arp_tab entries periodically. */ /* ARGSUSED */ static void arptimer(ignored_arg) void *ignored_arg; { struct llinfo_arp *la, *ola; - int s = splnet(); RADIX_NODE_HEAD_LOCK(rt_tables[AF_INET]); la = LIST_FIRST(&llinfo_arp); while (la != NULL) { struct rtentry *rt = la->la_rt; ola = la; la = LIST_NEXT(la, la_le); if (rt->rt_expire && rt->rt_expire <= time_second) arptfree(ola); /* timer has expired, clear */ } RADIX_NODE_HEAD_UNLOCK(rt_tables[AF_INET]); - splx(s); - timeout(arptimer, NULL, arpt_prune * hz); + + callout_reset(&arp_callout, arpt_prune * hz, arptimer, NULL); } /* * Parallel to llc_rtrequest. */ static void arp_rtrequest(req, rt, info) int req; register struct rtentry *rt; struct rt_addrinfo *info; { - register struct sockaddr *gate = rt->rt_gateway; - register struct llinfo_arp *la = (struct llinfo_arp *)rt->rt_llinfo; + register struct sockaddr *gate; + register struct llinfo_arp *la; static struct sockaddr_dl null_sdl = {sizeof(null_sdl), AF_LINK}; + RT_LOCK_ASSERT(rt); + if (!arpinit_done) { arpinit_done = 1; - timeout(arptimer, (caddr_t)0, hz); + callout_reset(&arp_callout, hz, arptimer, NULL); } if (rt->rt_flags & RTF_GATEWAY) return; + gate = rt->rt_gateway; + la = (struct llinfo_arp *)rt->rt_llinfo; switch (req) { case RTM_ADD: /* * XXX: If this is a manually added route to interface * such as older version of routed or gated might provide, * restore cloning bit. */ if ((rt->rt_flags & RTF_HOST) == 0 && SIN(rt_mask(rt))->sin_addr.s_addr != 0xffffffff) rt->rt_flags |= RTF_CLONING; if (rt->rt_flags & RTF_CLONING) { /* * Case 1: This route should come from a route to iface. */ rt_setgate(rt, rt_key(rt), (struct sockaddr *)&null_sdl); gate = rt->rt_gateway; SDL(gate)->sdl_type = rt->rt_ifp->if_type; SDL(gate)->sdl_index = rt->rt_ifp->if_index; rt->rt_expire = time_second; break; } /* Announce a new entry if requested. */ if (rt->rt_flags & RTF_ANNOUNCE) arprequest(rt->rt_ifp, &SIN(rt_key(rt))->sin_addr, &SIN(rt_key(rt))->sin_addr, (u_char *)LLADDR(SDL(gate))); /*FALLTHROUGH*/ case RTM_RESOLVE: if (gate->sa_family != AF_LINK || gate->sa_len < sizeof(null_sdl)) { - log(LOG_DEBUG, "arp_rtrequest: bad gateway %s%s\n", + log(LOG_DEBUG, "%s: bad gateway %s%s\n", __func__, inet_ntoa(SIN(rt_key(rt))->sin_addr), (gate->sa_family != AF_LINK) ? " (!AF_LINK)": ""); break; } SDL(gate)->sdl_type = rt->rt_ifp->if_type; SDL(gate)->sdl_index = rt->rt_ifp->if_index; if (la != 0) break; /* This happens on a route change */ /* * Case 2: This route may come from cloning, or a manual route * add with a LL address. */ - R_Malloc(la, struct llinfo_arp *, sizeof(*la)); + R_Zalloc(la, struct llinfo_arp *, sizeof(*la)); rt->rt_llinfo = (caddr_t)la; if (la == 0) { - log(LOG_DEBUG, "arp_rtrequest: malloc failed\n"); + log(LOG_DEBUG, "%s: malloc failed\n", __func__); break; } - arp_inuse++, arp_allocated++; - Bzero(la, sizeof(*la)); + arp_allocated++; la->la_rt = rt; rt->rt_flags |= RTF_LLINFO; RADIX_NODE_HEAD_LOCK_ASSERT(rt_tables[AF_INET]); LIST_INSERT_HEAD(&llinfo_arp, la, la_le); #ifdef INET /* * This keeps the multicast addresses from showing up * in `arp -a' listings as unresolved. It's not actually * functional. Then the same for broadcast. */ if (IN_MULTICAST(ntohl(SIN(rt_key(rt))->sin_addr.s_addr)) && rt->rt_ifp->if_type != IFT_ARCNET) { ETHER_MAP_IP_MULTICAST(&SIN(rt_key(rt))->sin_addr, LLADDR(SDL(gate))); SDL(gate)->sdl_alen = 6; rt->rt_expire = 0; } if (in_broadcast(SIN(rt_key(rt))->sin_addr, rt->rt_ifp)) { memcpy(LLADDR(SDL(gate)), rt->rt_ifp->if_broadcastaddr, rt->rt_ifp->if_addrlen); SDL(gate)->sdl_alen = rt->rt_ifp->if_addrlen; rt->rt_expire = 0; } #endif if (SIN(rt_key(rt))->sin_addr.s_addr == (IA_SIN(rt->rt_ifa))->sin_addr.s_addr) { /* * This test used to be * if (loif.if_flags & IFF_UP) * It allowed local traffic to be forced * through the hardware by configuring the loopback down. * However, it causes problems during network configuration * for boards that can't receive packets they send. * It is now necessary to clear "useloopback" and remove * the route to force traffic out to the hardware. */ rt->rt_expire = 0; Bcopy(IF_LLADDR(rt->rt_ifp), LLADDR(SDL(gate)), SDL(gate)->sdl_alen = rt->rt_ifp->if_addrlen); if (useloopback) rt->rt_ifp = loif; } break; case RTM_DELETE: if (la == 0) break; - arp_inuse--; RADIX_NODE_HEAD_LOCK_ASSERT(rt_tables[AF_INET]); LIST_REMOVE(la, la_le); rt->rt_llinfo = 0; rt->rt_flags &= ~RTF_LLINFO; if (la->la_hold) m_freem(la->la_hold); Free((caddr_t)la); } } /* * Broadcast an ARP request. Caller specifies: * - arp header source ip address * - arp header target ip address * - arp header source ethernet address */ static void arprequest(ifp, sip, tip, enaddr) register struct ifnet *ifp; register struct in_addr *sip, *tip; register u_char *enaddr; { register struct mbuf *m; register struct ether_header *eh; register struct arc_header *arh; register struct arphdr *ah; struct sockaddr sa; static u_char llcx[] = { 0x82, 0x40, LLC_SNAP_LSAP, LLC_SNAP_LSAP, LLC_UI, 0x00, 0x00, 0x00, 0x08, 0x06 }; u_short ar_hrd; if ((m = m_gethdr(M_DONTWAIT, MT_DATA)) == NULL) return; m->m_pkthdr.rcvif = (struct ifnet *)0; #ifdef MAC mac_create_mbuf_linklayer(ifp, m); #endif switch (ifp->if_type) { case IFT_ARCNET: ar_hrd = htons(ARPHRD_ARCNET); m->m_len = arphdr_len2(ifp->if_addrlen, sizeof(struct in_addr)); m->m_pkthdr.len = m->m_len; MH_ALIGN(m, m->m_len); arh = (struct arc_header *)sa.sa_data; arh->arc_dhost = *ifp->if_broadcastaddr; arh->arc_type = ARCTYPE_ARP; ah = mtod(m, struct arphdr *); break; case IFT_ISO88025: ar_hrd = htons(ARPHRD_IEEE802); m->m_len = sizeof(llcx) + arphdr_len2(ifp->if_addrlen, sizeof(struct in_addr)); m->m_pkthdr.len = m->m_len; MH_ALIGN(m, m->m_len); (void)memcpy(mtod(m, caddr_t), llcx, sizeof(llcx)); (void)memcpy(sa.sa_data, ifp->if_broadcastaddr, 6); (void)memcpy(sa.sa_data + 6, enaddr, 6); sa.sa_data[6] |= TR_RII; sa.sa_data[12] = TR_AC; sa.sa_data[13] = TR_LLC_FRAME; ah = (struct arphdr *)(mtod(m, char *) + sizeof(llcx)); break; case IFT_FDDI: case IFT_ETHER: /* * This may not be correct for types not explicitly * listed, but this is our best guess */ default: ar_hrd = htons(ARPHRD_ETHER); m->m_len = arphdr_len2(ifp->if_addrlen, sizeof(struct in_addr)); m->m_pkthdr.len = m->m_len; MH_ALIGN(m, m->m_len); eh = (struct ether_header *)sa.sa_data; /* if_output will not swap */ eh->ether_type = htons(ETHERTYPE_ARP); (void)memcpy(eh->ether_dhost, ifp->if_broadcastaddr, sizeof(eh->ether_dhost)); ah = mtod(m, struct arphdr *); break; } ah->ar_hrd = ar_hrd; ah->ar_pro = htons(ETHERTYPE_IP); ah->ar_hln = ifp->if_addrlen; /* hardware address length */ ah->ar_pln = sizeof(struct in_addr); /* protocol address length */ ah->ar_op = htons(ARPOP_REQUEST); (void)memcpy(ar_sha(ah), enaddr, ah->ar_hln); memset(ar_tha(ah), 0, ah->ar_hln); (void)memcpy(ar_spa(ah), sip, ah->ar_pln); (void)memcpy(ar_tpa(ah), tip, ah->ar_pln); sa.sa_family = AF_UNSPEC; sa.sa_len = sizeof(sa); (*ifp->if_output)(ifp, m, &sa, (struct rtentry *)0); } /* * Resolve an IP address into an ethernet address. If success, * desten is filled in. If there is no entry in arptab, * set one up and broadcast a request for the IP address. * Hold onto this mbuf and resend it once the address * is finally resolved. A return value of 1 indicates * that desten has been filled in and the packet should be sent * normally; a 0 return indicates that the packet has been * taken over here, either now or for later transmission. */ int arpresolve(ifp, rt, m, dst, desten, rt0) register struct ifnet *ifp; register struct rtentry *rt; struct mbuf *m; register struct sockaddr *dst; register u_char *desten; struct rtentry *rt0; { struct llinfo_arp *la = 0; struct sockaddr_dl *sdl; if (m->m_flags & M_BCAST) { /* broadcast */ (void)memcpy(desten, ifp->if_broadcastaddr, ifp->if_addrlen); return (1); } if (m->m_flags & M_MCAST && ifp->if_type != IFT_ARCNET) {/* multicast */ ETHER_MAP_IP_MULTICAST(&SIN(dst)->sin_addr, desten); return(1); } if (rt) la = (struct llinfo_arp *)rt->rt_llinfo; if (la == 0) { la = arplookup(SIN(dst)->sin_addr.s_addr, 1, 0); if (la) rt = la->la_rt; } if (la == 0 || rt == 0) { log(LOG_DEBUG, "arpresolve: can't allocate llinfo for %s%s%s\n", inet_ntoa(SIN(dst)->sin_addr), la ? "la" : "", rt ? "rt" : ""); m_freem(m); return (0); } sdl = SDL(rt->rt_gateway); /* * Check the address family and length is valid, the address * is resolved; otherwise, try to resolve. */ if ((rt->rt_expire == 0 || rt->rt_expire > time_second) && sdl->sdl_family == AF_LINK && sdl->sdl_alen != 0) { /* * If entry has an expiry time and it is approaching, * see if we need to send an ARP request within this * arpt_down interval. */ if ((rt->rt_expire != 0) && (time_second + la->la_preempt > rt->rt_expire)) { arprequest(ifp, &SIN(rt->rt_ifa->ifa_addr)->sin_addr, &SIN(dst)->sin_addr, IF_LLADDR(ifp)); la->la_preempt--; } bcopy(LLADDR(sdl), desten, sdl->sdl_alen); return 1; } /* * If ARP is disabled or static on this interface, stop. * XXX * Probably should not allocate empty llinfo struct if we are * not going to be sending out an arp request. */ if (ifp->if_flags & (IFF_NOARP | IFF_STATICARP)) { m_freem(m); return (0); } /* * There is an arptab entry, but no ethernet address * response yet. Replace the held mbuf with this * latest one. */ if (la->la_hold) m_freem(la->la_hold); la->la_hold = m; if (rt->rt_expire) { + RT_LOCK(rt); rt->rt_flags &= ~RTF_REJECT; if (la->la_asked == 0 || rt->rt_expire != time_second) { rt->rt_expire = time_second; if (la->la_asked++ < arp_maxtries) { arprequest(ifp, &SIN(rt->rt_ifa->ifa_addr)->sin_addr, &SIN(dst)->sin_addr, IF_LLADDR(ifp)); } else { rt->rt_flags |= RTF_REJECT; rt->rt_expire += arpt_down; la->la_asked = 0; la->la_preempt = arp_maxtries; } } + RT_UNLOCK(rt); } return (0); } /* * Common length and type checks are done here, * then the protocol-specific routine is called. */ static void arpintr(struct mbuf *m) { struct arphdr *ar; if (!arpinit_done) { + /* NB: this race should not matter */ arpinit_done = 1; - timeout(arptimer, (caddr_t)0, hz); + callout_reset(&arp_callout, hz, arptimer, NULL); } if (m->m_len < sizeof(struct arphdr) && ((m = m_pullup(m, sizeof(struct arphdr))) == NULL)) { log(LOG_ERR, "arp: runt packet -- m_pullup failed\n"); return; } ar = mtod(m, struct arphdr *); if (ntohs(ar->ar_hrd) != ARPHRD_ETHER && ntohs(ar->ar_hrd) != ARPHRD_IEEE802 && ntohs(ar->ar_hrd) != ARPHRD_ARCNET) { log(LOG_ERR, "arp: unknown hardware address format (0x%2D)\n", (unsigned char *)&ar->ar_hrd, ""); m_freem(m); return; } if (m->m_pkthdr.len < arphdr_len(ar) && (m = m_pullup(m, arphdr_len(ar))) == NULL) { log(LOG_ERR, "arp: runt packet\n"); m_freem(m); return; } switch (ntohs(ar->ar_pro)) { #ifdef INET case ETHERTYPE_IP: in_arpinput(m); return; #endif } m_freem(m); } #ifdef INET /* * ARP for Internet protocols on 10 Mb/s Ethernet. * Algorithm is that given in RFC 826. * In addition, a sanity check is performed on the sender * protocol address, to catch impersonators. * We no longer handle negotiations for use of trailer protocol: * Formerly, ARP replied for protocol type ETHERTYPE_TRAIL sent * along with IP replies if we wanted trailers sent to us, * and also sent them in response to IP replies. * This allowed either end to announce the desire to receive * trailer packets. * We no longer reply to requests for ETHERTYPE_TRAIL protocol either, * but formerly didn't normally send requests. */ static int log_arp_wrong_iface = 1; static int log_arp_movements = 1; SYSCTL_INT(_net_link_ether_inet, OID_AUTO, log_arp_wrong_iface, CTLFLAG_RW, &log_arp_wrong_iface, 0, "log arp packets arriving on the wrong interface"); SYSCTL_INT(_net_link_ether_inet, OID_AUTO, log_arp_movements, CTLFLAG_RW, &log_arp_movements, 0, "log arp replies from MACs different than the one in the cache"); static void in_arpinput(m) struct mbuf *m; { register struct arphdr *ah; register struct ifnet *ifp = m->m_pkthdr.rcvif; struct ether_header *eh; struct arc_header *arh; struct iso88025_header *th = (struct iso88025_header *)0; struct iso88025_sockaddr_dl_data *trld; register struct llinfo_arp *la = 0; register struct rtentry *rt; struct ifaddr *ifa; struct in_ifaddr *ia; struct sockaddr_dl *sdl; struct sockaddr sa; struct in_addr isaddr, itaddr, myaddr; int op, rif_len; int req_len; req_len = arphdr_len2(ifp->if_addrlen, sizeof(struct in_addr)); if (m->m_len < req_len && (m = m_pullup(m, req_len)) == NULL) { log(LOG_ERR, "in_arp: runt packet -- m_pullup failed\n"); return; } ah = mtod(m, struct arphdr *); op = ntohs(ah->ar_op); (void)memcpy(&isaddr, ar_spa(ah), sizeof (isaddr)); (void)memcpy(&itaddr, ar_tpa(ah), sizeof (itaddr)); #ifdef BRIDGE #define BRIDGE_TEST (do_bridge) #else #define BRIDGE_TEST (0) /* cc will optimise the test away */ #endif /* * For a bridge, we want to check the address irrespective * of the receive interface. (This will change slightly * when we have clusters of interfaces). */ LIST_FOREACH(ia, INADDR_HASH(itaddr.s_addr), ia_hash) if ((BRIDGE_TEST || (ia->ia_ifp == ifp)) && itaddr.s_addr == ia->ia_addr.sin_addr.s_addr) goto match; LIST_FOREACH(ia, INADDR_HASH(isaddr.s_addr), ia_hash) if ((BRIDGE_TEST || (ia->ia_ifp == ifp)) && isaddr.s_addr == ia->ia_addr.sin_addr.s_addr) goto match; /* * No match, use the first inet address on the receive interface * as a dummy address for the rest of the function. */ TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) if (ifa->ifa_addr && ifa->ifa_addr->sa_family == AF_INET) { ia = ifatoia(ifa); goto match; } /* * If bridging, fall back to using any inet address. */ if (!BRIDGE_TEST || (ia = TAILQ_FIRST(&in_ifaddrhead)) == NULL) { m_freem(m); return; } match: myaddr = ia->ia_addr.sin_addr; if (!bcmp(ar_sha(ah), IF_LLADDR(ifp), ifp->if_addrlen)) { m_freem(m); /* it's from me, ignore it. */ return; } if (!bcmp(ar_sha(ah), ifp->if_broadcastaddr, ifp->if_addrlen)) { log(LOG_ERR, "arp: link address is broadcast for IP address %s!\n", inet_ntoa(isaddr)); m_freem(m); return; } if (isaddr.s_addr == myaddr.s_addr) { log(LOG_ERR, "arp: %*D is using my IP address %s!\n", ifp->if_addrlen, (u_char *)ar_sha(ah), ":", inet_ntoa(isaddr)); itaddr = myaddr; goto reply; } if (ifp->if_flags & IFF_STATICARP) goto reply; la = arplookup(isaddr.s_addr, itaddr.s_addr == myaddr.s_addr, 0); if (la && (rt = la->la_rt) && (sdl = SDL(rt->rt_gateway))) { /* the following is not an error when doing bridging */ if (!BRIDGE_TEST && rt->rt_ifp != ifp) { if (log_arp_wrong_iface) log(LOG_ERR, "arp: %s is on %s%d but got reply from %*D on %s%d\n", inet_ntoa(isaddr), rt->rt_ifp->if_name, rt->rt_ifp->if_unit, ifp->if_addrlen, (u_char *)ar_sha(ah), ":", ifp->if_name, ifp->if_unit); goto reply; } if (sdl->sdl_alen && bcmp(ar_sha(ah), LLADDR(sdl), sdl->sdl_alen)) { if (rt->rt_expire) { if (log_arp_movements) log(LOG_INFO, "arp: %s moved from %*D to %*D on %s%d\n", inet_ntoa(isaddr), ifp->if_addrlen, (u_char *)LLADDR(sdl), ":", ifp->if_addrlen, (u_char *)ar_sha(ah), ":", ifp->if_name, ifp->if_unit); } else { log(LOG_ERR, "arp: %*D attempts to modify permanent entry for %s on %s%d\n", ifp->if_addrlen, (u_char *)ar_sha(ah), ":", inet_ntoa(isaddr), ifp->if_name, ifp->if_unit); goto reply; } } /* * sanity check for the address length. * XXX this does not work for protocols with variable address * length. -is */ if (sdl->sdl_alen && sdl->sdl_alen != ah->ar_hln) { log(LOG_WARNING, "arp from %*D: new addr len %d, was %d", ifp->if_addrlen, (u_char *) ar_sha(ah), ":", ah->ar_hln, sdl->sdl_alen); } if (ifp->if_addrlen != ah->ar_hln) { log(LOG_WARNING, "arp from %*D: addr len: new %d, i/f %d (ignored)", ifp->if_addrlen, (u_char *) ar_sha(ah), ":", ah->ar_hln, ifp->if_addrlen); goto reply; } (void)memcpy(LLADDR(sdl), ar_sha(ah), sdl->sdl_alen = ah->ar_hln); /* * If we receive an arp from a token-ring station over * a token-ring nic then try to save the source * routing info. */ if (ifp->if_type == IFT_ISO88025) { th = (struct iso88025_header *)m->m_pkthdr.header; trld = SDL_ISO88025(sdl); rif_len = TR_RCF_RIFLEN(th->rcf); if ((th->iso88025_shost[0] & TR_RII) && (rif_len > 2)) { trld->trld_rcf = th->rcf; trld->trld_rcf ^= htons(TR_RCF_DIR); memcpy(trld->trld_route, th->rd, rif_len - 2); trld->trld_rcf &= ~htons(TR_RCF_BCST_MASK); /* * Set up source routing information for * reply packet (XXX) */ m->m_data -= rif_len; m->m_len += rif_len; m->m_pkthdr.len += rif_len; } else { th->iso88025_shost[0] &= ~TR_RII; trld->trld_rcf = 0; } m->m_data -= 8; m->m_len += 8; m->m_pkthdr.len += 8; th->rcf = trld->trld_rcf; } + RT_LOCK(rt); if (rt->rt_expire) rt->rt_expire = time_second + arpt_keep; rt->rt_flags &= ~RTF_REJECT; + RT_UNLOCK(rt); la->la_asked = 0; la->la_preempt = arp_maxtries; if (la->la_hold) { (*ifp->if_output)(ifp, la->la_hold, rt_key(rt), rt); la->la_hold = 0; } } reply: if (op != ARPOP_REQUEST) { m_freem(m); return; } if (itaddr.s_addr == myaddr.s_addr) { /* I am the target */ (void)memcpy(ar_tha(ah), ar_sha(ah), ah->ar_hln); (void)memcpy(ar_sha(ah), IF_LLADDR(ifp), ah->ar_hln); } else { la = arplookup(itaddr.s_addr, 0, SIN_PROXY); if (la == NULL) { struct sockaddr_in sin; if (!arp_proxyall) { m_freem(m); return; } bzero(&sin, sizeof sin); sin.sin_family = AF_INET; sin.sin_len = sizeof sin; sin.sin_addr = itaddr; rt = rtalloc1((struct sockaddr *)&sin, 0, 0UL); if (!rt) { m_freem(m); return; } /* * Don't send proxies for nodes on the same interface * as this one came out of, or we'll get into a fight * over who claims what Ether address. */ if (rt->rt_ifp == ifp) { rtfree(rt); m_freem(m); return; } (void)memcpy(ar_tha(ah), ar_sha(ah), ah->ar_hln); (void)memcpy(ar_sha(ah), IF_LLADDR(ifp), ah->ar_hln); rtfree(rt); /* * Also check that the node which sent the ARP packet * is on the the interface we expect it to be on. This * avoids ARP chaos if an interface is connected to the * wrong network. */ sin.sin_addr = isaddr; rt = rtalloc1((struct sockaddr *)&sin, 0, 0UL); if (!rt) { m_freem(m); return; } if (rt->rt_ifp != ifp) { log(LOG_INFO, "arp_proxy: ignoring request" " from %s via %s%d, expecting %s%d\n", inet_ntoa(isaddr), ifp->if_name, ifp->if_unit, rt->rt_ifp->if_name, rt->rt_ifp->if_unit); rtfree(rt); m_freem(m); return; } rtfree(rt); #ifdef DEBUG_PROXY printf("arp: proxying for %s\n", inet_ntoa(itaddr)); #endif } else { rt = la->la_rt; (void)memcpy(ar_tha(ah), ar_sha(ah), ah->ar_hln); sdl = SDL(rt->rt_gateway); (void)memcpy(ar_sha(ah), LLADDR(sdl), ah->ar_hln); } } (void)memcpy(ar_tpa(ah), ar_spa(ah), ah->ar_pln); (void)memcpy(ar_spa(ah), &itaddr, ah->ar_pln); ah->ar_op = htons(ARPOP_REPLY); ah->ar_pro = htons(ETHERTYPE_IP); /* let's be sure! */ switch (ifp->if_type) { case IFT_ARCNET: arh = (struct arc_header *)sa.sa_data; arh->arc_dhost = *ar_tha(ah); arh->arc_type = ARCTYPE_ARP; break; case IFT_ISO88025: /* Re-arrange the source/dest address */ memcpy(th->iso88025_dhost, th->iso88025_shost, sizeof(th->iso88025_dhost)); memcpy(th->iso88025_shost, IF_LLADDR(ifp), sizeof(th->iso88025_shost)); /* Set the source routing bit if neccesary */ if (th->iso88025_dhost[0] & TR_RII) { th->iso88025_dhost[0] &= ~TR_RII; if (TR_RCF_RIFLEN(th->rcf) > 2) th->iso88025_shost[0] |= TR_RII; } /* Copy the addresses, ac and fc into sa_data */ memcpy(sa.sa_data, th->iso88025_dhost, sizeof(th->iso88025_dhost) * 2); sa.sa_data[(sizeof(th->iso88025_dhost) * 2)] = TR_AC; sa.sa_data[(sizeof(th->iso88025_dhost) * 2) + 1] = TR_LLC_FRAME; break; case IFT_ETHER: case IFT_FDDI: /* * May not be correct for types not explictly * listed, but it is our best guess. */ default: eh = (struct ether_header *)sa.sa_data; (void)memcpy(eh->ether_dhost, ar_tha(ah), sizeof(eh->ether_dhost)); eh->ether_type = htons(ETHERTYPE_ARP); break; } sa.sa_family = AF_UNSPEC; sa.sa_len = sizeof(sa); (*ifp->if_output)(ifp, m, &sa, (struct rtentry *)0); return; } #endif /* * Free an arp entry. */ static void arptfree(la) register struct llinfo_arp *la; { register struct rtentry *rt = la->la_rt; register struct sockaddr_dl *sdl; + if (rt == 0) panic("arptfree"); if (rt->rt_refcnt > 0 && (sdl = SDL(rt->rt_gateway)) && sdl->sdl_family == AF_LINK) { sdl->sdl_alen = 0; la->la_preempt = la->la_asked = 0; + RT_LOCK(rt); /* XXX needed or move higher? */ rt->rt_flags &= ~RTF_REJECT; + RT_UNLOCK(rt); return; } rtrequest(RTM_DELETE, rt_key(rt), (struct sockaddr *)0, rt_mask(rt), 0, (struct rtentry **)0); } /* * Lookup or enter a new address in arptab. */ static struct llinfo_arp * arplookup(addr, create, proxy) u_long addr; int create, proxy; { register struct rtentry *rt; - static struct sockaddr_inarp sin = {sizeof(sin), AF_INET }; + struct sockaddr_inarp sin; const char *why = 0; + bzero(&sin, sizeof(sin)); + sin.sin_len = sizeof(sin); + sin.sin_family = AF_INET; sin.sin_addr.s_addr = addr; - sin.sin_other = proxy ? SIN_PROXY : 0; + if (proxy) + sin.sin_other = SIN_PROXY; rt = rtalloc1((struct sockaddr *)&sin, create, 0UL); if (rt == 0) return (0); - rt->rt_refcnt--; if (rt->rt_flags & RTF_GATEWAY) why = "host is not on local network"; else if ((rt->rt_flags & RTF_LLINFO) == 0) why = "could not allocate llinfo"; else if (rt->rt_gateway->sa_family != AF_LINK) why = "gateway route is not ours"; if (why) { - if (create) { +#define ISDYNCLONE(_rt) \ + (((_rt)->rt_flags & (RTF_STATIC | RTF_WASCLONED)) == RTF_WASCLONED) + if (create) log(LOG_DEBUG, "arplookup %s failed: %s\n", inet_ntoa(sin.sin_addr), why); - /* - * If there are no references to this Layer 2 route, - * and it is a cloned route, and not static, and - * arplookup() is creating the route, then purge - * it from the routing table as it is probably bogus. - */ - if (((rt->rt_flags & (RTF_STATIC | RTF_WASCLONED)) == - RTF_WASCLONED) && (rt->rt_refcnt == 0)) - rtrequest(RTM_DELETE, - (struct sockaddr *)rt_key(rt), - rt->rt_gateway, rt_mask(rt), - rt->rt_flags, 0); + /* + * If there are no references to this Layer 2 route, + * and it is a cloned route, and not static, and + * arplookup() is creating the route, then purge + * it from the routing table as it is probably bogus. + */ + RT_UNLOCK(rt); + if (rt->rt_refcnt == 1 && ISDYNCLONE(rt)) { + rtrequest(RTM_DELETE, + (struct sockaddr *)rt_key(rt), + rt->rt_gateway, rt_mask(rt), + rt->rt_flags, 0); } + RTFREE(rt); return (0); +#undef ISDYNCLONE + } else { + rt->rt_refcnt--; + RT_UNLOCK(rt); + return ((struct llinfo_arp *)rt->rt_llinfo); } - return ((struct llinfo_arp *)rt->rt_llinfo); } void arp_ifinit(ifp, ifa) struct ifnet *ifp; struct ifaddr *ifa; { if (ntohl(IA_SIN(ifa)->sin_addr.s_addr) != INADDR_ANY) arprequest(ifp, &IA_SIN(ifa)->sin_addr, &IA_SIN(ifa)->sin_addr, IF_LLADDR(ifp)); ifa->ifa_rtrequest = arp_rtrequest; ifa->ifa_flags |= RTF_CLONING; } static void arp_init(void) { arpintrq.ifq_maxlen = 50; mtx_init(&arpintrq.ifq_mtx, "arp_inq", NULL, MTX_DEF); LIST_INIT(&llinfo_arp); + callout_init(&arp_callout, CALLOUT_MPSAFE); netisr_register(NETISR_ARP, arpintr, &arpintrq); } - SYSINIT(arp, SI_SUB_PROTO_DOMAIN, SI_ORDER_ANY, arp_init, 0); Index: head/sys/netinet/in_pcb.c =================================================================== --- head/sys/netinet/in_pcb.c (revision 120726) +++ head/sys/netinet/in_pcb.c (revision 120727) @@ -1,1181 +1,1182 @@ /* * Copyright (c) 1982, 1986, 1991, 1993, 1995 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)in_pcb.c 8.4 (Berkeley) 5/24/95 * $FreeBSD$ */ #include "opt_ipsec.h" #include "opt_inet6.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef INET6 #include #include #endif /* INET6 */ #ifdef IPSEC #include #include #endif /* IPSEC */ #ifdef FAST_IPSEC #if defined(IPSEC) || defined(IPSEC_ESP) #error "Bad idea: don't compile with both IPSEC and FAST_IPSEC!" #endif #include #include #define IPSEC #endif /* FAST_IPSEC */ struct in_addr zeroin_addr; /* * These configure the range of local port addresses assigned to * "unspecified" outgoing connections/packets/whatever. */ int ipport_lowfirstauto = IPPORT_RESERVED - 1; /* 1023 */ int ipport_lowlastauto = IPPORT_RESERVEDSTART; /* 600 */ int ipport_firstauto = IPPORT_HIFIRSTAUTO; /* 49152 */ int ipport_lastauto = IPPORT_HILASTAUTO; /* 65535 */ int ipport_hifirstauto = IPPORT_HIFIRSTAUTO; /* 49152 */ int ipport_hilastauto = IPPORT_HILASTAUTO; /* 65535 */ /* * Reserved ports accessible only to root. There are significant * security considerations that must be accounted for when changing these, * but the security benefits can be great. Please be careful. */ int ipport_reservedhigh = IPPORT_RESERVED - 1; /* 1023 */ int ipport_reservedlow = 0; #define RANGECHK(var, min, max) \ if ((var) < (min)) { (var) = (min); } \ else if ((var) > (max)) { (var) = (max); } static int sysctl_net_ipport_check(SYSCTL_HANDLER_ARGS) { int error = sysctl_handle_int(oidp, oidp->oid_arg1, oidp->oid_arg2, req); if (!error) { RANGECHK(ipport_lowfirstauto, 1, IPPORT_RESERVED - 1); RANGECHK(ipport_lowlastauto, 1, IPPORT_RESERVED - 1); RANGECHK(ipport_firstauto, IPPORT_RESERVED, USHRT_MAX); RANGECHK(ipport_lastauto, IPPORT_RESERVED, USHRT_MAX); RANGECHK(ipport_hifirstauto, IPPORT_RESERVED, USHRT_MAX); RANGECHK(ipport_hilastauto, IPPORT_RESERVED, USHRT_MAX); } return error; } #undef RANGECHK SYSCTL_NODE(_net_inet_ip, IPPROTO_IP, portrange, CTLFLAG_RW, 0, "IP Ports"); SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, lowfirst, CTLTYPE_INT|CTLFLAG_RW, &ipport_lowfirstauto, 0, &sysctl_net_ipport_check, "I", ""); SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, lowlast, CTLTYPE_INT|CTLFLAG_RW, &ipport_lowlastauto, 0, &sysctl_net_ipport_check, "I", ""); SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, first, CTLTYPE_INT|CTLFLAG_RW, &ipport_firstauto, 0, &sysctl_net_ipport_check, "I", ""); SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, last, CTLTYPE_INT|CTLFLAG_RW, &ipport_lastauto, 0, &sysctl_net_ipport_check, "I", ""); SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, hifirst, CTLTYPE_INT|CTLFLAG_RW, &ipport_hifirstauto, 0, &sysctl_net_ipport_check, "I", ""); SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, hilast, CTLTYPE_INT|CTLFLAG_RW, &ipport_hilastauto, 0, &sysctl_net_ipport_check, "I", ""); SYSCTL_INT(_net_inet_ip_portrange, OID_AUTO, reservedhigh, CTLFLAG_RW|CTLFLAG_SECURE, &ipport_reservedhigh, 0, ""); SYSCTL_INT(_net_inet_ip_portrange, OID_AUTO, reservedlow, CTLFLAG_RW|CTLFLAG_SECURE, &ipport_reservedlow, 0, ""); /* * in_pcb.c: manage the Protocol Control Blocks. * * NOTE: It is assumed that most of these functions will be called at * splnet(). XXX - There are, unfortunately, a few exceptions to this * rule that should be fixed. */ /* * Allocate a PCB and associate it with the socket. */ int in_pcballoc(so, pcbinfo, td) struct socket *so; struct inpcbinfo *pcbinfo; struct thread *td; { register struct inpcb *inp; #ifdef IPSEC int error; #endif - - inp = uma_zalloc(pcbinfo->ipi_zone, M_NOWAIT); + inp = uma_zalloc(pcbinfo->ipi_zone, M_NOWAIT | M_ZERO); if (inp == NULL) return (ENOBUFS); - bzero((caddr_t)inp, sizeof(*inp)); inp->inp_gencnt = ++pcbinfo->ipi_gencnt; inp->inp_pcbinfo = pcbinfo; inp->inp_socket = so; #ifdef IPSEC error = ipsec_init_policy(so, &inp->inp_sp); if (error != 0) { uma_zfree(pcbinfo->ipi_zone, inp); return error; } #endif /*IPSEC*/ #if defined(INET6) if (INP_SOCKAF(so) == AF_INET6) { inp->inp_vflag |= INP_IPV6PROTO; if (ip6_v6only) inp->inp_flags |= IN6P_IPV6_V6ONLY; } #endif LIST_INSERT_HEAD(pcbinfo->listhead, inp, inp_list); pcbinfo->ipi_count++; so->so_pcb = (caddr_t)inp; INP_LOCK_INIT(inp, "inp"); #ifdef INET6 if (ip6_auto_flowlabel) inp->inp_flags |= IN6P_AUTOFLOWLABEL; #endif return (0); } int in_pcbbind(inp, nam, td) register struct inpcb *inp; struct sockaddr *nam; struct thread *td; { int anonport, error; if (inp->inp_lport != 0 || inp->inp_laddr.s_addr != INADDR_ANY) return (EINVAL); anonport = inp->inp_lport == 0 && (nam == NULL || ((struct sockaddr_in *)nam)->sin_port == 0); error = in_pcbbind_setup(inp, nam, &inp->inp_laddr.s_addr, &inp->inp_lport, td); if (error) return (error); if (in_pcbinshash(inp) != 0) { inp->inp_laddr.s_addr = INADDR_ANY; inp->inp_lport = 0; return (EAGAIN); } if (anonport) inp->inp_flags |= INP_ANONPORT; return (0); } /* * Set up a bind operation on a PCB, performing port allocation * as required, but do not actually modify the PCB. Callers can * either complete the bind by setting inp_laddr/inp_lport and * calling in_pcbinshash(), or they can just use the resulting * port and address to authorise the sending of a once-off packet. * * On error, the values of *laddrp and *lportp are not changed. */ int in_pcbbind_setup(inp, nam, laddrp, lportp, td) struct inpcb *inp; struct sockaddr *nam; in_addr_t *laddrp; u_short *lportp; struct thread *td; { struct socket *so = inp->inp_socket; unsigned short *lastport; struct sockaddr_in *sin; struct inpcbinfo *pcbinfo = inp->inp_pcbinfo; struct in_addr laddr; u_short lport = 0; int wild = 0, reuseport = (so->so_options & SO_REUSEPORT); int error, prison = 0; if (TAILQ_EMPTY(&in_ifaddrhead)) /* XXX broken! */ return (EADDRNOTAVAIL); laddr.s_addr = *laddrp; if (nam != NULL && laddr.s_addr != INADDR_ANY) return (EINVAL); if ((so->so_options & (SO_REUSEADDR|SO_REUSEPORT)) == 0) wild = 1; if (nam) { sin = (struct sockaddr_in *)nam; if (nam->sa_len != sizeof (*sin)) return (EINVAL); #ifdef notdef /* * We should check the family, but old programs * incorrectly fail to initialize it. */ if (sin->sin_family != AF_INET) return (EAFNOSUPPORT); #endif if (sin->sin_addr.s_addr != INADDR_ANY) if (prison_ip(td->td_ucred, 0, &sin->sin_addr.s_addr)) return(EINVAL); if (sin->sin_port != *lportp) { /* Don't allow the port to change. */ if (*lportp != 0) return (EINVAL); lport = sin->sin_port; } /* NB: lport is left as 0 if the port isn't being changed. */ if (IN_MULTICAST(ntohl(sin->sin_addr.s_addr))) { /* * Treat SO_REUSEADDR as SO_REUSEPORT for multicast; * allow complete duplication of binding if * SO_REUSEPORT is set, or if SO_REUSEADDR is set * and a multicast address is bound on both * new and duplicated sockets. */ if (so->so_options & SO_REUSEADDR) reuseport = SO_REUSEADDR|SO_REUSEPORT; } else if (sin->sin_addr.s_addr != INADDR_ANY) { sin->sin_port = 0; /* yech... */ bzero(&sin->sin_zero, sizeof(sin->sin_zero)); if (ifa_ifwithaddr((struct sockaddr *)sin) == 0) return (EADDRNOTAVAIL); } laddr = sin->sin_addr; if (lport) { struct inpcb *t; /* GROSS */ if (ntohs(lport) <= ipport_reservedhigh && ntohs(lport) >= ipport_reservedlow && td && suser_cred(td->td_ucred, PRISON_ROOT)) return (EACCES); if (td && jailed(td->td_ucred)) prison = 1; if (so->so_cred->cr_uid != 0 && !IN_MULTICAST(ntohl(sin->sin_addr.s_addr))) { t = in_pcblookup_local(inp->inp_pcbinfo, sin->sin_addr, lport, prison ? 0 : INPLOOKUP_WILDCARD); /* * XXX * This entire block sorely needs a rewrite. */ if (t && (t->inp_vflag & INP_TIMEWAIT)) { if ((ntohl(sin->sin_addr.s_addr) != INADDR_ANY || ntohl(t->inp_laddr.s_addr) != INADDR_ANY || (intotw(t)->tw_so_options & SO_REUSEPORT) == 0) && (so->so_cred->cr_uid != intotw(t)->tw_cred->cr_uid)) return (EADDRINUSE); } else if (t && (ntohl(sin->sin_addr.s_addr) != INADDR_ANY || ntohl(t->inp_laddr.s_addr) != INADDR_ANY || (t->inp_socket->so_options & SO_REUSEPORT) == 0) && (so->so_cred->cr_uid != t->inp_socket->so_cred->cr_uid)) { #if defined(INET6) if (ntohl(sin->sin_addr.s_addr) != INADDR_ANY || ntohl(t->inp_laddr.s_addr) != INADDR_ANY || INP_SOCKAF(so) == INP_SOCKAF(t->inp_socket)) #endif /* defined(INET6) */ return (EADDRINUSE); } } if (prison && prison_ip(td->td_ucred, 0, &sin->sin_addr.s_addr)) return (EADDRNOTAVAIL); t = in_pcblookup_local(pcbinfo, sin->sin_addr, lport, prison ? 0 : wild); if (t && (t->inp_vflag & INP_TIMEWAIT)) { if ((reuseport & intotw(t)->tw_so_options) == 0) return (EADDRINUSE); } else if (t && (reuseport & t->inp_socket->so_options) == 0) { #if defined(INET6) if (ntohl(sin->sin_addr.s_addr) != INADDR_ANY || ntohl(t->inp_laddr.s_addr) != INADDR_ANY || INP_SOCKAF(so) == INP_SOCKAF(t->inp_socket)) #endif /* defined(INET6) */ return (EADDRINUSE); } } } if (*lportp != 0) lport = *lportp; if (lport == 0) { u_short first, last; int count; if (laddr.s_addr != INADDR_ANY) if (prison_ip(td->td_ucred, 0, &laddr.s_addr)) return (EINVAL); if (inp->inp_flags & INP_HIGHPORT) { first = ipport_hifirstauto; /* sysctl */ last = ipport_hilastauto; lastport = &pcbinfo->lasthi; } else if (inp->inp_flags & INP_LOWPORT) { if (td && (error = suser_cred(td->td_ucred, PRISON_ROOT)) != 0) return error; first = ipport_lowfirstauto; /* 1023 */ last = ipport_lowlastauto; /* 600 */ lastport = &pcbinfo->lastlow; } else { first = ipport_firstauto; /* sysctl */ last = ipport_lastauto; lastport = &pcbinfo->lastport; } /* * Simple check to ensure all ports are not used up causing * a deadlock here. * * We split the two cases (up and down) so that the direction * is not being tested on each round of the loop. */ if (first > last) { /* * counting down */ count = first - last; do { if (count-- < 0) /* completely used? */ return (EADDRNOTAVAIL); --*lastport; if (*lastport > first || *lastport < last) *lastport = first; lport = htons(*lastport); } while (in_pcblookup_local(pcbinfo, laddr, lport, wild)); } else { /* * counting up */ count = last - first; do { if (count-- < 0) /* completely used? */ return (EADDRNOTAVAIL); ++*lastport; if (*lastport < first || *lastport > last) *lastport = first; lport = htons(*lastport); } while (in_pcblookup_local(pcbinfo, laddr, lport, wild)); } } if (prison_ip(td->td_ucred, 0, &laddr.s_addr)) return (EINVAL); *laddrp = laddr.s_addr; *lportp = lport; return (0); } /* * Connect from a socket to a specified address. * Both address and port must be specified in argument sin. * If don't have a local address for this socket yet, * then pick one. */ int in_pcbconnect(inp, nam, td) register struct inpcb *inp; struct sockaddr *nam; struct thread *td; { u_short lport, fport; in_addr_t laddr, faddr; int anonport, error; lport = inp->inp_lport; laddr = inp->inp_laddr.s_addr; anonport = (lport == 0); error = in_pcbconnect_setup(inp, nam, &laddr, &lport, &faddr, &fport, NULL, td); if (error) return (error); /* Do the initial binding of the local address if required. */ if (inp->inp_laddr.s_addr == INADDR_ANY && inp->inp_lport == 0) { inp->inp_lport = lport; inp->inp_laddr.s_addr = laddr; if (in_pcbinshash(inp) != 0) { inp->inp_laddr.s_addr = INADDR_ANY; inp->inp_lport = 0; return (EAGAIN); } } /* Commit the remaining changes. */ inp->inp_lport = lport; inp->inp_laddr.s_addr = laddr; inp->inp_faddr.s_addr = faddr; inp->inp_fport = fport; in_pcbrehash(inp); if (anonport) inp->inp_flags |= INP_ANONPORT; return (0); } /* * Set up for a connect from a socket to the specified address. * On entry, *laddrp and *lportp should contain the current local * address and port for the PCB; these are updated to the values * that should be placed in inp_laddr and inp_lport to complete * the connect. * * On success, *faddrp and *fportp will be set to the remote address * and port. These are not updated in the error case. * * If the operation fails because the connection already exists, * *oinpp will be set to the PCB of that connection so that the * caller can decide to override it. In all other cases, *oinpp * is set to NULL. */ int in_pcbconnect_setup(inp, nam, laddrp, lportp, faddrp, fportp, oinpp, td) register struct inpcb *inp; struct sockaddr *nam; in_addr_t *laddrp; u_short *lportp; in_addr_t *faddrp; u_short *fportp; struct inpcb **oinpp; struct thread *td; { struct sockaddr_in *sin = (struct sockaddr_in *)nam; struct in_ifaddr *ia; struct sockaddr_in sa; struct ucred *cred; struct inpcb *oinp; struct in_addr laddr, faddr; u_short lport, fport; int error; if (oinpp != NULL) *oinpp = NULL; if (nam->sa_len != sizeof (*sin)) return (EINVAL); if (sin->sin_family != AF_INET) return (EAFNOSUPPORT); if (sin->sin_port == 0) return (EADDRNOTAVAIL); laddr.s_addr = *laddrp; lport = *lportp; faddr = sin->sin_addr; fport = sin->sin_port; cred = inp->inp_socket->so_cred; if (laddr.s_addr == INADDR_ANY && jailed(cred)) { bzero(&sa, sizeof(sa)); sa.sin_addr.s_addr = htonl(prison_getip(cred)); sa.sin_len = sizeof(sa); sa.sin_family = AF_INET; error = in_pcbbind_setup(inp, (struct sockaddr *)&sa, &laddr.s_addr, &lport, td); if (error) return (error); } if (!TAILQ_EMPTY(&in_ifaddrhead)) { /* * If the destination address is INADDR_ANY, * use the primary local address. * If the supplied address is INADDR_BROADCAST, * and the primary interface supports broadcast, * choose the broadcast address for that interface. */ if (faddr.s_addr == INADDR_ANY) faddr = IA_SIN(TAILQ_FIRST(&in_ifaddrhead))->sin_addr; else if (faddr.s_addr == (u_long)INADDR_BROADCAST && (TAILQ_FIRST(&in_ifaddrhead)->ia_ifp->if_flags & IFF_BROADCAST)) faddr = satosin(&TAILQ_FIRST( &in_ifaddrhead)->ia_broadaddr)->sin_addr; } if (laddr.s_addr == INADDR_ANY) { register struct route *ro; ia = (struct in_ifaddr *)0; /* * If route is known or can be allocated now, * our src addr is taken from the i/f, else punt. * Note that we should check the address family of the cached * destination, in case of sharing the cache with IPv6. */ ro = &inp->inp_route; if (ro->ro_rt && (ro->ro_dst.sa_family != AF_INET || satosin(&ro->ro_dst)->sin_addr.s_addr != faddr.s_addr || inp->inp_socket->so_options & SO_DONTROUTE)) { RTFREE(ro->ro_rt); ro->ro_rt = (struct rtentry *)0; } if ((inp->inp_socket->so_options & SO_DONTROUTE) == 0 && /*XXX*/ (ro->ro_rt == (struct rtentry *)0 || ro->ro_rt->rt_ifp == (struct ifnet *)0)) { /* No route yet, so try to acquire one */ bzero(&ro->ro_dst, sizeof(struct sockaddr_in)); ro->ro_dst.sa_family = AF_INET; ro->ro_dst.sa_len = sizeof(struct sockaddr_in); ((struct sockaddr_in *)&ro->ro_dst)->sin_addr = faddr; rtalloc(ro); } /* * If we found a route, use the address * corresponding to the outgoing interface * unless it is the loopback (in case a route * to our address on another net goes to loopback). */ if (ro->ro_rt && !(ro->ro_rt->rt_ifp->if_flags & IFF_LOOPBACK)) ia = ifatoia(ro->ro_rt->rt_ifa); if (ia == 0) { bzero(&sa, sizeof(sa)); sa.sin_addr = faddr; sa.sin_len = sizeof(sa); sa.sin_family = AF_INET; ia = ifatoia(ifa_ifwithdstaddr(sintosa(&sa))); if (ia == 0) ia = ifatoia(ifa_ifwithnet(sintosa(&sa))); if (ia == 0) ia = TAILQ_FIRST(&in_ifaddrhead); if (ia == 0) return (EADDRNOTAVAIL); } /* * If the destination address is multicast and an outgoing * interface has been set as a multicast option, use the * address of that interface as our source address. */ if (IN_MULTICAST(ntohl(faddr.s_addr)) && inp->inp_moptions != NULL) { struct ip_moptions *imo; struct ifnet *ifp; imo = inp->inp_moptions; if (imo->imo_multicast_ifp != NULL) { ifp = imo->imo_multicast_ifp; TAILQ_FOREACH(ia, &in_ifaddrhead, ia_link) if (ia->ia_ifp == ifp) break; if (ia == 0) return (EADDRNOTAVAIL); } } laddr = ia->ia_addr.sin_addr; } oinp = in_pcblookup_hash(inp->inp_pcbinfo, faddr, fport, laddr, lport, 0, NULL); if (oinp != NULL) { if (oinpp != NULL) *oinpp = oinp; return (EADDRINUSE); } if (lport == 0) { error = in_pcbbind_setup(inp, NULL, &laddr.s_addr, &lport, td); if (error) return (error); } *laddrp = laddr.s_addr; *lportp = lport; *faddrp = faddr.s_addr; *fportp = fport; return (0); } void in_pcbdisconnect(inp) struct inpcb *inp; { inp->inp_faddr.s_addr = INADDR_ANY; inp->inp_fport = 0; in_pcbrehash(inp); if (inp->inp_socket->so_state & SS_NOFDREF) in_pcbdetach(inp); } void in_pcbdetach(inp) struct inpcb *inp; { struct socket *so = inp->inp_socket; struct inpcbinfo *ipi = inp->inp_pcbinfo; #ifdef IPSEC ipsec4_delete_pcbpolicy(inp); #endif /*IPSEC*/ inp->inp_gencnt = ++ipi->ipi_gencnt; in_pcbremlists(inp); if (so) { so->so_pcb = 0; sotryfree(so); } if (inp->inp_options) (void)m_free(inp->inp_options); if (inp->inp_route.ro_rt) - rtfree(inp->inp_route.ro_rt); + RTFREE(inp->inp_route.ro_rt); ip_freemoptions(inp->inp_moptions); inp->inp_vflag = 0; INP_LOCK_DESTROY(inp); uma_zfree(ipi->ipi_zone, inp); } struct sockaddr * in_sockaddr(port, addr_p) in_port_t port; struct in_addr *addr_p; { struct sockaddr_in *sin; MALLOC(sin, struct sockaddr_in *, sizeof *sin, M_SONAME, M_WAITOK | M_ZERO); sin->sin_family = AF_INET; sin->sin_len = sizeof(*sin); sin->sin_addr = *addr_p; sin->sin_port = port; return (struct sockaddr *)sin; } /* * The wrapper function will pass down the pcbinfo for this function to lock. * The socket must have a valid * (i.e., non-nil) PCB, but it should be impossible to get an invalid one * except through a kernel programming error, so it is acceptable to panic * (or in this case trap) if the PCB is invalid. (Actually, we don't trap * because there actually /is/ a programming error somewhere... XXX) */ int in_setsockaddr(so, nam, pcbinfo) struct socket *so; struct sockaddr **nam; struct inpcbinfo *pcbinfo; { int s; register struct inpcb *inp; struct in_addr addr; in_port_t port; s = splnet(); INP_INFO_RLOCK(pcbinfo); inp = sotoinpcb(so); if (!inp) { INP_INFO_RUNLOCK(pcbinfo); splx(s); return ECONNRESET; } INP_LOCK(inp); port = inp->inp_lport; addr = inp->inp_laddr; INP_UNLOCK(inp); INP_INFO_RUNLOCK(pcbinfo); splx(s); *nam = in_sockaddr(port, &addr); return 0; } /* * The wrapper function will pass down the pcbinfo for this function to lock. */ int in_setpeeraddr(so, nam, pcbinfo) struct socket *so; struct sockaddr **nam; struct inpcbinfo *pcbinfo; { int s; register struct inpcb *inp; struct in_addr addr; in_port_t port; s = splnet(); INP_INFO_RLOCK(pcbinfo); inp = sotoinpcb(so); if (!inp) { INP_INFO_RUNLOCK(pcbinfo); splx(s); return ECONNRESET; } INP_LOCK(inp); port = inp->inp_fport; addr = inp->inp_faddr; INP_UNLOCK(inp); INP_INFO_RUNLOCK(pcbinfo); splx(s); *nam = in_sockaddr(port, &addr); return 0; } void in_pcbnotifyall(pcbinfo, faddr, errno, notify) struct inpcbinfo *pcbinfo; struct in_addr faddr; int errno; struct inpcb *(*notify)(struct inpcb *, int); { struct inpcb *inp, *ninp; struct inpcbhead *head; int s; s = splnet(); INP_INFO_WLOCK(pcbinfo); head = pcbinfo->listhead; for (inp = LIST_FIRST(head); inp != NULL; inp = ninp) { INP_LOCK(inp); ninp = LIST_NEXT(inp, inp_list); #ifdef INET6 if ((inp->inp_vflag & INP_IPV4) == 0) { INP_UNLOCK(inp); continue; } #endif if (inp->inp_faddr.s_addr != faddr.s_addr || inp->inp_socket == NULL) { INP_UNLOCK(inp); continue; } if ((*notify)(inp, errno)) INP_UNLOCK(inp); } INP_INFO_WUNLOCK(pcbinfo); splx(s); } void in_pcbpurgeif0(pcbinfo, ifp) struct inpcbinfo *pcbinfo; struct ifnet *ifp; { struct inpcb *inp; struct ip_moptions *imo; int i, gap; /* why no splnet here? XXX */ INP_INFO_RLOCK(pcbinfo); LIST_FOREACH(inp, pcbinfo->listhead, inp_list) { INP_LOCK(inp); imo = inp->inp_moptions; if ((inp->inp_vflag & INP_IPV4) && imo != NULL) { /* * Unselect the outgoing interface if it is being * detached. */ if (imo->imo_multicast_ifp == ifp) imo->imo_multicast_ifp = NULL; /* * Drop multicast group membership if we joined * through the interface being detached. */ for (i = 0, gap = 0; i < imo->imo_num_memberships; i++) { if (imo->imo_membership[i]->inm_ifp == ifp) { in_delmulti(imo->imo_membership[i]); gap++; } else if (gap != 0) imo->imo_membership[i - gap] = imo->imo_membership[i]; } imo->imo_num_memberships -= gap; } INP_UNLOCK(inp); } INP_INFO_RUNLOCK(pcbinfo); } /* * Check for alternatives when higher level complains * about service problems. For now, invalidate cached * routing information. If the route was created dynamically * (by a redirect), time to try a default gateway again. */ void in_losing(inp) struct inpcb *inp; { register struct rtentry *rt; struct rt_addrinfo info; if ((rt = inp->inp_route.ro_rt)) { + RT_LOCK(rt); + inp->inp_route.ro_rt = NULL; bzero((caddr_t)&info, sizeof(info)); info.rti_flags = rt->rt_flags; info.rti_info[RTAX_DST] = rt_key(rt); info.rti_info[RTAX_GATEWAY] = rt->rt_gateway; info.rti_info[RTAX_NETMASK] = rt_mask(rt); rt_missmsg(RTM_LOSING, &info, rt->rt_flags, 0); - if (rt->rt_flags & RTF_DYNAMIC) + if (rt->rt_flags & RTF_DYNAMIC) { + RT_UNLOCK(rt); /* XXX refcnt? */ (void) rtrequest1(RTM_DELETE, &info, NULL); - inp->inp_route.ro_rt = NULL; - rtfree(rt); + } else + rtfree(rt); /* * A new route can be allocated * the next time output is attempted. */ } } /* * After a routing change, flush old routing * and allocate a (hopefully) better one. */ struct inpcb * in_rtchange(inp, errno) register struct inpcb *inp; int errno; { if (inp->inp_route.ro_rt) { - rtfree(inp->inp_route.ro_rt); + RTFREE(inp->inp_route.ro_rt); inp->inp_route.ro_rt = 0; /* * A new route can be allocated the next time * output is attempted. */ } return inp; } /* * Lookup a PCB based on the local address and port. */ struct inpcb * in_pcblookup_local(pcbinfo, laddr, lport_arg, wild_okay) struct inpcbinfo *pcbinfo; struct in_addr laddr; u_int lport_arg; int wild_okay; { register struct inpcb *inp; int matchwild = 3, wildcard; u_short lport = lport_arg; if (!wild_okay) { struct inpcbhead *head; /* * Look for an unconnected (wildcard foreign addr) PCB that * matches the local address and port we're looking for. */ head = &pcbinfo->hashbase[INP_PCBHASH(INADDR_ANY, lport, 0, pcbinfo->hashmask)]; LIST_FOREACH(inp, head, inp_hash) { #ifdef INET6 if ((inp->inp_vflag & INP_IPV4) == 0) continue; #endif if (inp->inp_faddr.s_addr == INADDR_ANY && inp->inp_laddr.s_addr == laddr.s_addr && inp->inp_lport == lport) { /* * Found. */ return (inp); } } /* * Not found. */ return (NULL); } else { struct inpcbporthead *porthash; struct inpcbport *phd; struct inpcb *match = NULL; /* * Best fit PCB lookup. * * First see if this local port is in use by looking on the * port hash list. */ porthash = &pcbinfo->porthashbase[INP_PCBPORTHASH(lport, pcbinfo->porthashmask)]; LIST_FOREACH(phd, porthash, phd_hash) { if (phd->phd_port == lport) break; } if (phd != NULL) { /* * Port is in use by one or more PCBs. Look for best * fit. */ LIST_FOREACH(inp, &phd->phd_pcblist, inp_portlist) { wildcard = 0; #ifdef INET6 if ((inp->inp_vflag & INP_IPV4) == 0) continue; #endif if (inp->inp_faddr.s_addr != INADDR_ANY) wildcard++; if (inp->inp_laddr.s_addr != INADDR_ANY) { if (laddr.s_addr == INADDR_ANY) wildcard++; else if (inp->inp_laddr.s_addr != laddr.s_addr) continue; } else { if (laddr.s_addr != INADDR_ANY) wildcard++; } if (wildcard < matchwild) { match = inp; matchwild = wildcard; if (matchwild == 0) { break; } } } } return (match); } } /* * Lookup PCB in hash list. */ struct inpcb * in_pcblookup_hash(pcbinfo, faddr, fport_arg, laddr, lport_arg, wildcard, ifp) struct inpcbinfo *pcbinfo; struct in_addr faddr, laddr; u_int fport_arg, lport_arg; int wildcard; struct ifnet *ifp; { struct inpcbhead *head; register struct inpcb *inp; u_short fport = fport_arg, lport = lport_arg; /* * First look for an exact match. */ head = &pcbinfo->hashbase[INP_PCBHASH(faddr.s_addr, lport, fport, pcbinfo->hashmask)]; LIST_FOREACH(inp, head, inp_hash) { #ifdef INET6 if ((inp->inp_vflag & INP_IPV4) == 0) continue; #endif if (inp->inp_faddr.s_addr == faddr.s_addr && inp->inp_laddr.s_addr == laddr.s_addr && inp->inp_fport == fport && inp->inp_lport == lport) { /* * Found. */ return (inp); } } if (wildcard) { struct inpcb *local_wild = NULL; #if defined(INET6) struct inpcb *local_wild_mapped = NULL; #endif /* defined(INET6) */ head = &pcbinfo->hashbase[INP_PCBHASH(INADDR_ANY, lport, 0, pcbinfo->hashmask)]; LIST_FOREACH(inp, head, inp_hash) { #ifdef INET6 if ((inp->inp_vflag & INP_IPV4) == 0) continue; #endif if (inp->inp_faddr.s_addr == INADDR_ANY && inp->inp_lport == lport) { if (ifp && ifp->if_type == IFT_FAITH && (inp->inp_flags & INP_FAITH) == 0) continue; if (inp->inp_laddr.s_addr == laddr.s_addr) return (inp); else if (inp->inp_laddr.s_addr == INADDR_ANY) { #if defined(INET6) if (INP_CHECK_SOCKAF(inp->inp_socket, AF_INET6)) local_wild_mapped = inp; else #endif /* defined(INET6) */ local_wild = inp; } } } #if defined(INET6) if (local_wild == NULL) return (local_wild_mapped); #endif /* defined(INET6) */ return (local_wild); } /* * Not found. */ return (NULL); } /* * Insert PCB onto various hash lists. */ int in_pcbinshash(inp) struct inpcb *inp; { struct inpcbhead *pcbhash; struct inpcbporthead *pcbporthash; struct inpcbinfo *pcbinfo = inp->inp_pcbinfo; struct inpcbport *phd; u_int32_t hashkey_faddr; #ifdef INET6 if (inp->inp_vflag & INP_IPV6) hashkey_faddr = inp->in6p_faddr.s6_addr32[3] /* XXX */; else #endif /* INET6 */ hashkey_faddr = inp->inp_faddr.s_addr; pcbhash = &pcbinfo->hashbase[INP_PCBHASH(hashkey_faddr, inp->inp_lport, inp->inp_fport, pcbinfo->hashmask)]; pcbporthash = &pcbinfo->porthashbase[INP_PCBPORTHASH(inp->inp_lport, pcbinfo->porthashmask)]; /* * Go through port list and look for a head for this lport. */ LIST_FOREACH(phd, pcbporthash, phd_hash) { if (phd->phd_port == inp->inp_lport) break; } /* * If none exists, malloc one and tack it on. */ if (phd == NULL) { MALLOC(phd, struct inpcbport *, sizeof(struct inpcbport), M_PCB, M_NOWAIT); if (phd == NULL) { return (ENOBUFS); /* XXX */ } phd->phd_port = inp->inp_lport; LIST_INIT(&phd->phd_pcblist); LIST_INSERT_HEAD(pcbporthash, phd, phd_hash); } inp->inp_phd = phd; LIST_INSERT_HEAD(&phd->phd_pcblist, inp, inp_portlist); LIST_INSERT_HEAD(pcbhash, inp, inp_hash); return (0); } /* * Move PCB to the proper hash bucket when { faddr, fport } have been * changed. NOTE: This does not handle the case of the lport changing (the * hashed port list would have to be updated as well), so the lport must * not change after in_pcbinshash() has been called. */ void in_pcbrehash(inp) struct inpcb *inp; { struct inpcbhead *head; u_int32_t hashkey_faddr; #ifdef INET6 if (inp->inp_vflag & INP_IPV6) hashkey_faddr = inp->in6p_faddr.s6_addr32[3] /* XXX */; else #endif /* INET6 */ hashkey_faddr = inp->inp_faddr.s_addr; head = &inp->inp_pcbinfo->hashbase[INP_PCBHASH(hashkey_faddr, inp->inp_lport, inp->inp_fport, inp->inp_pcbinfo->hashmask)]; LIST_REMOVE(inp, inp_hash); LIST_INSERT_HEAD(head, inp, inp_hash); } /* * Remove PCB from various lists. */ void in_pcbremlists(inp) struct inpcb *inp; { inp->inp_gencnt = ++inp->inp_pcbinfo->ipi_gencnt; if (inp->inp_lport) { struct inpcbport *phd = inp->inp_phd; LIST_REMOVE(inp, inp_hash); LIST_REMOVE(inp, inp_portlist); if (LIST_FIRST(&phd->phd_pcblist) == NULL) { LIST_REMOVE(phd, phd_hash); free(phd, M_PCB); } } LIST_REMOVE(inp, inp_list); inp->inp_pcbinfo->ipi_count--; } int prison_xinpcb(struct thread *td, struct inpcb *inp) { if (!jailed(td->td_ucred)) return (0); if (ntohl(inp->inp_laddr.s_addr) == prison_getip(td->td_ucred)) return (0); return (1); } Index: head/sys/netinet/in_rmx.c =================================================================== --- head/sys/netinet/in_rmx.c (revision 120726) +++ head/sys/netinet/in_rmx.c (revision 120727) @@ -1,425 +1,432 @@ /* * Copyright 1994, 1995 Massachusetts Institute of Technology * * Permission to use, copy, modify, and distribute this software and * its documentation for any purpose and without fee is hereby * granted, provided that both the above copyright notice and this * permission notice appear in all copies, that both the above * copyright notice and this permission notice appear in all * supporting documentation, and that the name of M.I.T. not be used * in advertising or publicity pertaining to distribution of the * software without specific, written prior permission. M.I.T. makes * no representations about the suitability of this software for any * purpose. It is provided "as is" without express or implied * warranty. * * THIS SOFTWARE IS PROVIDED BY M.I.T. ``AS IS''. M.I.T. DISCLAIMS * ALL EXPRESS OR IMPLIED WARRANTIES WITH REGARD TO THIS SOFTWARE, * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. IN NO EVENT * SHALL M.I.T. BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ /* * This code does two things necessary for the enhanced TCP metrics to * function in a useful manner: * 1) It marks all non-host routes as `cloning', thus ensuring that * every actual reference to such a route actually gets turned * into a reference to a host route to the specific destination * requested. * 2) When such routes lose all their references, it arranges for them * to be deleted in some random collection of circumstances, so that * a large quantity of stale routing data is not kept in kernel memory * indefinitely. See in_rtqtimo() below for the exact mechanism. */ #include #include #include #include #include #include #include +#include #include #include #include #include #include extern int in_inithead(void **head, int off); #define RTPRF_OURS RTF_PROTO3 /* set on routes we manage */ /* * Do what we need to do when inserting a route. */ static struct radix_node * in_addroute(void *v_arg, void *n_arg, struct radix_node_head *head, struct radix_node *treenodes) { struct rtentry *rt = (struct rtentry *)treenodes; struct sockaddr_in *sin = (struct sockaddr_in *)rt_key(rt); struct radix_node *ret; /* * For IP, all unicast non-host routes are automatically cloning. */ if (IN_MULTICAST(ntohl(sin->sin_addr.s_addr))) rt->rt_flags |= RTF_MULTICAST; if (!(rt->rt_flags & (RTF_HOST | RTF_CLONING | RTF_MULTICAST))) rt->rt_flags |= RTF_PRCLONING; /* * A little bit of help for both IP output and input: * For host routes, we make sure that RTF_BROADCAST * is set for anything that looks like a broadcast address. * This way, we can avoid an expensive call to in_broadcast() * in ip_output() most of the time (because the route passed * to ip_output() is almost always a host route). * * We also do the same for local addresses, with the thought * that this might one day be used to speed up ip_input(). * * We also mark routes to multicast addresses as such, because * it's easy to do and might be useful (but this is much more * dubious since it's so easy to inspect the address). (This * is done above.) */ if (rt->rt_flags & RTF_HOST) { if (in_broadcast(sin->sin_addr, rt->rt_ifp)) { rt->rt_flags |= RTF_BROADCAST; } else if (satosin(rt->rt_ifa->ifa_addr)->sin_addr.s_addr == sin->sin_addr.s_addr) { rt->rt_flags |= RTF_LOCAL; } } if (!rt->rt_rmx.rmx_mtu && !(rt->rt_rmx.rmx_locks & RTV_MTU) && rt->rt_ifp) rt->rt_rmx.rmx_mtu = rt->rt_ifp->if_mtu; ret = rn_addroute(v_arg, n_arg, head, treenodes); if (ret == NULL && rt->rt_flags & RTF_HOST) { struct rtentry *rt2; /* * We are trying to add a host route, but can't. * Find out if it is because of an * ARP entry and delete it if so. */ rt2 = rtalloc1((struct sockaddr *)sin, 0, RTF_CLONING | RTF_PRCLONING); if (rt2) { if (rt2->rt_flags & RTF_LLINFO && rt2->rt_flags & RTF_HOST && rt2->rt_gateway && rt2->rt_gateway->sa_family == AF_LINK) { + /* NB: must unlock to avoid recursion */ + RT_UNLOCK(rt2); rtrequest(RTM_DELETE, (struct sockaddr *)rt_key(rt2), rt2->rt_gateway, rt_mask(rt2), rt2->rt_flags, 0); ret = rn_addroute(v_arg, n_arg, head, treenodes); + RT_LOCK(rt2); } - RTFREE(rt2); + RTFREE_LOCKED(rt2); } } /* * If the new route created successfully, and we are forwarding, * and there is a cached route, free it. Otherwise, we may end * up using the wrong route. */ if (ret != NULL && ipforwarding && ipforward_rt.ro_rt) { RTFREE(ipforward_rt.ro_rt); ipforward_rt.ro_rt = 0; } return ret; } /* * This code is the inverse of in_clsroute: on first reference, if we * were managing the route, stop doing so and set the expiration timer * back off again. */ static struct radix_node * in_matroute(void *v_arg, struct radix_node_head *head) { struct radix_node *rn = rn_match(v_arg, head); struct rtentry *rt = (struct rtentry *)rn; + /*XXX locking? */ if (rt && rt->rt_refcnt == 0) { /* this is first reference */ if (rt->rt_flags & RTPRF_OURS) { rt->rt_flags &= ~RTPRF_OURS; rt->rt_rmx.rmx_expire = 0; } } return rn; } static int rtq_reallyold = 60*60; /* one hour is "really old" */ SYSCTL_INT(_net_inet_ip, IPCTL_RTEXPIRE, rtexpire, CTLFLAG_RW, &rtq_reallyold, 0, "Default expiration time on dynamically learned routes"); static int rtq_minreallyold = 10; /* never automatically crank down to less */ SYSCTL_INT(_net_inet_ip, IPCTL_RTMINEXPIRE, rtminexpire, CTLFLAG_RW, &rtq_minreallyold, 0, "Minimum time to attempt to hold onto dynamically learned routes"); static int rtq_toomany = 128; /* 128 cached routes is "too many" */ SYSCTL_INT(_net_inet_ip, IPCTL_RTMAXCACHE, rtmaxcache, CTLFLAG_RW, &rtq_toomany, 0, "Upper limit on dynamically learned routes"); /* * On last reference drop, mark the route as belong to us so that it can be * timed out. */ static void in_clsroute(struct radix_node *rn, struct radix_node_head *head) { struct rtentry *rt = (struct rtentry *)rn; + RT_LOCK_ASSERT(rt); + if (!(rt->rt_flags & RTF_UP)) return; /* prophylactic measures */ if ((rt->rt_flags & (RTF_LLINFO | RTF_HOST)) != RTF_HOST) return; if ((rt->rt_flags & (RTF_WASCLONED | RTPRF_OURS)) != RTF_WASCLONED) return; /* * If rtq_reallyold is 0, just delete the route without * waiting for a timeout cycle to kill it. */ if (rtq_reallyold != 0) { rt->rt_flags |= RTPRF_OURS; rt->rt_rmx.rmx_expire = time_second + rtq_reallyold; } else { + /* NB: must unlock to avoid recursion */ + RT_UNLOCK(rt); rtrequest(RTM_DELETE, (struct sockaddr *)rt_key(rt), rt->rt_gateway, rt_mask(rt), rt->rt_flags, 0); + RT_LOCK(rt); } } struct rtqk_arg { struct radix_node_head *rnh; int draining; int killed; int found; int updating; time_t nextstop; }; /* * Get rid of old routes. When draining, this deletes everything, even when * the timeout is not expired yet. When updating, this makes sure that * nothing has a timeout longer than the current value of rtq_reallyold. */ static int in_rtqkill(struct radix_node *rn, void *rock) { struct rtqk_arg *ap = rock; struct rtentry *rt = (struct rtentry *)rn; int err; if (rt->rt_flags & RTPRF_OURS) { ap->found++; if (ap->draining || rt->rt_rmx.rmx_expire <= time_second) { if (rt->rt_refcnt > 0) panic("rtqkill route really not free"); err = rtrequest(RTM_DELETE, (struct sockaddr *)rt_key(rt), rt->rt_gateway, rt_mask(rt), rt->rt_flags, 0); if (err) { log(LOG_WARNING, "in_rtqkill: error %d\n", err); } else { ap->killed++; } } else { if (ap->updating && (rt->rt_rmx.rmx_expire - time_second > rtq_reallyold)) { rt->rt_rmx.rmx_expire = time_second + rtq_reallyold; } ap->nextstop = lmin(ap->nextstop, rt->rt_rmx.rmx_expire); } } return 0; } #define RTQ_TIMEOUT 60*10 /* run no less than once every ten minutes */ static int rtq_timeout = RTQ_TIMEOUT; +static struct callout rtq_timer; static void in_rtqtimo(void *rock) { struct radix_node_head *rnh = rock; struct rtqk_arg arg; struct timeval atv; static time_t last_adjusted_timeout = 0; - int s; arg.found = arg.killed = 0; arg.rnh = rnh; arg.nextstop = time_second + rtq_timeout; arg.draining = arg.updating = 0; - s = splnet(); RADIX_NODE_HEAD_LOCK(rnh); rnh->rnh_walktree(rnh, in_rtqkill, &arg); RADIX_NODE_HEAD_UNLOCK(rnh); - splx(s); /* * Attempt to be somewhat dynamic about this: * If there are ``too many'' routes sitting around taking up space, * then crank down the timeout, and see if we can't make some more * go away. However, we make sure that we will never adjust more * than once in rtq_timeout seconds, to keep from cranking down too * hard. */ if ((arg.found - arg.killed > rtq_toomany) && (time_second - last_adjusted_timeout >= rtq_timeout) && rtq_reallyold > rtq_minreallyold) { rtq_reallyold = 2 * rtq_reallyold / 3; if (rtq_reallyold < rtq_minreallyold) { rtq_reallyold = rtq_minreallyold; } last_adjusted_timeout = time_second; #ifdef DIAGNOSTIC log(LOG_DEBUG, "in_rtqtimo: adjusted rtq_reallyold to %d\n", rtq_reallyold); #endif arg.found = arg.killed = 0; arg.updating = 1; - s = splnet(); RADIX_NODE_HEAD_LOCK(rnh); rnh->rnh_walktree(rnh, in_rtqkill, &arg); RADIX_NODE_HEAD_UNLOCK(rnh); - splx(s); } atv.tv_usec = 0; atv.tv_sec = arg.nextstop - time_second; - timeout(in_rtqtimo, rock, tvtohz(&atv)); + callout_reset(&rtq_timer, tvtohz(&atv), in_rtqtimo, rock); } void in_rtqdrain(void) { struct radix_node_head *rnh = rt_tables[AF_INET]; struct rtqk_arg arg; - int s; + arg.found = arg.killed = 0; arg.rnh = rnh; arg.nextstop = 0; arg.draining = 1; arg.updating = 0; - s = splnet(); RADIX_NODE_HEAD_LOCK(rnh); rnh->rnh_walktree(rnh, in_rtqkill, &arg); RADIX_NODE_HEAD_UNLOCK(rnh); - splx(s); } /* * Initialize our routing tree. */ int in_inithead(void **head, int off) { struct radix_node_head *rnh; if (!rn_inithead(head, off)) return 0; if (head != (void **)&rt_tables[AF_INET]) /* BOGUS! */ return 1; /* only do this for the real routing table */ rnh = *head; rnh->rnh_addaddr = in_addroute; rnh->rnh_matchaddr = in_matroute; rnh->rnh_close = in_clsroute; + callout_init(&rtq_timer, CALLOUT_MPSAFE); in_rtqtimo(rnh); /* kick off timeout first time */ return 1; } /* * This zaps old routes when the interface goes down or interface * address is deleted. In the latter case, it deletes static routes * that point to this address. If we don't do this, we may end up * using the old address in the future. The ones we always want to * get rid of are things like ARP entries, since the user might down * the interface, walk over to a completely different network, and * plug back in. */ struct in_ifadown_arg { struct radix_node_head *rnh; struct ifaddr *ifa; int del; }; static int in_ifadownkill(struct radix_node *rn, void *xap) { struct in_ifadown_arg *ap = xap; struct rtentry *rt = (struct rtentry *)rn; int err; if (rt->rt_ifa == ap->ifa && (ap->del || !(rt->rt_flags & RTF_STATIC))) { /* * We need to disable the automatic prune that happens * in this case in rtrequest() because it will blow * away the pointers that rn_walktree() needs in order * continue our descent. We will end up deleting all * the routes that rtrequest() would have in any case, * so that behavior is not needed there. */ + RT_LOCK(rt); rt->rt_flags &= ~(RTF_CLONING | RTF_PRCLONING); + RT_UNLOCK(rt); err = rtrequest(RTM_DELETE, (struct sockaddr *)rt_key(rt), rt->rt_gateway, rt_mask(rt), rt->rt_flags, 0); if (err) { log(LOG_WARNING, "in_ifadownkill: error %d\n", err); } } return 0; } int in_ifadown(struct ifaddr *ifa, int delete) { struct in_ifadown_arg arg; struct radix_node_head *rnh; if (ifa->ifa_addr->sa_family != AF_INET) return 1; arg.rnh = rnh = rt_tables[AF_INET]; arg.ifa = ifa; arg.del = delete; RADIX_NODE_HEAD_LOCK(rnh); rnh->rnh_walktree(rnh, in_ifadownkill, &arg); RADIX_NODE_HEAD_UNLOCK(rnh); - ifa->ifa_flags &= ~IFA_ROUTE; + ifa->ifa_flags &= ~IFA_ROUTE; /* XXXlocking? */ return 0; } Index: head/sys/netinet/ip_flow.c =================================================================== --- head/sys/netinet/ip_flow.c (revision 120726) +++ head/sys/netinet/ip_flow.c (revision 120727) @@ -1,375 +1,377 @@ /*- * Copyright (c) 1998 The NetBSD Foundation, Inc. * All rights reserved. * * This code is derived from software contributed to The NetBSD Foundation * by the 3am Software Foundry ("3am"). It was developed by Matt Thomas. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the NetBSD * Foundation, Inc. and its contributors. * 4. Neither the name of The NetBSD Foundation nor the names of its * contributors may be used to endorse or promote products derived * from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. * * $FreeBSD$ */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #define IPFLOW_TIMER (5 * PR_SLOWHZ) #define IPFLOW_HASHBITS 6 /* should not be a multiple of 8 */ #define IPFLOW_HASHSIZE (1 << IPFLOW_HASHBITS) #if IPFLOW_HASHSIZE > 255 #error "make ipf_hash larger" #endif static struct ipflow_head ipflows[IPFLOW_HASHSIZE]; static int ipflow_inuse; #define IPFLOW_MAX 256 /* * Each flow list has a lock that guards updates to the list and to * all entries on the list. Flow entries hold the hash index for * finding the head of the list so the lock can be found quickly. * * ipflow_inuse holds a count of the number of flow entries present. * This is used to bound the size of the table. When IPFLOW_MAX entries * are present and an additional entry is needed one is chosen for * replacement. We could use atomic ops for this counter but having it * inconsistent doesn't appear to be a problem. */ #define IPFLOW_HEAD_LOCK(_ipfh) mtx_lock(&(_ipfh)->ipfh_mtx) #define IPFLOW_HEAD_UNLOCK(_ipfh) mtx_unlock(&(_ipfh)->ipfh_mtx) #define IPFLOW_LOCK(_ipf) \ IPFLOW_HEAD_LOCK(&ipflows[(_ipf)->ipf_hash]) #define IPFLOW_UNLOCK(_ipf) \ IPFLOW_HEAD_UNLOCK(&ipflows[(_ipf)->ipf_hash]) static int ipflow_active = 0; SYSCTL_INT(_net_inet_ip, IPCTL_FASTFORWARDING, fastforwarding, CTLFLAG_RW, &ipflow_active, 0, "Enable flow-based IP forwarding"); static MALLOC_DEFINE(M_IPFLOW, "ip_flow", "IP flow"); static unsigned ipflow_hash(struct in_addr dst, struct in_addr src, unsigned tos) { unsigned hash = tos; int idx; for (idx = 0; idx < 32; idx += IPFLOW_HASHBITS) hash += (dst.s_addr >> (32 - idx)) + (src.s_addr >> idx); return hash & (IPFLOW_HASHSIZE-1); } static struct ipflow * ipflow_lookup(const struct ip *ip) { unsigned hash; struct ipflow_head *head; struct ipflow *ipf; hash = ipflow_hash(ip->ip_dst, ip->ip_src, ip->ip_tos); head = &ipflows[hash]; IPFLOW_HEAD_LOCK(head); LIST_FOREACH(ipf, &head->ipfh_head, ipf_next) { if (ip->ip_dst.s_addr == ipf->ipf_dst.s_addr && ip->ip_src.s_addr == ipf->ipf_src.s_addr && ip->ip_tos == ipf->ipf_tos) { /* NB: return head locked */ return ipf; } } IPFLOW_HEAD_UNLOCK(head); return NULL; } int ipflow_fastforward(struct mbuf *m) { struct ip *ip; struct ipflow *ipf; struct rtentry *rt; struct sockaddr *dst; int error; /* * Are we forwarding packets? Big enough for an IP packet? */ if (!ipforwarding || !ipflow_active || m->m_len < sizeof(struct ip)) return 0; /* * IP header with no option and valid version and length */ ip = mtod(m, struct ip *); if (ip->ip_v != IPVERSION || ip->ip_hl != (sizeof(struct ip) >> 2) || ntohs(ip->ip_len) > m->m_pkthdr.len) return 0; /* * Find a flow. */ if ((ipf = ipflow_lookup(ip)) == NULL) return 0; /* * Route and interface still up? */ rt = ipf->ipf_ro.ro_rt; if ((rt->rt_flags & RTF_UP) == 0 || (rt->rt_ifp->if_flags & IFF_UP) == 0) { IPFLOW_UNLOCK(ipf); return 0; } /* * Packet size OK? TTL? */ if (m->m_pkthdr.len > rt->rt_ifp->if_mtu || ip->ip_ttl <= IPTTLDEC) { IPFLOW_UNLOCK(ipf); return 0; } /* * Everything checks out and so we can forward this packet. * Modify the TTL and incrementally change the checksum. */ ip->ip_ttl -= IPTTLDEC; if (ip->ip_sum >= htons(0xffff - (IPTTLDEC << 8))) { ip->ip_sum += htons(IPTTLDEC << 8) + 1; } else { ip->ip_sum += htons(IPTTLDEC << 8); } /* * Send the packet on its way. All we can get back is ENOBUFS */ ipf->ipf_uses++; ipf->ipf_timer = IPFLOW_TIMER; if (rt->rt_flags & RTF_GATEWAY) dst = rt->rt_gateway; else dst = &ipf->ipf_ro.ro_dst; if ((error = (*rt->rt_ifp->if_output)(rt->rt_ifp, m, dst, rt)) != 0) { if (error == ENOBUFS) ipf->ipf_dropped++; else ipf->ipf_errors++; } IPFLOW_UNLOCK(ipf); return 1; } static void ipflow_addstats(struct ipflow *ipf) { ipf->ipf_ro.ro_rt->rt_use += ipf->ipf_uses; ipstat.ips_cantforward += ipf->ipf_errors + ipf->ipf_dropped; ipstat.ips_forward += ipf->ipf_uses; ipstat.ips_fastforward += ipf->ipf_uses; } /* * XXX the locking here makes reaping an entry very expensive... */ static struct ipflow * ipflow_reap(void) { struct ipflow *victim = NULL; struct ipflow *ipf; int idx; for (idx = 0; idx < IPFLOW_HASHSIZE; idx++) { struct ipflow_head *head = &ipflows[idx]; IPFLOW_HEAD_LOCK(head); LIST_FOREACH(ipf, &head->ipfh_head, ipf_next) { /* * If this no longer points to a valid route * reclaim it. */ if ((ipf->ipf_ro.ro_rt->rt_flags & RTF_UP) == 0) goto done; /* * choose the one that's been least recently used * or has had the least uses in the last 1.5 * intervals. */ if (victim == NULL) victim = ipf; else if (ipf->ipf_timer < victim->ipf_timer || (ipf->ipf_timer == victim->ipf_timer && ipf->ipf_last_uses + ipf->ipf_uses < victim->ipf_last_uses + victim->ipf_uses)) { if (victim->ipf_hash != ipf->ipf_hash) IPFLOW_UNLOCK(victim); victim = ipf; } } if (victim && victim->ipf_hash != idx) IPFLOW_HEAD_UNLOCK(head); } ipf = victim; done: /* * Remove the entry from the flow table. */ LIST_REMOVE(ipf, ipf_next); IPFLOW_UNLOCK(ipf); ipflow_addstats(ipf); RTFREE(ipf->ipf_ro.ro_rt); return ipf; } static void ipflow_free(struct ipflow *ipf) { /* * Remove the flow from the hash table. */ LIST_REMOVE(ipf, ipf_next); ipflow_addstats(ipf); RTFREE(ipf->ipf_ro.ro_rt); ipflow_inuse--; free(ipf, M_IPFLOW); } void ipflow_slowtimo(void) { struct ipflow *ipf; int idx; for (idx = 0; idx < IPFLOW_HASHSIZE; idx++) { struct ipflow_head *head = &ipflows[idx]; IPFLOW_HEAD_LOCK(head); ipf = LIST_FIRST(&head->ipfh_head); while (ipf != NULL) { struct ipflow *next_ipf = LIST_NEXT(ipf, ipf_next); if (--ipf->ipf_timer == 0) { ipflow_free(ipf); } else { ipf->ipf_last_uses = ipf->ipf_uses; ipf->ipf_ro.ro_rt->rt_use += ipf->ipf_uses; ipstat.ips_forward += ipf->ipf_uses; ipstat.ips_fastforward += ipf->ipf_uses; ipf->ipf_uses = 0; } ipf = next_ipf; } IPFLOW_HEAD_UNLOCK(head); } } void ipflow_create(const struct route *ro, struct mbuf *m) { const struct ip *const ip = mtod(m, struct ip *); struct ipflow *ipf; /* * Don't create cache entries for ICMP messages. */ if (!ipflow_active || ip->ip_p == IPPROTO_ICMP) return; /* * See if an existing flow struct exists. If so remove it from it's * list and free the old route. If not, try to malloc a new one * (if we aren't at our limit). */ ipf = ipflow_lookup(ip); if (ipf == NULL) { if (ipflow_inuse == IPFLOW_MAX) { ipf = ipflow_reap(); } else { ipf = (struct ipflow *) malloc(sizeof(*ipf), M_IPFLOW, M_NOWAIT); if (ipf == NULL) return; ipflow_inuse++; } bzero((caddr_t) ipf, sizeof(*ipf)); ipf->ipf_hash = ipflow_hash(ip->ip_dst, ip->ip_src, ip->ip_tos); ipf->ipf_dst = ip->ip_dst; ipf->ipf_src = ip->ip_src; ipf->ipf_tos = ip->ip_tos; IPFLOW_LOCK(ipf); } else { LIST_REMOVE(ipf, ipf_next); ipflow_addstats(ipf); /* add stats to old route */ RTFREE(ipf->ipf_ro.ro_rt); /* clear reference */ ipf->ipf_uses = ipf->ipf_last_uses = 0; ipf->ipf_errors = ipf->ipf_dropped = 0; } /* * Fill in the updated information. */ ipf->ipf_ro = *ro; + RT_LOCK(ro->ro_rt); ro->ro_rt->rt_refcnt++; + RT_UNLOCK(ro->ro_rt); ipf->ipf_timer = IPFLOW_TIMER; /* * Insert into the approriate bucket of the flow table. */ LIST_INSERT_HEAD(&ipflows[ipf->ipf_hash].ipfh_head, ipf, ipf_next); IPFLOW_UNLOCK(ipf); } static void ipflow_init(void) { int idx; for (idx = 0; idx < IPFLOW_HASHSIZE; idx++) { struct ipflow_head *head = &ipflows[idx]; LIST_INIT(&head->ipfh_head); mtx_init(&head->ipfh_mtx, "ipflow list head", NULL, MTX_DEF); } } SYSINIT(ipflow, SI_SUB_PROTO_DOMAIN, SI_ORDER_ANY, ipflow_init, 0); Index: head/sys/netinet/ip_icmp.c =================================================================== --- head/sys/netinet/ip_icmp.c (revision 120726) +++ head/sys/netinet/ip_icmp.c (revision 120727) @@ -1,881 +1,881 @@ /* * Copyright (c) 1982, 1986, 1988, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)ip_icmp.c 8.2 (Berkeley) 1/4/94 * $FreeBSD$ */ #include "opt_ipsec.h" #include "opt_mac.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef IPSEC #include #include #endif #ifdef FAST_IPSEC #include #include #define IPSEC #endif #include /* * ICMP routines: error generation, receive packet processing, and * routines to turnaround packets back to the originator, and * host table maintenance routines. */ static struct icmpstat icmpstat; SYSCTL_STRUCT(_net_inet_icmp, ICMPCTL_STATS, stats, CTLFLAG_RW, &icmpstat, icmpstat, ""); static int icmpmaskrepl = 0; SYSCTL_INT(_net_inet_icmp, ICMPCTL_MASKREPL, maskrepl, CTLFLAG_RW, &icmpmaskrepl, 0, "Reply to ICMP Address Mask Request packets."); static u_int icmpmaskfake = 0; SYSCTL_UINT(_net_inet_icmp, OID_AUTO, maskfake, CTLFLAG_RW, &icmpmaskfake, 0, "Fake reply to ICMP Address Mask Request packets."); static int drop_redirect = 0; SYSCTL_INT(_net_inet_icmp, OID_AUTO, drop_redirect, CTLFLAG_RW, &drop_redirect, 0, ""); static int log_redirect = 0; SYSCTL_INT(_net_inet_icmp, OID_AUTO, log_redirect, CTLFLAG_RW, &log_redirect, 0, ""); static int icmplim = 200; SYSCTL_INT(_net_inet_icmp, ICMPCTL_ICMPLIM, icmplim, CTLFLAG_RW, &icmplim, 0, ""); static int icmplim_output = 1; SYSCTL_INT(_net_inet_icmp, OID_AUTO, icmplim_output, CTLFLAG_RW, &icmplim_output, 0, ""); /* * ICMP broadcast echo sysctl */ static int icmpbmcastecho = 0; SYSCTL_INT(_net_inet_icmp, OID_AUTO, bmcastecho, CTLFLAG_RW, &icmpbmcastecho, 0, ""); #ifdef ICMPPRINTFS int icmpprintfs = 0; #endif static void icmp_reflect(struct mbuf *); static void icmp_send(struct mbuf *, struct mbuf *, struct route *); static int ip_next_mtu(int, int); extern struct protosw inetsw[]; /* * Generate an error packet of type error * in response to bad packet ip. */ void icmp_error(n, type, code, dest, destifp) struct mbuf *n; int type, code; n_long dest; struct ifnet *destifp; { register struct ip *oip = mtod(n, struct ip *), *nip; register unsigned oiplen = oip->ip_hl << 2; register struct icmp *icp; register struct mbuf *m; unsigned icmplen; #ifdef ICMPPRINTFS if (icmpprintfs) printf("icmp_error(%p, %x, %d)\n", oip, type, code); #endif if (type != ICMP_REDIRECT) icmpstat.icps_error++; /* * Don't send error if not the first fragment of message. * Don't error if the old packet protocol was ICMP * error message, only known informational types. */ if (oip->ip_off &~ (IP_MF|IP_DF)) goto freeit; if (oip->ip_p == IPPROTO_ICMP && type != ICMP_REDIRECT && n->m_len >= oiplen + ICMP_MINLEN && !ICMP_INFOTYPE(((struct icmp *)((caddr_t)oip + oiplen))->icmp_type)) { icmpstat.icps_oldicmp++; goto freeit; } /* Don't send error in response to a multicast or broadcast packet */ if (n->m_flags & (M_BCAST|M_MCAST)) goto freeit; /* * First, formulate icmp message */ m = m_gethdr(M_DONTWAIT, MT_HEADER); if (m == NULL) goto freeit; #ifdef MAC mac_create_mbuf_netlayer(n, m); #endif icmplen = min(oiplen + 8, oip->ip_len); if (icmplen < sizeof(struct ip)) panic("icmp_error: bad length"); m->m_len = icmplen + ICMP_MINLEN; MH_ALIGN(m, m->m_len); icp = mtod(m, struct icmp *); if ((u_int)type > ICMP_MAXTYPE) panic("icmp_error"); icmpstat.icps_outhist[type]++; icp->icmp_type = type; if (type == ICMP_REDIRECT) icp->icmp_gwaddr.s_addr = dest; else { icp->icmp_void = 0; /* * The following assignments assume an overlay with the * zeroed icmp_void field. */ if (type == ICMP_PARAMPROB) { icp->icmp_pptr = code; code = 0; } else if (type == ICMP_UNREACH && code == ICMP_UNREACH_NEEDFRAG && destifp) { icp->icmp_nextmtu = htons(destifp->if_mtu); } } icp->icmp_code = code; m_copydata(n, 0, icmplen, (caddr_t)&icp->icmp_ip); nip = &icp->icmp_ip; /* * Convert fields to network representation. */ nip->ip_len = htons(nip->ip_len); nip->ip_off = htons(nip->ip_off); /* * Now, copy old ip header (without options) * in front of icmp message. */ if (m->m_data - sizeof(struct ip) < m->m_pktdat) panic("icmp len"); m->m_data -= sizeof(struct ip); m->m_len += sizeof(struct ip); m->m_pkthdr.len = m->m_len; m->m_pkthdr.rcvif = n->m_pkthdr.rcvif; nip = mtod(m, struct ip *); bcopy((caddr_t)oip, (caddr_t)nip, sizeof(struct ip)); nip->ip_len = m->m_len; nip->ip_v = IPVERSION; nip->ip_hl = 5; nip->ip_p = IPPROTO_ICMP; nip->ip_tos = 0; icmp_reflect(m); freeit: m_freem(n); } static struct sockaddr_in icmpsrc = { sizeof (struct sockaddr_in), AF_INET }; static struct sockaddr_in icmpdst = { sizeof (struct sockaddr_in), AF_INET }; static struct sockaddr_in icmpgw = { sizeof (struct sockaddr_in), AF_INET }; /* * Process a received ICMP message. */ void icmp_input(m, off) register struct mbuf *m; int off; { int hlen = off; register struct icmp *icp; register struct ip *ip = mtod(m, struct ip *); int icmplen = ip->ip_len; register int i; struct in_ifaddr *ia; void (*ctlfunc)(int, struct sockaddr *, void *); int code; /* * Locate icmp structure in mbuf, and check * that not corrupted and of at least minimum length. */ #ifdef ICMPPRINTFS if (icmpprintfs) { char buf[4 * sizeof "123"]; strcpy(buf, inet_ntoa(ip->ip_src)); printf("icmp_input from %s to %s, len %d\n", buf, inet_ntoa(ip->ip_dst), icmplen); } #endif if (icmplen < ICMP_MINLEN) { icmpstat.icps_tooshort++; goto freeit; } i = hlen + min(icmplen, ICMP_ADVLENMIN); if (m->m_len < i && (m = m_pullup(m, i)) == 0) { icmpstat.icps_tooshort++; return; } ip = mtod(m, struct ip *); m->m_len -= hlen; m->m_data += hlen; icp = mtod(m, struct icmp *); if (in_cksum(m, icmplen)) { icmpstat.icps_checksum++; goto freeit; } m->m_len += hlen; m->m_data -= hlen; if (m->m_pkthdr.rcvif && m->m_pkthdr.rcvif->if_type == IFT_FAITH) { /* * Deliver very specific ICMP type only. */ switch (icp->icmp_type) { case ICMP_UNREACH: case ICMP_TIMXCEED: break; default: goto freeit; } } #ifdef ICMPPRINTFS if (icmpprintfs) printf("icmp_input, type %d code %d\n", icp->icmp_type, icp->icmp_code); #endif /* * Message type specific processing. */ if (icp->icmp_type > ICMP_MAXTYPE) goto raw; icmpstat.icps_inhist[icp->icmp_type]++; code = icp->icmp_code; switch (icp->icmp_type) { case ICMP_UNREACH: switch (code) { case ICMP_UNREACH_NET: case ICMP_UNREACH_HOST: case ICMP_UNREACH_SRCFAIL: case ICMP_UNREACH_NET_UNKNOWN: case ICMP_UNREACH_HOST_UNKNOWN: case ICMP_UNREACH_ISOLATED: case ICMP_UNREACH_TOSNET: case ICMP_UNREACH_TOSHOST: case ICMP_UNREACH_HOST_PRECEDENCE: case ICMP_UNREACH_PRECEDENCE_CUTOFF: code = PRC_UNREACH_NET; break; case ICMP_UNREACH_NEEDFRAG: code = PRC_MSGSIZE; break; /* * RFC 1122, Sections 3.2.2.1 and 4.2.3.9. * Treat subcodes 2,3 as immediate RST */ case ICMP_UNREACH_PROTOCOL: case ICMP_UNREACH_PORT: code = PRC_UNREACH_PORT; break; case ICMP_UNREACH_NET_PROHIB: case ICMP_UNREACH_HOST_PROHIB: case ICMP_UNREACH_FILTER_PROHIB: code = PRC_UNREACH_ADMIN_PROHIB; break; default: goto badcode; } goto deliver; case ICMP_TIMXCEED: if (code > 1) goto badcode; code += PRC_TIMXCEED_INTRANS; goto deliver; case ICMP_PARAMPROB: if (code > 1) goto badcode; code = PRC_PARAMPROB; goto deliver; case ICMP_SOURCEQUENCH: if (code) goto badcode; code = PRC_QUENCH; deliver: /* * Problem with datagram; advise higher level routines. */ if (icmplen < ICMP_ADVLENMIN || icmplen < ICMP_ADVLEN(icp) || icp->icmp_ip.ip_hl < (sizeof(struct ip) >> 2)) { icmpstat.icps_badlen++; goto freeit; } icp->icmp_ip.ip_len = ntohs(icp->icmp_ip.ip_len); /* Discard ICMP's in response to multicast packets */ if (IN_MULTICAST(ntohl(icp->icmp_ip.ip_dst.s_addr))) goto badcode; #ifdef ICMPPRINTFS if (icmpprintfs) printf("deliver to protocol %d\n", icp->icmp_ip.ip_p); #endif icmpsrc.sin_addr = icp->icmp_ip.ip_dst; #if 1 /* * MTU discovery: * If we got a needfrag and there is a host route to the * original destination, and the MTU is not locked, then * set the MTU in the route to the suggested new value * (if given) and then notify as usual. The ULPs will * notice that the MTU has changed and adapt accordingly. * If no new MTU was suggested, then we guess a new one * less than the current value. If the new MTU is * unreasonably small (arbitrarily set at 296), then * we reset the MTU to the interface value and enable the * lock bit, indicating that we are no longer doing MTU * discovery. */ if (code == PRC_MSGSIZE) { struct rtentry *rt; int mtu; rt = rtalloc1((struct sockaddr *)&icmpsrc, 0, RTF_CLONING | RTF_PRCLONING); if (rt && (rt->rt_flags & RTF_HOST) && !(rt->rt_rmx.rmx_locks & RTV_MTU)) { mtu = ntohs(icp->icmp_nextmtu); if (!mtu) mtu = ip_next_mtu(rt->rt_rmx.rmx_mtu, 1); #ifdef DEBUG_MTUDISC printf("MTU for %s reduced to %d\n", inet_ntoa(icmpsrc.sin_addr), mtu); #endif if (mtu < 296) { /* rt->rt_rmx.rmx_mtu = rt->rt_ifp->if_mtu; */ rt->rt_rmx.rmx_locks |= RTV_MTU; } else if (rt->rt_rmx.rmx_mtu > mtu) { rt->rt_rmx.rmx_mtu = mtu; } } if (rt) - RTFREE(rt); + rtfree(rt); } #endif /* * XXX if the packet contains [IPv4 AH TCP], we can't make a * notification to TCP layer. */ ctlfunc = inetsw[ip_protox[icp->icmp_ip.ip_p]].pr_ctlinput; if (ctlfunc) (*ctlfunc)(code, (struct sockaddr *)&icmpsrc, (void *)&icp->icmp_ip); break; badcode: icmpstat.icps_badcode++; break; case ICMP_ECHO: if (!icmpbmcastecho && (m->m_flags & (M_MCAST | M_BCAST)) != 0) { icmpstat.icps_bmcastecho++; break; } icp->icmp_type = ICMP_ECHOREPLY; if (badport_bandlim(BANDLIM_ICMP_ECHO) < 0) goto freeit; else goto reflect; case ICMP_TSTAMP: if (!icmpbmcastecho && (m->m_flags & (M_MCAST | M_BCAST)) != 0) { icmpstat.icps_bmcasttstamp++; break; } if (icmplen < ICMP_TSLEN) { icmpstat.icps_badlen++; break; } icp->icmp_type = ICMP_TSTAMPREPLY; icp->icmp_rtime = iptime(); icp->icmp_ttime = icp->icmp_rtime; /* bogus, do later! */ if (badport_bandlim(BANDLIM_ICMP_TSTAMP) < 0) goto freeit; else goto reflect; case ICMP_MASKREQ: if (icmpmaskrepl == 0) break; /* * We are not able to respond with all ones broadcast * unless we receive it over a point-to-point interface. */ if (icmplen < ICMP_MASKLEN) break; switch (ip->ip_dst.s_addr) { case INADDR_BROADCAST: case INADDR_ANY: icmpdst.sin_addr = ip->ip_src; break; default: icmpdst.sin_addr = ip->ip_dst; } ia = (struct in_ifaddr *)ifaof_ifpforaddr( (struct sockaddr *)&icmpdst, m->m_pkthdr.rcvif); if (ia == 0) break; if (ia->ia_ifp == 0) break; icp->icmp_type = ICMP_MASKREPLY; if (icmpmaskfake == 0) icp->icmp_mask = ia->ia_sockmask.sin_addr.s_addr; else icp->icmp_mask = icmpmaskfake; if (ip->ip_src.s_addr == 0) { if (ia->ia_ifp->if_flags & IFF_BROADCAST) ip->ip_src = satosin(&ia->ia_broadaddr)->sin_addr; else if (ia->ia_ifp->if_flags & IFF_POINTOPOINT) ip->ip_src = satosin(&ia->ia_dstaddr)->sin_addr; } reflect: ip->ip_len += hlen; /* since ip_input deducts this */ icmpstat.icps_reflect++; icmpstat.icps_outhist[icp->icmp_type]++; icmp_reflect(m); return; case ICMP_REDIRECT: if (log_redirect) { u_long src, dst, gw; src = ntohl(ip->ip_src.s_addr); dst = ntohl(icp->icmp_ip.ip_dst.s_addr); gw = ntohl(icp->icmp_gwaddr.s_addr); printf("icmp redirect from %d.%d.%d.%d: " "%d.%d.%d.%d => %d.%d.%d.%d\n", (int)(src >> 24), (int)((src >> 16) & 0xff), (int)((src >> 8) & 0xff), (int)(src & 0xff), (int)(dst >> 24), (int)((dst >> 16) & 0xff), (int)((dst >> 8) & 0xff), (int)(dst & 0xff), (int)(gw >> 24), (int)((gw >> 16) & 0xff), (int)((gw >> 8) & 0xff), (int)(gw & 0xff)); } if (drop_redirect) break; if (code > 3) goto badcode; if (icmplen < ICMP_ADVLENMIN || icmplen < ICMP_ADVLEN(icp) || icp->icmp_ip.ip_hl < (sizeof(struct ip) >> 2)) { icmpstat.icps_badlen++; break; } /* * Short circuit routing redirects to force * immediate change in the kernel's routing * tables. The message is also handed to anyone * listening on a raw socket (e.g. the routing * daemon for use in updating its tables). */ icmpgw.sin_addr = ip->ip_src; icmpdst.sin_addr = icp->icmp_gwaddr; #ifdef ICMPPRINTFS if (icmpprintfs) { char buf[4 * sizeof "123"]; strcpy(buf, inet_ntoa(icp->icmp_ip.ip_dst)); printf("redirect dst %s to %s\n", buf, inet_ntoa(icp->icmp_gwaddr)); } #endif icmpsrc.sin_addr = icp->icmp_ip.ip_dst; rtredirect((struct sockaddr *)&icmpsrc, (struct sockaddr *)&icmpdst, (struct sockaddr *)0, RTF_GATEWAY | RTF_HOST, - (struct sockaddr *)&icmpgw, (struct rtentry **)0); + (struct sockaddr *)&icmpgw); pfctlinput(PRC_REDIRECT_HOST, (struct sockaddr *)&icmpsrc); #ifdef IPSEC key_sa_routechange((struct sockaddr *)&icmpsrc); #endif break; /* * No kernel processing for the following; * just fall through to send to raw listener. */ case ICMP_ECHOREPLY: case ICMP_ROUTERADVERT: case ICMP_ROUTERSOLICIT: case ICMP_TSTAMPREPLY: case ICMP_IREQREPLY: case ICMP_MASKREPLY: default: break; } raw: rip_input(m, off); return; freeit: m_freem(m); } /* * Reflect the ip packet back to the source */ static void icmp_reflect(m) struct mbuf *m; { struct ip *ip = mtod(m, struct ip *); struct ifaddr *ifa; struct in_ifaddr *ia; struct in_addr t; struct mbuf *opts = 0; int optlen = (ip->ip_hl << 2) - sizeof(struct ip); struct route *ro = NULL, rt; if (!in_canforward(ip->ip_src) && ((ntohl(ip->ip_src.s_addr) & IN_CLASSA_NET) != (IN_LOOPBACKNET << IN_CLASSA_NSHIFT))) { m_freem(m); /* Bad return address */ icmpstat.icps_badaddr++; goto done; /* Ip_output() will check for broadcast */ } t = ip->ip_dst; ip->ip_dst = ip->ip_src; ro = &rt; bzero(ro, sizeof(*ro)); /* * If the incoming packet was addressed directly to us, * use dst as the src for the reply. Otherwise (broadcast * or anonymous), use the address which corresponds * to the incoming interface. */ LIST_FOREACH(ia, INADDR_HASH(t.s_addr), ia_hash) if (t.s_addr == IA_SIN(ia)->sin_addr.s_addr) goto match; if (m->m_pkthdr.rcvif != NULL && m->m_pkthdr.rcvif->if_flags & IFF_BROADCAST) { TAILQ_FOREACH(ifa, &m->m_pkthdr.rcvif->if_addrhead, ifa_link) { if (ifa->ifa_addr->sa_family != AF_INET) continue; ia = ifatoia(ifa); if (satosin(&ia->ia_broadaddr)->sin_addr.s_addr == t.s_addr) goto match; } } ia = ip_rtaddr(ip->ip_dst, ro); /* We need a route to do anything useful. */ if (ia == NULL) { m_freem(m); icmpstat.icps_noroute++; goto done; } match: #ifdef MAC mac_reflect_mbuf_icmp(m); #endif t = IA_SIN(ia)->sin_addr; ip->ip_src = t; ip->ip_ttl = ip_defttl; if (optlen > 0) { register u_char *cp; int opt, cnt; u_int len; /* * Retrieve any source routing from the incoming packet; * add on any record-route or timestamp options. */ cp = (u_char *) (ip + 1); if ((opts = ip_srcroute()) == 0 && (opts = m_gethdr(M_DONTWAIT, MT_HEADER))) { opts->m_len = sizeof(struct in_addr); mtod(opts, struct in_addr *)->s_addr = 0; } if (opts) { #ifdef ICMPPRINTFS if (icmpprintfs) printf("icmp_reflect optlen %d rt %d => ", optlen, opts->m_len); #endif for (cnt = optlen; cnt > 0; cnt -= len, cp += len) { opt = cp[IPOPT_OPTVAL]; if (opt == IPOPT_EOL) break; if (opt == IPOPT_NOP) len = 1; else { if (cnt < IPOPT_OLEN + sizeof(*cp)) break; len = cp[IPOPT_OLEN]; if (len < IPOPT_OLEN + sizeof(*cp) || len > cnt) break; } /* * Should check for overflow, but it "can't happen" */ if (opt == IPOPT_RR || opt == IPOPT_TS || opt == IPOPT_SECURITY) { bcopy((caddr_t)cp, mtod(opts, caddr_t) + opts->m_len, len); opts->m_len += len; } } /* Terminate & pad, if necessary */ cnt = opts->m_len % 4; if (cnt) { for (; cnt < 4; cnt++) { *(mtod(opts, caddr_t) + opts->m_len) = IPOPT_EOL; opts->m_len++; } } #ifdef ICMPPRINTFS if (icmpprintfs) printf("%d\n", opts->m_len); #endif } /* * Now strip out original options by copying rest of first * mbuf's data back, and adjust the IP length. */ ip->ip_len -= optlen; ip->ip_v = IPVERSION; ip->ip_hl = 5; m->m_len -= optlen; if (m->m_flags & M_PKTHDR) m->m_pkthdr.len -= optlen; optlen += sizeof(struct ip); bcopy((caddr_t)ip + optlen, (caddr_t)(ip + 1), (unsigned)(m->m_len - sizeof(struct ip))); } m->m_flags &= ~(M_BCAST|M_MCAST); icmp_send(m, opts, ro); done: if (opts) (void)m_free(opts); if (ro && ro->ro_rt) RTFREE(ro->ro_rt); } /* * Send an icmp packet back to the ip level, * after supplying a checksum. */ static void icmp_send(m, opts, rt) register struct mbuf *m; struct mbuf *opts; struct route *rt; { register struct ip *ip = mtod(m, struct ip *); register int hlen; register struct icmp *icp; hlen = ip->ip_hl << 2; m->m_data += hlen; m->m_len -= hlen; icp = mtod(m, struct icmp *); icp->icmp_cksum = 0; icp->icmp_cksum = in_cksum(m, ip->ip_len - hlen); m->m_data -= hlen; m->m_len += hlen; m->m_pkthdr.rcvif = (struct ifnet *)0; #ifdef ICMPPRINTFS if (icmpprintfs) { char buf[4 * sizeof "123"]; strcpy(buf, inet_ntoa(ip->ip_dst)); printf("icmp_send dst %s src %s\n", buf, inet_ntoa(ip->ip_src)); } #endif (void) ip_output(m, opts, rt, 0, NULL, NULL); } n_time iptime() { struct timeval atv; u_long t; getmicrotime(&atv); t = (atv.tv_sec % (24*60*60)) * 1000 + atv.tv_usec / 1000; return (htonl(t)); } #if 1 /* * Return the next larger or smaller MTU plateau (table from RFC 1191) * given current value MTU. If DIR is less than zero, a larger plateau * is returned; otherwise, a smaller value is returned. */ static int ip_next_mtu(mtu, dir) int mtu; int dir; { static int mtutab[] = { 65535, 32000, 17914, 8166, 4352, 2002, 1492, 1006, 508, 296, 68, 0 }; int i; for (i = 0; i < (sizeof mtutab) / (sizeof mtutab[0]); i++) { if (mtu >= mtutab[i]) break; } if (dir < 0) { if (i == 0) { return 0; } else { return mtutab[i - 1]; } } else { if (mtutab[i] == 0) { return 0; } else if(mtu > mtutab[i]) { return mtutab[i]; } else { return mtutab[i + 1]; } } } #endif /* * badport_bandlim() - check for ICMP bandwidth limit * * Return 0 if it is ok to send an ICMP error response, -1 if we have * hit our bandwidth limit and it is not ok. * * If icmplim is <= 0, the feature is disabled and 0 is returned. * * For now we separate the TCP and UDP subsystems w/ different 'which' * values. We may eventually remove this separation (and simplify the * code further). * * Note that the printing of the error message is delayed so we can * properly print the icmp error rate that the system was trying to do * (i.e. 22000/100 pps, etc...). This can cause long delays in printing * the 'final' error, but it doesn't make sense to solve the printing * delay with more complex code. */ int badport_bandlim(int which) { #define N(a) (sizeof (a) / sizeof (a[0])) static struct rate { const char *type; struct timeval lasttime; int curpps;; } rates[BANDLIM_MAX+1] = { { "icmp unreach response" }, { "icmp ping response" }, { "icmp tstamp response" }, { "closed port RST response" }, { "open port RST response" } }; /* * Return ok status if feature disabled or argument out of range. */ if (icmplim > 0 && (u_int) which < N(rates)) { struct rate *r = &rates[which]; int opps = r->curpps; if (!ppsratecheck(&r->lasttime, &r->curpps, icmplim)) return -1; /* discard packet */ /* * If we've dropped below the threshold after having * rate-limited traffic print the message. This preserves * the previous behaviour at the expense of added complexity. */ if (icmplim_output && opps > icmplim) printf("Limiting %s from %d to %d packets/sec\n", r->type, opps, icmplim); } return 0; /* okay to send packet */ #undef N } Index: head/sys/netinet/ip_output.c =================================================================== --- head/sys/netinet/ip_output.c (revision 120726) +++ head/sys/netinet/ip_output.c (revision 120727) @@ -1,2260 +1,2260 @@ /* * Copyright (c) 1982, 1986, 1988, 1990, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)ip_output.c 8.3 (Berkeley) 1/21/94 * $FreeBSD$ */ #include "opt_ipfw.h" #include "opt_ipdn.h" #include "opt_ipdivert.h" #include "opt_ipfilter.h" #include "opt_ipsec.h" #include "opt_mac.h" #include "opt_pfil_hooks.h" #include "opt_random_ip_id.h" #include "opt_mbuf_stress_test.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef PFIL_HOOKS #include #endif #include static MALLOC_DEFINE(M_IPMOPTS, "ip_moptions", "internet multicast options"); #ifdef IPSEC #include #include #ifdef IPSEC_DEBUG #include #else #define KEYDEBUG(lev,arg) #endif #endif /*IPSEC*/ #ifdef FAST_IPSEC #include #include #include #endif /*FAST_IPSEC*/ #include #include #define print_ip(x, a, y) printf("%s %d.%d.%d.%d%s",\ x, (ntohl(a.s_addr)>>24)&0xFF,\ (ntohl(a.s_addr)>>16)&0xFF,\ (ntohl(a.s_addr)>>8)&0xFF,\ (ntohl(a.s_addr))&0xFF, y); u_short ip_id; #ifdef MBUF_STRESS_TEST int mbuf_frag_size = 0; SYSCTL_INT(_net_inet_ip, OID_AUTO, mbuf_frag_size, CTLFLAG_RW, &mbuf_frag_size, 0, "Fragment outgoing mbufs to this size"); #endif static struct mbuf *ip_insertoptions(struct mbuf *, struct mbuf *, int *); static struct ifnet *ip_multicast_if(struct in_addr *, int *); static void ip_mloopback (struct ifnet *, struct mbuf *, struct sockaddr_in *, int); static int ip_getmoptions (struct sockopt *, struct ip_moptions *); static int ip_pcbopts(int, struct mbuf **, struct mbuf *); static int ip_setmoptions (struct sockopt *, struct ip_moptions **); int ip_optcopy(struct ip *, struct ip *); extern struct protosw inetsw[]; /* * IP output. The packet in mbuf chain m contains a skeletal IP * header (with len, off, ttl, proto, tos, src, dst). * The mbuf chain containing the packet will be freed. * The mbuf opt, if present, will not be freed. */ int ip_output(struct mbuf *m0, struct mbuf *opt, struct route *ro, int flags, struct ip_moptions *imo, struct inpcb *inp) { struct ip *ip; struct ifnet *ifp = NULL; /* keep compiler happy */ struct mbuf *m; int hlen = sizeof (struct ip); int len, off, error = 0; struct sockaddr_in *dst = NULL; /* keep compiler happy */ struct in_ifaddr *ia = NULL; int isbroadcast, sw_csum; struct in_addr pkt_dst; #ifdef IPSEC struct route iproute; struct secpolicy *sp = NULL; #endif #ifdef FAST_IPSEC struct route iproute; struct m_tag *mtag; struct secpolicy *sp = NULL; struct tdb_ident *tdbi; int s; #endif /* FAST_IPSEC */ struct ip_fw_args args; int src_was_INADDR_ANY = 0; /* as the name says... */ args.eh = NULL; args.rule = NULL; args.next_hop = NULL; args.divert_rule = 0; /* divert cookie */ /* Grab info from MT_TAG mbufs prepended to the chain. */ for (; m0 && m0->m_type == MT_TAG; m0 = m0->m_next) { switch(m0->_m_tag_id) { default: printf("ip_output: unrecognised MT_TAG tag %d\n", m0->_m_tag_id); break; case PACKET_TAG_DUMMYNET: /* * the packet was already tagged, so part of the * processing was already done, and we need to go down. * Get parameters from the header. */ args.rule = ((struct dn_pkt *)m0)->rule; opt = NULL ; ro = & ( ((struct dn_pkt *)m0)->ro ) ; imo = NULL ; dst = ((struct dn_pkt *)m0)->dn_dst ; ifp = ((struct dn_pkt *)m0)->ifp ; flags = ((struct dn_pkt *)m0)->flags ; break; case PACKET_TAG_DIVERT: args.divert_rule = (intptr_t)m0->m_data & 0xffff; break; case PACKET_TAG_IPFORWARD: args.next_hop = (struct sockaddr_in *)m0->m_data; break; } } m = m0; M_ASSERTPKTHDR(m); #ifndef FAST_IPSEC KASSERT(ro != NULL, ("ip_output: no route, proto %d", mtod(m, struct ip *)->ip_p)); #endif if (args.rule != NULL) { /* dummynet already saw us */ ip = mtod(m, struct ip *); hlen = ip->ip_hl << 2 ; if (ro->ro_rt) ia = ifatoia(ro->ro_rt->rt_ifa); goto sendit; } if (opt) { len = 0; m = ip_insertoptions(m, opt, &len); if (len != 0) hlen = len; } ip = mtod(m, struct ip *); pkt_dst = args.next_hop ? args.next_hop->sin_addr : ip->ip_dst; /* * Fill in IP header. If we are not allowing fragmentation, * then the ip_id field is meaningless, so send it as zero * to reduce information leakage. Otherwise, if we are not * randomizing ip_id, then don't bother to convert it to network * byte order -- it's just a nonce. Note that a 16-bit counter * will wrap around in less than 10 seconds at 100 Mbit/s on a * medium with MTU 1500. See Steven M. Bellovin, "A Technique * for Counting NATted Hosts", Proc. IMW'02, available at * . */ if ((flags & (IP_FORWARDING|IP_RAWOUTPUT)) == 0) { ip->ip_v = IPVERSION; ip->ip_hl = hlen >> 2; if ((ip->ip_off & IP_DF) == 0) { ip->ip_off = 0; #ifdef RANDOM_IP_ID ip->ip_id = ip_randomid(); #else ip->ip_id = ip_id++; #endif } else { ip->ip_off = IP_DF; ip->ip_id = 0; } ipstat.ips_localout++; } else { hlen = ip->ip_hl << 2; } #ifdef FAST_IPSEC if (ro == NULL) { ro = &iproute; bzero(ro, sizeof (*ro)); } #endif /* FAST_IPSEC */ dst = (struct sockaddr_in *)&ro->ro_dst; /* * If there is a cached route, * check that it is to the same destination * and is still up. If not, free it and try again. * The address family should also be checked in case of sharing the * cache with IPv6. */ if (ro->ro_rt && ((ro->ro_rt->rt_flags & RTF_UP) == 0 || dst->sin_family != AF_INET || dst->sin_addr.s_addr != pkt_dst.s_addr)) { RTFREE(ro->ro_rt); ro->ro_rt = (struct rtentry *)0; } if (ro->ro_rt == 0) { bzero(dst, sizeof(*dst)); dst->sin_family = AF_INET; dst->sin_len = sizeof(*dst); dst->sin_addr = pkt_dst; } /* * If routing to interface only, * short circuit routing lookup. */ if (flags & IP_ROUTETOIF) { if ((ia = ifatoia(ifa_ifwithdstaddr(sintosa(dst)))) == 0 && (ia = ifatoia(ifa_ifwithnet(sintosa(dst)))) == 0) { ipstat.ips_noroute++; error = ENETUNREACH; goto bad; } ifp = ia->ia_ifp; ip->ip_ttl = 1; isbroadcast = in_broadcast(dst->sin_addr, ifp); } else if (IN_MULTICAST(ntohl(ip->ip_dst.s_addr)) && imo != NULL && imo->imo_multicast_ifp != NULL) { /* * Bypass the normal routing lookup for multicast * packets if the interface is specified. */ ifp = imo->imo_multicast_ifp; IFP_TO_IA(ifp, ia); isbroadcast = 0; /* fool gcc */ } else { /* * If this is the case, we probably don't want to allocate * a protocol-cloned route since we didn't get one from the * ULP. This lets TCP do its thing, while not burdening * forwarding or ICMP with the overhead of cloning a route. * Of course, we still want to do any cloning requested by * the link layer, as this is probably required in all cases * for correct operation (as it is for ARP). */ if (ro->ro_rt == 0) rtalloc_ign(ro, RTF_PRCLONING); if (ro->ro_rt == 0) { ipstat.ips_noroute++; error = EHOSTUNREACH; goto bad; } ia = ifatoia(ro->ro_rt->rt_ifa); ifp = ro->ro_rt->rt_ifp; ro->ro_rt->rt_use++; if (ro->ro_rt->rt_flags & RTF_GATEWAY) dst = (struct sockaddr_in *)ro->ro_rt->rt_gateway; if (ro->ro_rt->rt_flags & RTF_HOST) isbroadcast = (ro->ro_rt->rt_flags & RTF_BROADCAST); else isbroadcast = in_broadcast(dst->sin_addr, ifp); } if (IN_MULTICAST(ntohl(pkt_dst.s_addr))) { struct in_multi *inm; m->m_flags |= M_MCAST; /* * IP destination address is multicast. Make sure "dst" * still points to the address in "ro". (It may have been * changed to point to a gateway address, above.) */ dst = (struct sockaddr_in *)&ro->ro_dst; /* * See if the caller provided any multicast options */ if (imo != NULL) { ip->ip_ttl = imo->imo_multicast_ttl; if (imo->imo_multicast_vif != -1) ip->ip_src.s_addr = ip_mcast_src ? ip_mcast_src(imo->imo_multicast_vif) : INADDR_ANY; } else ip->ip_ttl = IP_DEFAULT_MULTICAST_TTL; /* * Confirm that the outgoing interface supports multicast. */ if ((imo == NULL) || (imo->imo_multicast_vif == -1)) { if ((ifp->if_flags & IFF_MULTICAST) == 0) { ipstat.ips_noroute++; error = ENETUNREACH; goto bad; } } /* * If source address not specified yet, use address * of outgoing interface. */ if (ip->ip_src.s_addr == INADDR_ANY) { /* Interface may have no addresses. */ if (ia != NULL) ip->ip_src = IA_SIN(ia)->sin_addr; } if (ip_mrouter && (flags & IP_FORWARDING) == 0) { /* * XXX * delayed checksums are not currently * compatible with IP multicast routing */ if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA) { in_delayed_cksum(m); m->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA; } } IN_LOOKUP_MULTI(pkt_dst, ifp, inm); if (inm != NULL && (imo == NULL || imo->imo_multicast_loop)) { /* * If we belong to the destination multicast group * on the outgoing interface, and the caller did not * forbid loopback, loop back a copy. */ ip_mloopback(ifp, m, dst, hlen); } else { /* * If we are acting as a multicast router, perform * multicast forwarding as if the packet had just * arrived on the interface to which we are about * to send. The multicast forwarding function * recursively calls this function, using the * IP_FORWARDING flag to prevent infinite recursion. * * Multicasts that are looped back by ip_mloopback(), * above, will be forwarded by the ip_input() routine, * if necessary. */ if (ip_mrouter && (flags & IP_FORWARDING) == 0) { /* * If rsvp daemon is not running, do not * set ip_moptions. This ensures that the packet * is multicast and not just sent down one link * as prescribed by rsvpd. */ if (!rsvp_on) imo = NULL; if (ip_mforward && ip_mforward(ip, ifp, m, imo) != 0) { m_freem(m); goto done; } } } /* * Multicasts with a time-to-live of zero may be looped- * back, above, but must not be transmitted on a network. * Also, multicasts addressed to the loopback interface * are not sent -- the above call to ip_mloopback() will * loop back a copy if this host actually belongs to the * destination group on the loopback interface. */ if (ip->ip_ttl == 0 || ifp->if_flags & IFF_LOOPBACK) { m_freem(m); goto done; } goto sendit; } #ifndef notdef /* * If the source address is not specified yet, use the address * of the outoing interface. In case, keep note we did that, so * if the the firewall changes the next-hop causing the output * interface to change, we can fix that. */ if (ip->ip_src.s_addr == INADDR_ANY) { /* Interface may have no addresses. */ if (ia != NULL) { ip->ip_src = IA_SIN(ia)->sin_addr; src_was_INADDR_ANY = 1; } } #endif /* notdef */ /* * Verify that we have any chance at all of being able to queue * the packet or packet fragments */ if ((ifp->if_snd.ifq_len + ip->ip_len / ifp->if_mtu + 1) >= ifp->if_snd.ifq_maxlen) { error = ENOBUFS; ipstat.ips_odropped++; goto bad; } /* * Look for broadcast address and * verify user is allowed to send * such a packet. */ if (isbroadcast) { if ((ifp->if_flags & IFF_BROADCAST) == 0) { error = EADDRNOTAVAIL; goto bad; } if ((flags & IP_ALLOWBROADCAST) == 0) { error = EACCES; goto bad; } /* don't allow broadcast messages to be fragmented */ if (ip->ip_len > ifp->if_mtu) { error = EMSGSIZE; goto bad; } if (flags & IP_SENDONES) ip->ip_dst.s_addr = INADDR_BROADCAST; m->m_flags |= M_BCAST; } else { m->m_flags &= ~M_BCAST; } sendit: #ifdef IPSEC /* get SP for this packet */ if (inp == NULL) sp = ipsec4_getpolicybyaddr(m, IPSEC_DIR_OUTBOUND, flags, &error); else sp = ipsec4_getpolicybypcb(m, IPSEC_DIR_OUTBOUND, inp, &error); if (sp == NULL) { ipsecstat.out_inval++; goto bad; } error = 0; /* check policy */ switch (sp->policy) { case IPSEC_POLICY_DISCARD: /* * This packet is just discarded. */ ipsecstat.out_polvio++; goto bad; case IPSEC_POLICY_BYPASS: case IPSEC_POLICY_NONE: /* no need to do IPsec. */ goto skip_ipsec; case IPSEC_POLICY_IPSEC: if (sp->req == NULL) { /* acquire a policy */ error = key_spdacquire(sp); goto bad; } break; case IPSEC_POLICY_ENTRUST: default: printf("ip_output: Invalid policy found. %d\n", sp->policy); } { struct ipsec_output_state state; bzero(&state, sizeof(state)); state.m = m; if (flags & IP_ROUTETOIF) { state.ro = &iproute; bzero(&iproute, sizeof(iproute)); } else state.ro = ro; state.dst = (struct sockaddr *)dst; ip->ip_sum = 0; /* * XXX * delayed checksums are not currently compatible with IPsec */ if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA) { in_delayed_cksum(m); m->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA; } ip->ip_len = htons(ip->ip_len); ip->ip_off = htons(ip->ip_off); error = ipsec4_output(&state, sp, flags); m = state.m; if (flags & IP_ROUTETOIF) { /* * if we have tunnel mode SA, we may need to ignore * IP_ROUTETOIF. */ if (state.ro != &iproute || state.ro->ro_rt != NULL) { flags &= ~IP_ROUTETOIF; ro = state.ro; } } else ro = state.ro; dst = (struct sockaddr_in *)state.dst; if (error) { /* mbuf is already reclaimed in ipsec4_output. */ m0 = NULL; switch (error) { case EHOSTUNREACH: case ENETUNREACH: case EMSGSIZE: case ENOBUFS: case ENOMEM: break; default: printf("ip4_output (ipsec): error code %d\n", error); /*fall through*/ case ENOENT: /* don't show these error codes to the user */ error = 0; break; } goto bad; } } /* be sure to update variables that are affected by ipsec4_output() */ ip = mtod(m, struct ip *); hlen = ip->ip_hl << 2; if (ro->ro_rt == NULL) { if ((flags & IP_ROUTETOIF) == 0) { printf("ip_output: " "can't update route after IPsec processing\n"); error = EHOSTUNREACH; /*XXX*/ goto bad; } } else { ia = ifatoia(ro->ro_rt->rt_ifa); ifp = ro->ro_rt->rt_ifp; } /* make it flipped, again. */ ip->ip_len = ntohs(ip->ip_len); ip->ip_off = ntohs(ip->ip_off); skip_ipsec: #endif /*IPSEC*/ #ifdef FAST_IPSEC /* * Check the security policy (SP) for the packet and, if * required, do IPsec-related processing. There are two * cases here; the first time a packet is sent through * it will be untagged and handled by ipsec4_checkpolicy. * If the packet is resubmitted to ip_output (e.g. after * AH, ESP, etc. processing), there will be a tag to bypass * the lookup and related policy checking. */ mtag = m_tag_find(m, PACKET_TAG_IPSEC_PENDING_TDB, NULL); s = splnet(); if (mtag != NULL) { tdbi = (struct tdb_ident *)(mtag + 1); sp = ipsec_getpolicy(tdbi, IPSEC_DIR_OUTBOUND); if (sp == NULL) error = -EINVAL; /* force silent drop */ m_tag_delete(m, mtag); } else { sp = ipsec4_checkpolicy(m, IPSEC_DIR_OUTBOUND, flags, &error, inp); } /* * There are four return cases: * sp != NULL apply IPsec policy * sp == NULL, error == 0 no IPsec handling needed * sp == NULL, error == -EINVAL discard packet w/o error * sp == NULL, error != 0 discard packet, report error */ if (sp != NULL) { /* Loop detection, check if ipsec processing already done */ KASSERT(sp->req != NULL, ("ip_output: no ipsec request")); for (mtag = m_tag_first(m); mtag != NULL; mtag = m_tag_next(m, mtag)) { if (mtag->m_tag_cookie != MTAG_ABI_COMPAT) continue; if (mtag->m_tag_id != PACKET_TAG_IPSEC_OUT_DONE && mtag->m_tag_id != PACKET_TAG_IPSEC_OUT_CRYPTO_NEEDED) continue; /* * Check if policy has an SA associated with it. * This can happen when an SP has yet to acquire * an SA; e.g. on first reference. If it occurs, * then we let ipsec4_process_packet do its thing. */ if (sp->req->sav == NULL) break; tdbi = (struct tdb_ident *)(mtag + 1); if (tdbi->spi == sp->req->sav->spi && tdbi->proto == sp->req->sav->sah->saidx.proto && bcmp(&tdbi->dst, &sp->req->sav->sah->saidx.dst, sizeof (union sockaddr_union)) == 0) { /* * No IPsec processing is needed, free * reference to SP. * * NB: null pointer to avoid free at * done: below. */ KEY_FREESP(&sp), sp = NULL; splx(s); goto spd_done; } } /* * Do delayed checksums now because we send before * this is done in the normal processing path. */ if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA) { in_delayed_cksum(m); m->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA; } ip->ip_len = htons(ip->ip_len); ip->ip_off = htons(ip->ip_off); /* NB: callee frees mbuf */ error = ipsec4_process_packet(m, sp->req, flags, 0); /* * Preserve KAME behaviour: ENOENT can be returned * when an SA acquire is in progress. Don't propagate * this to user-level; it confuses applications. * * XXX this will go away when the SADB is redone. */ if (error == ENOENT) error = 0; splx(s); goto done; } else { splx(s); if (error != 0) { /* * Hack: -EINVAL is used to signal that a packet * should be silently discarded. This is typically * because we asked key management for an SA and * it was delayed (e.g. kicked up to IKE). */ if (error == -EINVAL) error = 0; goto bad; } else { /* No IPsec processing for this packet. */ } #ifdef notyet /* * If deferred crypto processing is needed, check that * the interface supports it. */ mtag = m_tag_find(m, PACKET_TAG_IPSEC_OUT_CRYPTO_NEEDED, NULL); if (mtag != NULL && (ifp->if_capenable & IFCAP_IPSEC) == 0) { /* notify IPsec to do its own crypto */ ipsp_skipcrypto_unmark((struct tdb_ident *)(mtag + 1)); error = EHOSTUNREACH; goto bad; } #endif } spd_done: #endif /* FAST_IPSEC */ /* * IpHack's section. * - Xlate: translate packet's addr/port (NAT). * - Firewall: deny/allow/etc. * - Wrap: fake packet's addr/port * - Encapsulate: put it in another IP and send out. */ #ifdef PFIL_HOOKS /* * Run through list of hooks for output packets. */ error = pfil_run_hooks(&inet_pfil_hook, &m, ifp, PFIL_OUT); if (error != 0 || m == NULL) goto done; ip = mtod(m, struct ip *); #endif /* PFIL_HOOKS */ /* * Check with the firewall... * but not if we are already being fwd'd from a firewall. */ if (fw_enable && IPFW_LOADED && !args.next_hop) { struct sockaddr_in *old = dst; args.m = m; args.next_hop = dst; args.oif = ifp; off = ip_fw_chk_ptr(&args); m = args.m; dst = args.next_hop; /* * On return we must do the following: * m == NULL -> drop the pkt (old interface, deprecated) * (off & IP_FW_PORT_DENY_FLAG) -> drop the pkt (new interface) * 1<=off<= 0xffff -> DIVERT * (off & IP_FW_PORT_DYNT_FLAG) -> send to a DUMMYNET pipe * (off & IP_FW_PORT_TEE_FLAG) -> TEE the packet * dst != old -> IPFIREWALL_FORWARD * off==0, dst==old -> accept * If some of the above modules are not compiled in, then * we should't have to check the corresponding condition * (because the ipfw control socket should not accept * unsupported rules), but better play safe and drop * packets in case of doubt. */ if ( (off & IP_FW_PORT_DENY_FLAG) || m == NULL) { if (m) m_freem(m); error = EACCES; goto done; } ip = mtod(m, struct ip *); if (off == 0 && dst == old) /* common case */ goto pass; if (DUMMYNET_LOADED && (off & IP_FW_PORT_DYNT_FLAG) != 0) { /* * pass the pkt to dummynet. Need to include * pipe number, m, ifp, ro, dst because these are * not recomputed in the next pass. * All other parameters have been already used and * so they are not needed anymore. * XXX note: if the ifp or ro entry are deleted * while a pkt is in dummynet, we are in trouble! */ args.ro = ro; args.dst = dst; args.flags = flags; error = ip_dn_io_ptr(m, off & 0xffff, DN_TO_IP_OUT, &args); goto done; } #ifdef IPDIVERT if (off != 0 && (off & IP_FW_PORT_DYNT_FLAG) == 0) { struct mbuf *clone = NULL; /* Clone packet if we're doing a 'tee' */ if ((off & IP_FW_PORT_TEE_FLAG) != 0) clone = m_dup(m, M_DONTWAIT); /* * XXX * delayed checksums are not currently compatible * with divert sockets. */ if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA) { in_delayed_cksum(m); m->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA; } /* Restore packet header fields to original values */ ip->ip_len = htons(ip->ip_len); ip->ip_off = htons(ip->ip_off); /* Deliver packet to divert input routine */ divert_packet(m, 0, off & 0xffff, args.divert_rule); /* If 'tee', continue with original packet */ if (clone != NULL) { m = clone; ip = mtod(m, struct ip *); goto pass; } goto done; } #endif /* IPFIREWALL_FORWARD */ /* * Check dst to make sure it is directly reachable on the * interface we previously thought it was. * If it isn't (which may be likely in some situations) we have * to re-route it (ie, find a route for the next-hop and the * associated interface) and set them here. This is nested * forwarding which in most cases is undesirable, except where * such control is nigh impossible. So we do it here. * And I'm babbling. */ if (off == 0 && old != dst) { /* FORWARD, dst has changed */ #if 0 /* * XXX To improve readability, this block should be * changed into a function call as below: */ error = ip_ipforward(&m, &dst, &ifp); if (error) goto bad; if (m == NULL) /* ip_input consumed the mbuf */ goto done; #else struct in_ifaddr *ia; /* * XXX sro_fwd below is static, and a pointer * to it gets passed to routines downstream. * This could have surprisingly bad results in * practice, because its content is overwritten * by subsequent packets. */ /* There must be a better way to do this next line... */ static struct route sro_fwd; struct route *ro_fwd = &sro_fwd; #if 0 print_ip("IPFIREWALL_FORWARD: New dst ip: ", dst->sin_addr, "\n"); #endif /* * We need to figure out if we have been forwarded * to a local socket. If so, then we should somehow * "loop back" to ip_input, and get directed to the * PCB as if we had received this packet. This is * because it may be dificult to identify the packets * you want to forward until they are being output * and have selected an interface. (e.g. locally * initiated packets) If we used the loopback inteface, * we would not be able to control what happens * as the packet runs through ip_input() as * it is done through an ISR. */ LIST_FOREACH(ia, INADDR_HASH(dst->sin_addr.s_addr), ia_hash) { /* * If the addr to forward to is one * of ours, we pretend to * be the destination for this packet. */ if (IA_SIN(ia)->sin_addr.s_addr == dst->sin_addr.s_addr) break; } if (ia) { /* tell ip_input "dont filter" */ struct m_hdr tag; tag.mh_type = MT_TAG; tag.mh_flags = PACKET_TAG_IPFORWARD; tag.mh_data = (caddr_t)args.next_hop; tag.mh_next = m; if (m->m_pkthdr.rcvif == NULL) m->m_pkthdr.rcvif = ifunit("lo0"); if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA) { m->m_pkthdr.csum_flags |= CSUM_DATA_VALID | CSUM_PSEUDO_HDR; m0->m_pkthdr.csum_data = 0xffff; } m->m_pkthdr.csum_flags |= CSUM_IP_CHECKED | CSUM_IP_VALID; ip->ip_len = htons(ip->ip_len); ip->ip_off = htons(ip->ip_off); ip_input((struct mbuf *)&tag); goto done; } /* Some of the logic for this was * nicked from above. * * This rewrites the cached route in a local PCB. * Is this what we want to do? */ bcopy(dst, &ro_fwd->ro_dst, sizeof(*dst)); ro_fwd->ro_rt = 0; rtalloc_ign(ro_fwd, RTF_PRCLONING); if (ro_fwd->ro_rt == 0) { ipstat.ips_noroute++; error = EHOSTUNREACH; goto bad; } ia = ifatoia(ro_fwd->ro_rt->rt_ifa); ifp = ro_fwd->ro_rt->rt_ifp; ro_fwd->ro_rt->rt_use++; if (ro_fwd->ro_rt->rt_flags & RTF_GATEWAY) dst = (struct sockaddr_in *) ro_fwd->ro_rt->rt_gateway; if (ro_fwd->ro_rt->rt_flags & RTF_HOST) isbroadcast = (ro_fwd->ro_rt->rt_flags & RTF_BROADCAST); else isbroadcast = in_broadcast(dst->sin_addr, ifp); if (ro->ro_rt) RTFREE(ro->ro_rt); ro->ro_rt = ro_fwd->ro_rt; dst = (struct sockaddr_in *)&ro_fwd->ro_dst; #endif /* ... block to be put into a function */ /* * If we added a default src ip earlier, * which would have been gotten from the-then * interface, do it again, from the new one. */ if (src_was_INADDR_ANY) ip->ip_src = IA_SIN(ia)->sin_addr; goto pass ; } /* * if we get here, none of the above matches, and * we have to drop the pkt */ m_freem(m); error = EACCES; /* not sure this is the right error msg */ goto done; } pass: /* 127/8 must not appear on wire - RFC1122. */ if ((ntohl(ip->ip_dst.s_addr) >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET || (ntohl(ip->ip_src.s_addr) >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET) { if ((ifp->if_flags & IFF_LOOPBACK) == 0) { ipstat.ips_badaddr++; error = EADDRNOTAVAIL; goto bad; } } m->m_pkthdr.csum_flags |= CSUM_IP; sw_csum = m->m_pkthdr.csum_flags & ~ifp->if_hwassist; if (sw_csum & CSUM_DELAY_DATA) { in_delayed_cksum(m); sw_csum &= ~CSUM_DELAY_DATA; } m->m_pkthdr.csum_flags &= ifp->if_hwassist; /* * If small enough for interface, or the interface will take * care of the fragmentation for us, can just send directly. */ if (ip->ip_len <= ifp->if_mtu || ifp->if_hwassist & CSUM_FRAGMENT) { ip->ip_len = htons(ip->ip_len); ip->ip_off = htons(ip->ip_off); ip->ip_sum = 0; if (sw_csum & CSUM_DELAY_IP) ip->ip_sum = in_cksum(m, hlen); /* Record statistics for this interface address. */ if (!(flags & IP_FORWARDING) && ia) { ia->ia_ifa.if_opackets++; ia->ia_ifa.if_obytes += m->m_pkthdr.len; } #ifdef IPSEC /* clean ipsec history once it goes out of the node */ ipsec_delaux(m); #endif #ifdef MBUF_STRESS_TEST if (mbuf_frag_size && m->m_pkthdr.len > mbuf_frag_size) m = m_fragment(m, M_DONTWAIT, mbuf_frag_size); #endif error = (*ifp->if_output)(ifp, m, (struct sockaddr *)dst, ro->ro_rt); goto done; } if (ip->ip_off & IP_DF) { error = EMSGSIZE; /* * This case can happen if the user changed the MTU * of an interface after enabling IP on it. Because * most netifs don't keep track of routes pointing to * them, there is no way for one to update all its * routes when the MTU is changed. */ if ((ro->ro_rt->rt_flags & (RTF_UP | RTF_HOST)) && !(ro->ro_rt->rt_rmx.rmx_locks & RTV_MTU) && (ro->ro_rt->rt_rmx.rmx_mtu > ifp->if_mtu)) { ro->ro_rt->rt_rmx.rmx_mtu = ifp->if_mtu; } ipstat.ips_cantfrag++; goto bad; } /* * Too large for interface; fragment if possible. If successful, * on return, m will point to a list of packets to be sent. */ error = ip_fragment(ip, &m, ifp->if_mtu, ifp->if_hwassist, sw_csum); if (error) goto bad; for (; m; m = m0) { m0 = m->m_nextpkt; m->m_nextpkt = 0; #ifdef IPSEC /* clean ipsec history once it goes out of the node */ ipsec_delaux(m); #endif if (error == 0) { /* Record statistics for this interface address. */ if (ia != NULL) { ia->ia_ifa.if_opackets++; ia->ia_ifa.if_obytes += m->m_pkthdr.len; } error = (*ifp->if_output)(ifp, m, (struct sockaddr *)dst, ro->ro_rt); } else m_freem(m); } if (error == 0) ipstat.ips_fragmented++; done: #ifdef IPSEC if (ro == &iproute && ro->ro_rt) { RTFREE(ro->ro_rt); ro->ro_rt = NULL; } if (sp != NULL) { KEYDEBUG(KEYDEBUG_IPSEC_STAMP, printf("DP ip_output call free SP:%p\n", sp)); key_freesp(sp); } #endif #ifdef FAST_IPSEC if (ro == &iproute && ro->ro_rt) { RTFREE(ro->ro_rt); ro->ro_rt = NULL; } if (sp != NULL) KEY_FREESP(&sp); #endif return (error); bad: m_freem(m); goto done; } /* * Create a chain of fragments which fit the given mtu. m_frag points to the * mbuf to be fragmented; on return it points to the chain with the fragments. * Return 0 if no error. If error, m_frag may contain a partially built * chain of fragments that should be freed by the caller. * * if_hwassist_flags is the hw offload capabilities (see if_data.ifi_hwassist) * sw_csum contains the delayed checksums flags (e.g., CSUM_DELAY_IP). */ int ip_fragment(struct ip *ip, struct mbuf **m_frag, int mtu, u_long if_hwassist_flags, int sw_csum) { int error = 0; int hlen = ip->ip_hl << 2; int len = (mtu - hlen) & ~7; /* size of payload in each fragment */ int off; struct mbuf *m0 = *m_frag; /* the original packet */ int firstlen; struct mbuf **mnext; int nfrags; if (ip->ip_off & IP_DF) { /* Fragmentation not allowed */ ipstat.ips_cantfrag++; return EMSGSIZE; } /* * Must be able to put at least 8 bytes per fragment. */ if (len < 8) return EMSGSIZE; /* * If the interface will not calculate checksums on * fragmented packets, then do it here. */ if (m0->m_pkthdr.csum_flags & CSUM_DELAY_DATA && (if_hwassist_flags & CSUM_IP_FRAGS) == 0) { in_delayed_cksum(m0); m0->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA; } if (len > PAGE_SIZE) { /* * Fragment large datagrams such that each segment * contains a multiple of PAGE_SIZE amount of data, * plus headers. This enables a receiver to perform * page-flipping zero-copy optimizations. * * XXX When does this help given that sender and receiver * could have different page sizes, and also mtu could * be less than the receiver's page size ? */ int newlen; struct mbuf *m; for (m = m0, off = 0; m && (off+m->m_len) <= mtu; m = m->m_next) off += m->m_len; /* * firstlen (off - hlen) must be aligned on an * 8-byte boundary */ if (off < hlen) goto smart_frag_failure; off = ((off - hlen) & ~7) + hlen; newlen = (~PAGE_MASK) & mtu; if ((newlen + sizeof (struct ip)) > mtu) { /* we failed, go back the default */ smart_frag_failure: newlen = len; off = hlen + len; } len = newlen; } else { off = hlen + len; } firstlen = off - hlen; mnext = &m0->m_nextpkt; /* pointer to next packet */ /* * Loop through length of segment after first fragment, * make new header and copy data of each part and link onto chain. * Here, m0 is the original packet, m is the fragment being created. * The fragments are linked off the m_nextpkt of the original * packet, which after processing serves as the first fragment. */ for (nfrags = 1; off < ip->ip_len; off += len, nfrags++) { struct ip *mhip; /* ip header on the fragment */ struct mbuf *m; int mhlen = sizeof (struct ip); MGETHDR(m, M_DONTWAIT, MT_HEADER); if (m == 0) { error = ENOBUFS; ipstat.ips_odropped++; goto done; } m->m_flags |= (m0->m_flags & M_MCAST) | M_FRAG; /* * In the first mbuf, leave room for the link header, then * copy the original IP header including options. The payload * goes into an additional mbuf chain returned by m_copy(). */ m->m_data += max_linkhdr; mhip = mtod(m, struct ip *); *mhip = *ip; if (hlen > sizeof (struct ip)) { mhlen = ip_optcopy(ip, mhip) + sizeof (struct ip); mhip->ip_v = IPVERSION; mhip->ip_hl = mhlen >> 2; } m->m_len = mhlen; /* XXX do we need to add ip->ip_off below ? */ mhip->ip_off = ((off - hlen) >> 3) + ip->ip_off; if (off + len >= ip->ip_len) { /* last fragment */ len = ip->ip_len - off; m->m_flags |= M_LASTFRAG; } else mhip->ip_off |= IP_MF; mhip->ip_len = htons((u_short)(len + mhlen)); m->m_next = m_copy(m0, off, len); if (m->m_next == 0) { /* copy failed */ m_free(m); error = ENOBUFS; /* ??? */ ipstat.ips_odropped++; goto done; } m->m_pkthdr.len = mhlen + len; m->m_pkthdr.rcvif = (struct ifnet *)0; #ifdef MAC mac_create_fragment(m0, m); #endif m->m_pkthdr.csum_flags = m0->m_pkthdr.csum_flags; mhip->ip_off = htons(mhip->ip_off); mhip->ip_sum = 0; if (sw_csum & CSUM_DELAY_IP) mhip->ip_sum = in_cksum(m, mhlen); *mnext = m; mnext = &m->m_nextpkt; } ipstat.ips_ofragments += nfrags; /* set first marker for fragment chain */ m0->m_flags |= M_FIRSTFRAG | M_FRAG; m0->m_pkthdr.csum_data = nfrags; /* * Update first fragment by trimming what's been copied out * and updating header. */ m_adj(m0, hlen + firstlen - ip->ip_len); m0->m_pkthdr.len = hlen + firstlen; ip->ip_len = htons((u_short)m0->m_pkthdr.len); ip->ip_off |= IP_MF; ip->ip_off = htons(ip->ip_off); ip->ip_sum = 0; if (sw_csum & CSUM_DELAY_IP) ip->ip_sum = in_cksum(m0, hlen); done: *m_frag = m0; return error; } void in_delayed_cksum(struct mbuf *m) { struct ip *ip; u_short csum, offset; ip = mtod(m, struct ip *); offset = ip->ip_hl << 2 ; csum = in_cksum_skip(m, ip->ip_len, offset); if (m->m_pkthdr.csum_flags & CSUM_UDP && csum == 0) csum = 0xffff; offset += m->m_pkthdr.csum_data; /* checksum offset */ if (offset + sizeof(u_short) > m->m_len) { printf("delayed m_pullup, m->len: %d off: %d p: %d\n", m->m_len, offset, ip->ip_p); /* * XXX * this shouldn't happen, but if it does, the * correct behavior may be to insert the checksum * in the existing chain instead of rearranging it. */ m = m_pullup(m, offset + sizeof(u_short)); } *(u_short *)(m->m_data + offset) = csum; } /* * Insert IP options into preformed packet. * Adjust IP destination as required for IP source routing, * as indicated by a non-zero in_addr at the start of the options. * * XXX This routine assumes that the packet has no options in place. */ static struct mbuf * ip_insertoptions(m, opt, phlen) register struct mbuf *m; struct mbuf *opt; int *phlen; { register struct ipoption *p = mtod(opt, struct ipoption *); struct mbuf *n; register struct ip *ip = mtod(m, struct ip *); unsigned optlen; optlen = opt->m_len - sizeof(p->ipopt_dst); if (optlen + ip->ip_len > IP_MAXPACKET) { *phlen = 0; return (m); /* XXX should fail */ } if (p->ipopt_dst.s_addr) ip->ip_dst = p->ipopt_dst; if (m->m_flags & M_EXT || m->m_data - optlen < m->m_pktdat) { MGETHDR(n, M_DONTWAIT, MT_HEADER); if (n == 0) { *phlen = 0; return (m); } n->m_pkthdr.rcvif = (struct ifnet *)0; #ifdef MAC mac_create_mbuf_from_mbuf(m, n); #endif n->m_pkthdr.len = m->m_pkthdr.len + optlen; m->m_len -= sizeof(struct ip); m->m_data += sizeof(struct ip); n->m_next = m; m = n; m->m_len = optlen + sizeof(struct ip); m->m_data += max_linkhdr; bcopy(ip, mtod(m, void *), sizeof(struct ip)); } else { m->m_data -= optlen; m->m_len += optlen; m->m_pkthdr.len += optlen; bcopy(ip, mtod(m, void *), sizeof(struct ip)); } ip = mtod(m, struct ip *); bcopy(p->ipopt_list, ip + 1, optlen); *phlen = sizeof(struct ip) + optlen; ip->ip_v = IPVERSION; ip->ip_hl = *phlen >> 2; ip->ip_len += optlen; return (m); } /* * Copy options from ip to jp, * omitting those not copied during fragmentation. */ int ip_optcopy(ip, jp) struct ip *ip, *jp; { register u_char *cp, *dp; int opt, optlen, cnt; cp = (u_char *)(ip + 1); dp = (u_char *)(jp + 1); cnt = (ip->ip_hl << 2) - sizeof (struct ip); for (; cnt > 0; cnt -= optlen, cp += optlen) { opt = cp[0]; if (opt == IPOPT_EOL) break; if (opt == IPOPT_NOP) { /* Preserve for IP mcast tunnel's LSRR alignment. */ *dp++ = IPOPT_NOP; optlen = 1; continue; } KASSERT(cnt >= IPOPT_OLEN + sizeof(*cp), ("ip_optcopy: malformed ipv4 option")); optlen = cp[IPOPT_OLEN]; KASSERT(optlen >= IPOPT_OLEN + sizeof(*cp) && optlen <= cnt, ("ip_optcopy: malformed ipv4 option")); /* bogus lengths should have been caught by ip_dooptions */ if (optlen > cnt) optlen = cnt; if (IPOPT_COPIED(opt)) { bcopy(cp, dp, optlen); dp += optlen; } } for (optlen = dp - (u_char *)(jp+1); optlen & 0x3; optlen++) *dp++ = IPOPT_EOL; return (optlen); } /* * IP socket option processing. */ int ip_ctloutput(so, sopt) struct socket *so; struct sockopt *sopt; { struct inpcb *inp = sotoinpcb(so); int error, optval; error = optval = 0; if (sopt->sopt_level != IPPROTO_IP) { return (EINVAL); } switch (sopt->sopt_dir) { case SOPT_SET: switch (sopt->sopt_name) { case IP_OPTIONS: #ifdef notyet case IP_RETOPTS: #endif { struct mbuf *m; if (sopt->sopt_valsize > MLEN) { error = EMSGSIZE; break; } MGET(m, sopt->sopt_td ? M_TRYWAIT : M_DONTWAIT, MT_HEADER); if (m == 0) { error = ENOBUFS; break; } m->m_len = sopt->sopt_valsize; error = sooptcopyin(sopt, mtod(m, char *), m->m_len, m->m_len); return (ip_pcbopts(sopt->sopt_name, &inp->inp_options, m)); } case IP_TOS: case IP_TTL: case IP_RECVOPTS: case IP_RECVRETOPTS: case IP_RECVDSTADDR: case IP_RECVTTL: case IP_RECVIF: case IP_FAITH: case IP_ONESBCAST: error = sooptcopyin(sopt, &optval, sizeof optval, sizeof optval); if (error) break; switch (sopt->sopt_name) { case IP_TOS: inp->inp_ip_tos = optval; break; case IP_TTL: inp->inp_ip_ttl = optval; break; #define OPTSET(bit) \ if (optval) \ inp->inp_flags |= bit; \ else \ inp->inp_flags &= ~bit; case IP_RECVOPTS: OPTSET(INP_RECVOPTS); break; case IP_RECVRETOPTS: OPTSET(INP_RECVRETOPTS); break; case IP_RECVDSTADDR: OPTSET(INP_RECVDSTADDR); break; case IP_RECVTTL: OPTSET(INP_RECVTTL); break; case IP_RECVIF: OPTSET(INP_RECVIF); break; case IP_FAITH: OPTSET(INP_FAITH); break; case IP_ONESBCAST: OPTSET(INP_ONESBCAST); break; } break; #undef OPTSET case IP_MULTICAST_IF: case IP_MULTICAST_VIF: case IP_MULTICAST_TTL: case IP_MULTICAST_LOOP: case IP_ADD_MEMBERSHIP: case IP_DROP_MEMBERSHIP: error = ip_setmoptions(sopt, &inp->inp_moptions); break; case IP_PORTRANGE: error = sooptcopyin(sopt, &optval, sizeof optval, sizeof optval); if (error) break; switch (optval) { case IP_PORTRANGE_DEFAULT: inp->inp_flags &= ~(INP_LOWPORT); inp->inp_flags &= ~(INP_HIGHPORT); break; case IP_PORTRANGE_HIGH: inp->inp_flags &= ~(INP_LOWPORT); inp->inp_flags |= INP_HIGHPORT; break; case IP_PORTRANGE_LOW: inp->inp_flags &= ~(INP_HIGHPORT); inp->inp_flags |= INP_LOWPORT; break; default: error = EINVAL; break; } break; #if defined(IPSEC) || defined(FAST_IPSEC) case IP_IPSEC_POLICY: { caddr_t req; size_t len = 0; int priv; struct mbuf *m; int optname; if ((error = soopt_getm(sopt, &m)) != 0) /* XXX */ break; if ((error = soopt_mcopyin(sopt, m)) != 0) /* XXX */ break; priv = (sopt->sopt_td != NULL && suser(sopt->sopt_td) != 0) ? 0 : 1; req = mtod(m, caddr_t); len = m->m_len; optname = sopt->sopt_name; error = ipsec4_set_policy(inp, optname, req, len, priv); m_freem(m); break; } #endif /*IPSEC*/ default: error = ENOPROTOOPT; break; } break; case SOPT_GET: switch (sopt->sopt_name) { case IP_OPTIONS: case IP_RETOPTS: if (inp->inp_options) error = sooptcopyout(sopt, mtod(inp->inp_options, char *), inp->inp_options->m_len); else sopt->sopt_valsize = 0; break; case IP_TOS: case IP_TTL: case IP_RECVOPTS: case IP_RECVRETOPTS: case IP_RECVDSTADDR: case IP_RECVTTL: case IP_RECVIF: case IP_PORTRANGE: case IP_FAITH: case IP_ONESBCAST: switch (sopt->sopt_name) { case IP_TOS: optval = inp->inp_ip_tos; break; case IP_TTL: optval = inp->inp_ip_ttl; break; #define OPTBIT(bit) (inp->inp_flags & bit ? 1 : 0) case IP_RECVOPTS: optval = OPTBIT(INP_RECVOPTS); break; case IP_RECVRETOPTS: optval = OPTBIT(INP_RECVRETOPTS); break; case IP_RECVDSTADDR: optval = OPTBIT(INP_RECVDSTADDR); break; case IP_RECVTTL: optval = OPTBIT(INP_RECVTTL); break; case IP_RECVIF: optval = OPTBIT(INP_RECVIF); break; case IP_PORTRANGE: if (inp->inp_flags & INP_HIGHPORT) optval = IP_PORTRANGE_HIGH; else if (inp->inp_flags & INP_LOWPORT) optval = IP_PORTRANGE_LOW; else optval = 0; break; case IP_FAITH: optval = OPTBIT(INP_FAITH); break; case IP_ONESBCAST: optval = OPTBIT(INP_ONESBCAST); break; } error = sooptcopyout(sopt, &optval, sizeof optval); break; case IP_MULTICAST_IF: case IP_MULTICAST_VIF: case IP_MULTICAST_TTL: case IP_MULTICAST_LOOP: case IP_ADD_MEMBERSHIP: case IP_DROP_MEMBERSHIP: error = ip_getmoptions(sopt, inp->inp_moptions); break; #if defined(IPSEC) || defined(FAST_IPSEC) case IP_IPSEC_POLICY: { struct mbuf *m = NULL; caddr_t req = NULL; size_t len = 0; if (m != 0) { req = mtod(m, caddr_t); len = m->m_len; } error = ipsec4_get_policy(sotoinpcb(so), req, len, &m); if (error == 0) error = soopt_mcopyout(sopt, m); /* XXX */ if (error == 0) m_freem(m); break; } #endif /*IPSEC*/ default: error = ENOPROTOOPT; break; } break; } return (error); } /* * Set up IP options in pcb for insertion in output packets. * Store in mbuf with pointer in pcbopt, adding pseudo-option * with destination address if source routed. */ static int ip_pcbopts(optname, pcbopt, m) int optname; struct mbuf **pcbopt; register struct mbuf *m; { register int cnt, optlen; register u_char *cp; u_char opt; /* turn off any old options */ if (*pcbopt) (void)m_free(*pcbopt); *pcbopt = 0; if (m == (struct mbuf *)0 || m->m_len == 0) { /* * Only turning off any previous options. */ if (m) (void)m_free(m); return (0); } if (m->m_len % sizeof(int32_t)) goto bad; /* * IP first-hop destination address will be stored before * actual options; move other options back * and clear it when none present. */ if (m->m_data + m->m_len + sizeof(struct in_addr) >= &m->m_dat[MLEN]) goto bad; cnt = m->m_len; m->m_len += sizeof(struct in_addr); cp = mtod(m, u_char *) + sizeof(struct in_addr); bcopy(mtod(m, void *), cp, (unsigned)cnt); bzero(mtod(m, void *), sizeof(struct in_addr)); for (; cnt > 0; cnt -= optlen, cp += optlen) { opt = cp[IPOPT_OPTVAL]; if (opt == IPOPT_EOL) break; if (opt == IPOPT_NOP) optlen = 1; else { if (cnt < IPOPT_OLEN + sizeof(*cp)) goto bad; optlen = cp[IPOPT_OLEN]; if (optlen < IPOPT_OLEN + sizeof(*cp) || optlen > cnt) goto bad; } switch (opt) { default: break; case IPOPT_LSRR: case IPOPT_SSRR: /* * user process specifies route as: * ->A->B->C->D * D must be our final destination (but we can't * check that since we may not have connected yet). * A is first hop destination, which doesn't appear in * actual IP option, but is stored before the options. */ if (optlen < IPOPT_MINOFF - 1 + sizeof(struct in_addr)) goto bad; m->m_len -= sizeof(struct in_addr); cnt -= sizeof(struct in_addr); optlen -= sizeof(struct in_addr); cp[IPOPT_OLEN] = optlen; /* * Move first hop before start of options. */ bcopy((caddr_t)&cp[IPOPT_OFFSET+1], mtod(m, caddr_t), sizeof(struct in_addr)); /* * Then copy rest of options back * to close up the deleted entry. */ bcopy((&cp[IPOPT_OFFSET+1] + sizeof(struct in_addr)), &cp[IPOPT_OFFSET+1], (unsigned)cnt + sizeof(struct in_addr)); break; } } if (m->m_len > MAX_IPOPTLEN + sizeof(struct in_addr)) goto bad; *pcbopt = m; return (0); bad: (void)m_free(m); return (EINVAL); } /* * XXX * The whole multicast option thing needs to be re-thought. * Several of these options are equally applicable to non-multicast * transmission, and one (IP_MULTICAST_TTL) totally duplicates a * standard option (IP_TTL). */ /* * following RFC1724 section 3.3, 0.0.0.0/8 is interpreted as interface index. */ static struct ifnet * ip_multicast_if(a, ifindexp) struct in_addr *a; int *ifindexp; { int ifindex; struct ifnet *ifp; if (ifindexp) *ifindexp = 0; if (ntohl(a->s_addr) >> 24 == 0) { ifindex = ntohl(a->s_addr) & 0xffffff; if (ifindex < 0 || if_index < ifindex) return NULL; ifp = ifnet_byindex(ifindex); if (ifindexp) *ifindexp = ifindex; } else { INADDR_TO_IFP(*a, ifp); } return ifp; } /* * Set the IP multicast options in response to user setsockopt(). */ static int ip_setmoptions(sopt, imop) struct sockopt *sopt; struct ip_moptions **imop; { int error = 0; int i; struct in_addr addr; struct ip_mreq mreq; struct ifnet *ifp; struct ip_moptions *imo = *imop; struct route ro; struct sockaddr_in *dst; int ifindex; int s; if (imo == NULL) { /* * No multicast option buffer attached to the pcb; * allocate one and initialize to default values. */ imo = (struct ip_moptions*)malloc(sizeof(*imo), M_IPMOPTS, M_WAITOK); if (imo == NULL) return (ENOBUFS); *imop = imo; imo->imo_multicast_ifp = NULL; imo->imo_multicast_addr.s_addr = INADDR_ANY; imo->imo_multicast_vif = -1; imo->imo_multicast_ttl = IP_DEFAULT_MULTICAST_TTL; imo->imo_multicast_loop = IP_DEFAULT_MULTICAST_LOOP; imo->imo_num_memberships = 0; } switch (sopt->sopt_name) { /* store an index number for the vif you wanna use in the send */ case IP_MULTICAST_VIF: if (legal_vif_num == 0) { error = EOPNOTSUPP; break; } error = sooptcopyin(sopt, &i, sizeof i, sizeof i); if (error) break; if (!legal_vif_num(i) && (i != -1)) { error = EINVAL; break; } imo->imo_multicast_vif = i; break; case IP_MULTICAST_IF: /* * Select the interface for outgoing multicast packets. */ error = sooptcopyin(sopt, &addr, sizeof addr, sizeof addr); if (error) break; /* * INADDR_ANY is used to remove a previous selection. * When no interface is selected, a default one is * chosen every time a multicast packet is sent. */ if (addr.s_addr == INADDR_ANY) { imo->imo_multicast_ifp = NULL; break; } /* * The selected interface is identified by its local * IP address. Find the interface and confirm that * it supports multicasting. */ s = splimp(); ifp = ip_multicast_if(&addr, &ifindex); if (ifp == NULL || (ifp->if_flags & IFF_MULTICAST) == 0) { splx(s); error = EADDRNOTAVAIL; break; } imo->imo_multicast_ifp = ifp; if (ifindex) imo->imo_multicast_addr = addr; else imo->imo_multicast_addr.s_addr = INADDR_ANY; splx(s); break; case IP_MULTICAST_TTL: /* * Set the IP time-to-live for outgoing multicast packets. * The original multicast API required a char argument, * which is inconsistent with the rest of the socket API. * We allow either a char or an int. */ if (sopt->sopt_valsize == 1) { u_char ttl; error = sooptcopyin(sopt, &ttl, 1, 1); if (error) break; imo->imo_multicast_ttl = ttl; } else { u_int ttl; error = sooptcopyin(sopt, &ttl, sizeof ttl, sizeof ttl); if (error) break; if (ttl > 255) error = EINVAL; else imo->imo_multicast_ttl = ttl; } break; case IP_MULTICAST_LOOP: /* * Set the loopback flag for outgoing multicast packets. * Must be zero or one. The original multicast API required a * char argument, which is inconsistent with the rest * of the socket API. We allow either a char or an int. */ if (sopt->sopt_valsize == 1) { u_char loop; error = sooptcopyin(sopt, &loop, 1, 1); if (error) break; imo->imo_multicast_loop = !!loop; } else { u_int loop; error = sooptcopyin(sopt, &loop, sizeof loop, sizeof loop); if (error) break; imo->imo_multicast_loop = !!loop; } break; case IP_ADD_MEMBERSHIP: /* * Add a multicast group membership. * Group must be a valid IP multicast address. */ error = sooptcopyin(sopt, &mreq, sizeof mreq, sizeof mreq); if (error) break; if (!IN_MULTICAST(ntohl(mreq.imr_multiaddr.s_addr))) { error = EINVAL; break; } s = splimp(); /* * If no interface address was provided, use the interface of * the route to the given multicast address. */ if (mreq.imr_interface.s_addr == INADDR_ANY) { bzero((caddr_t)&ro, sizeof(ro)); dst = (struct sockaddr_in *)&ro.ro_dst; dst->sin_len = sizeof(*dst); dst->sin_family = AF_INET; dst->sin_addr = mreq.imr_multiaddr; rtalloc(&ro); if (ro.ro_rt == NULL) { error = EADDRNOTAVAIL; splx(s); break; } ifp = ro.ro_rt->rt_ifp; - rtfree(ro.ro_rt); + RTFREE(ro.ro_rt); } else { ifp = ip_multicast_if(&mreq.imr_interface, NULL); } /* * See if we found an interface, and confirm that it * supports multicast. */ if (ifp == NULL || (ifp->if_flags & IFF_MULTICAST) == 0) { error = EADDRNOTAVAIL; splx(s); break; } /* * See if the membership already exists or if all the * membership slots are full. */ for (i = 0; i < imo->imo_num_memberships; ++i) { if (imo->imo_membership[i]->inm_ifp == ifp && imo->imo_membership[i]->inm_addr.s_addr == mreq.imr_multiaddr.s_addr) break; } if (i < imo->imo_num_memberships) { error = EADDRINUSE; splx(s); break; } if (i == IP_MAX_MEMBERSHIPS) { error = ETOOMANYREFS; splx(s); break; } /* * Everything looks good; add a new record to the multicast * address list for the given interface. */ if ((imo->imo_membership[i] = in_addmulti(&mreq.imr_multiaddr, ifp)) == NULL) { error = ENOBUFS; splx(s); break; } ++imo->imo_num_memberships; splx(s); break; case IP_DROP_MEMBERSHIP: /* * Drop a multicast group membership. * Group must be a valid IP multicast address. */ error = sooptcopyin(sopt, &mreq, sizeof mreq, sizeof mreq); if (error) break; if (!IN_MULTICAST(ntohl(mreq.imr_multiaddr.s_addr))) { error = EINVAL; break; } s = splimp(); /* * If an interface address was specified, get a pointer * to its ifnet structure. */ if (mreq.imr_interface.s_addr == INADDR_ANY) ifp = NULL; else { ifp = ip_multicast_if(&mreq.imr_interface, NULL); if (ifp == NULL) { error = EADDRNOTAVAIL; splx(s); break; } } /* * Find the membership in the membership array. */ for (i = 0; i < imo->imo_num_memberships; ++i) { if ((ifp == NULL || imo->imo_membership[i]->inm_ifp == ifp) && imo->imo_membership[i]->inm_addr.s_addr == mreq.imr_multiaddr.s_addr) break; } if (i == imo->imo_num_memberships) { error = EADDRNOTAVAIL; splx(s); break; } /* * Give up the multicast address record to which the * membership points. */ in_delmulti(imo->imo_membership[i]); /* * Remove the gap in the membership array. */ for (++i; i < imo->imo_num_memberships; ++i) imo->imo_membership[i-1] = imo->imo_membership[i]; --imo->imo_num_memberships; splx(s); break; default: error = EOPNOTSUPP; break; } /* * If all options have default values, no need to keep the mbuf. */ if (imo->imo_multicast_ifp == NULL && imo->imo_multicast_vif == -1 && imo->imo_multicast_ttl == IP_DEFAULT_MULTICAST_TTL && imo->imo_multicast_loop == IP_DEFAULT_MULTICAST_LOOP && imo->imo_num_memberships == 0) { free(*imop, M_IPMOPTS); *imop = NULL; } return (error); } /* * Return the IP multicast options in response to user getsockopt(). */ static int ip_getmoptions(sopt, imo) struct sockopt *sopt; register struct ip_moptions *imo; { struct in_addr addr; struct in_ifaddr *ia; int error, optval; u_char coptval; error = 0; switch (sopt->sopt_name) { case IP_MULTICAST_VIF: if (imo != NULL) optval = imo->imo_multicast_vif; else optval = -1; error = sooptcopyout(sopt, &optval, sizeof optval); break; case IP_MULTICAST_IF: if (imo == NULL || imo->imo_multicast_ifp == NULL) addr.s_addr = INADDR_ANY; else if (imo->imo_multicast_addr.s_addr) { /* return the value user has set */ addr = imo->imo_multicast_addr; } else { IFP_TO_IA(imo->imo_multicast_ifp, ia); addr.s_addr = (ia == NULL) ? INADDR_ANY : IA_SIN(ia)->sin_addr.s_addr; } error = sooptcopyout(sopt, &addr, sizeof addr); break; case IP_MULTICAST_TTL: if (imo == 0) optval = coptval = IP_DEFAULT_MULTICAST_TTL; else optval = coptval = imo->imo_multicast_ttl; if (sopt->sopt_valsize == 1) error = sooptcopyout(sopt, &coptval, 1); else error = sooptcopyout(sopt, &optval, sizeof optval); break; case IP_MULTICAST_LOOP: if (imo == 0) optval = coptval = IP_DEFAULT_MULTICAST_LOOP; else optval = coptval = imo->imo_multicast_loop; if (sopt->sopt_valsize == 1) error = sooptcopyout(sopt, &coptval, 1); else error = sooptcopyout(sopt, &optval, sizeof optval); break; default: error = ENOPROTOOPT; break; } return (error); } /* * Discard the IP multicast options. */ void ip_freemoptions(imo) register struct ip_moptions *imo; { register int i; if (imo != NULL) { for (i = 0; i < imo->imo_num_memberships; ++i) in_delmulti(imo->imo_membership[i]); free(imo, M_IPMOPTS); } } /* * Routine called from ip_output() to loop back a copy of an IP multicast * packet to the input queue of a specified interface. Note that this * calls the output routine of the loopback "driver", but with an interface * pointer that might NOT be a loopback interface -- evil, but easier than * replicating that code here. */ static void ip_mloopback(ifp, m, dst, hlen) struct ifnet *ifp; register struct mbuf *m; register struct sockaddr_in *dst; int hlen; { register struct ip *ip; struct mbuf *copym; copym = m_copy(m, 0, M_COPYALL); if (copym != NULL && (copym->m_flags & M_EXT || copym->m_len < hlen)) copym = m_pullup(copym, hlen); if (copym != NULL) { /* * We don't bother to fragment if the IP length is greater * than the interface's MTU. Can this possibly matter? */ ip = mtod(copym, struct ip *); ip->ip_len = htons(ip->ip_len); ip->ip_off = htons(ip->ip_off); ip->ip_sum = 0; ip->ip_sum = in_cksum(copym, hlen); /* * NB: * It's not clear whether there are any lingering * reentrancy problems in other areas which might * be exposed by using ip_input directly (in * particular, everything which modifies the packet * in-place). Yet another option is using the * protosw directly to deliver the looped back * packet. For the moment, we'll err on the side * of safety by using if_simloop(). */ #if 1 /* XXX */ if (dst->sin_family != AF_INET) { printf("ip_mloopback: bad address family %d\n", dst->sin_family); dst->sin_family = AF_INET; } #endif #ifdef notdef copym->m_pkthdr.rcvif = ifp; ip_input(copym); #else /* if the checksum hasn't been computed, mark it as valid */ if (copym->m_pkthdr.csum_flags & CSUM_DELAY_DATA) { copym->m_pkthdr.csum_flags |= CSUM_DATA_VALID | CSUM_PSEUDO_HDR; copym->m_pkthdr.csum_data = 0xffff; } if_simloop(ifp, copym, dst->sin_family, 0); #endif } } Index: head/sys/netinet6/icmp6.c =================================================================== --- head/sys/netinet6/icmp6.c (revision 120726) +++ head/sys/netinet6/icmp6.c (revision 120727) @@ -1,2894 +1,2892 @@ /* $FreeBSD$ */ /* $KAME: icmp6.c,v 1.211 2001/04/04 05:56:20 itojun Exp $ */ /* * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the project nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ /* * Copyright (c) 1982, 1986, 1988, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)ip_icmp.c 8.2 (Berkeley) 1/4/94 */ #include "opt_inet.h" #include "opt_inet6.h" #include "opt_ipsec.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef IPSEC #include #include #endif #ifdef FAST_IPSEC #include #include #define IPSEC #endif #include #ifdef HAVE_NRL_INPCB /* inpcb members */ #define in6pcb inpcb #define in6p_laddr inp_laddr6 #define in6p_faddr inp_faddr6 #define in6p_icmp6filt inp_icmp6filt #define in6p_route inp_route #define in6p_socket inp_socket #define in6p_flags inp_flags #define in6p_moptions inp_moptions6 #define in6p_outputopts inp_outputopts6 #define in6p_ip6 inp_ipv6 #define in6p_flowinfo inp_flowinfo #define in6p_sp inp_sp #define in6p_next inp_next #define in6p_prev inp_prev /* macro names */ #define sotoin6pcb sotoinpcb /* function names */ #define in6_pcbdetach in_pcbdetach #define in6_rtchange in_rtchange /* * for KAME src sync over BSD*'s. XXX: FreeBSD (>=3) are VERY different from * others... */ #define in6p_ip6_nxt inp_ipv6.ip6_nxt #endif extern struct domain inet6domain; struct icmp6stat icmp6stat; extern struct inpcbhead ripcb; extern int icmp6errppslim; static int icmp6errpps_count = 0; static struct timeval icmp6errppslim_last; extern int icmp6_nodeinfo; static void icmp6_errcount __P((struct icmp6errstat *, int, int)); static int icmp6_rip6_input __P((struct mbuf **, int)); static int icmp6_ratelimit __P((const struct in6_addr *, const int, const int)); static const char *icmp6_redirect_diag __P((struct in6_addr *, struct in6_addr *, struct in6_addr *)); #define HAVE_PPSRATECHECK #ifndef HAVE_PPSRATECHECK static int ppsratecheck __P((struct timeval *, int *, int)); #endif static struct mbuf *ni6_input __P((struct mbuf *, int)); static struct mbuf *ni6_nametodns __P((const char *, int, int)); static int ni6_dnsmatch __P((const char *, int, const char *, int)); static int ni6_addrs __P((struct icmp6_nodeinfo *, struct mbuf *, struct ifnet **, char *)); static int ni6_store_addrs __P((struct icmp6_nodeinfo *, struct icmp6_nodeinfo *, struct ifnet *, int)); static int icmp6_notify_error __P((struct mbuf *, int, int, int)); #ifdef COMPAT_RFC1885 static struct route_in6 icmp6_reflect_rt; #endif void icmp6_init() { mld6_init(); } static void icmp6_errcount(stat, type, code) struct icmp6errstat *stat; int type, code; { switch (type) { case ICMP6_DST_UNREACH: switch (code) { case ICMP6_DST_UNREACH_NOROUTE: stat->icp6errs_dst_unreach_noroute++; return; case ICMP6_DST_UNREACH_ADMIN: stat->icp6errs_dst_unreach_admin++; return; case ICMP6_DST_UNREACH_BEYONDSCOPE: stat->icp6errs_dst_unreach_beyondscope++; return; case ICMP6_DST_UNREACH_ADDR: stat->icp6errs_dst_unreach_addr++; return; case ICMP6_DST_UNREACH_NOPORT: stat->icp6errs_dst_unreach_noport++; return; } break; case ICMP6_PACKET_TOO_BIG: stat->icp6errs_packet_too_big++; return; case ICMP6_TIME_EXCEEDED: switch (code) { case ICMP6_TIME_EXCEED_TRANSIT: stat->icp6errs_time_exceed_transit++; return; case ICMP6_TIME_EXCEED_REASSEMBLY: stat->icp6errs_time_exceed_reassembly++; return; } break; case ICMP6_PARAM_PROB: switch (code) { case ICMP6_PARAMPROB_HEADER: stat->icp6errs_paramprob_header++; return; case ICMP6_PARAMPROB_NEXTHEADER: stat->icp6errs_paramprob_nextheader++; return; case ICMP6_PARAMPROB_OPTION: stat->icp6errs_paramprob_option++; return; } break; case ND_REDIRECT: stat->icp6errs_redirect++; return; } stat->icp6errs_unknown++; } /* * Generate an error packet of type error in response to bad IP6 packet. */ void icmp6_error(m, type, code, param) struct mbuf *m; int type, code, param; { struct ip6_hdr *oip6, *nip6; struct icmp6_hdr *icmp6; u_int preplen; int off; int nxt; icmp6stat.icp6s_error++; /* count per-type-code statistics */ icmp6_errcount(&icmp6stat.icp6s_outerrhist, type, code); #ifdef M_DECRYPTED /*not openbsd*/ if (m->m_flags & M_DECRYPTED) { icmp6stat.icp6s_canterror++; goto freeit; } #endif #ifndef PULLDOWN_TEST IP6_EXTHDR_CHECK(m, 0, sizeof(struct ip6_hdr), ); #else if (m->m_len < sizeof(struct ip6_hdr)) { m = m_pullup(m, sizeof(struct ip6_hdr)); if (m == NULL) return; } #endif oip6 = mtod(m, struct ip6_hdr *); /* * Multicast destination check. For unrecognized option errors, * this check has already done in ip6_unknown_opt(), so we can * check only for other errors. */ if ((m->m_flags & (M_BCAST|M_MCAST) || IN6_IS_ADDR_MULTICAST(&oip6->ip6_dst)) && (type != ICMP6_PACKET_TOO_BIG && (type != ICMP6_PARAM_PROB || code != ICMP6_PARAMPROB_OPTION))) goto freeit; /* Source address check. XXX: the case of anycast source? */ if (IN6_IS_ADDR_UNSPECIFIED(&oip6->ip6_src) || IN6_IS_ADDR_MULTICAST(&oip6->ip6_src)) goto freeit; /* * If we are about to send ICMPv6 against ICMPv6 error/redirect, * don't do it. */ nxt = -1; off = ip6_lasthdr(m, 0, IPPROTO_IPV6, &nxt); if (off >= 0 && nxt == IPPROTO_ICMPV6) { struct icmp6_hdr *icp; #ifndef PULLDOWN_TEST IP6_EXTHDR_CHECK(m, 0, off + sizeof(struct icmp6_hdr), ); icp = (struct icmp6_hdr *)(mtod(m, caddr_t) + off); #else IP6_EXTHDR_GET(icp, struct icmp6_hdr *, m, off, sizeof(*icp)); if (icp == NULL) { icmp6stat.icp6s_tooshort++; return; } #endif if (icp->icmp6_type < ICMP6_ECHO_REQUEST || icp->icmp6_type == ND_REDIRECT) { /* * ICMPv6 error * Special case: for redirect (which is * informational) we must not send icmp6 error. */ icmp6stat.icp6s_canterror++; goto freeit; } else { /* ICMPv6 informational - send the error */ } } else { /* non-ICMPv6 - send the error */ } oip6 = mtod(m, struct ip6_hdr *); /* adjust pointer */ /* Finally, do rate limitation check. */ if (icmp6_ratelimit(&oip6->ip6_src, type, code)) { icmp6stat.icp6s_toofreq++; goto freeit; } /* * OK, ICMP6 can be generated. */ if (m->m_pkthdr.len >= ICMPV6_PLD_MAXLEN) m_adj(m, ICMPV6_PLD_MAXLEN - m->m_pkthdr.len); preplen = sizeof(struct ip6_hdr) + sizeof(struct icmp6_hdr); M_PREPEND(m, preplen, M_DONTWAIT); if (m && m->m_len < preplen) m = m_pullup(m, preplen); if (m == NULL) { nd6log((LOG_DEBUG, "ENOBUFS in icmp6_error %d\n", __LINE__)); return; } nip6 = mtod(m, struct ip6_hdr *); nip6->ip6_src = oip6->ip6_src; nip6->ip6_dst = oip6->ip6_dst; if (IN6_IS_SCOPE_LINKLOCAL(&oip6->ip6_src)) oip6->ip6_src.s6_addr16[1] = 0; if (IN6_IS_SCOPE_LINKLOCAL(&oip6->ip6_dst)) oip6->ip6_dst.s6_addr16[1] = 0; icmp6 = (struct icmp6_hdr *)(nip6 + 1); icmp6->icmp6_type = type; icmp6->icmp6_code = code; icmp6->icmp6_pptr = htonl((u_int32_t)param); /* * icmp6_reflect() is designed to be in the input path. * icmp6_error() can be called from both input and outut path, * and if we are in output path rcvif could contain bogus value. * clear m->m_pkthdr.rcvif for safety, we should have enough scope * information in ip header (nip6). */ m->m_pkthdr.rcvif = NULL; icmp6stat.icp6s_outhist[type]++; icmp6_reflect(m, sizeof(struct ip6_hdr)); /* header order: IPv6 - ICMPv6 */ return; freeit: /* * If we can't tell wheter or not we can generate ICMP6, free it. */ m_freem(m); } /* * Process a received ICMP6 message. */ int icmp6_input(mp, offp, proto) struct mbuf **mp; int *offp, proto; { struct mbuf *m = *mp, *n; struct ip6_hdr *ip6, *nip6; struct icmp6_hdr *icmp6, *nicmp6; int off = *offp; int icmp6len = m->m_pkthdr.len - *offp; int code, sum, noff; #ifndef PULLDOWN_TEST IP6_EXTHDR_CHECK(m, off, sizeof(struct icmp6_hdr), IPPROTO_DONE); /* m might change if M_LOOP. So, call mtod after this */ #endif /* * Locate icmp6 structure in mbuf, and check * that not corrupted and of at least minimum length */ ip6 = mtod(m, struct ip6_hdr *); if (icmp6len < sizeof(struct icmp6_hdr)) { icmp6stat.icp6s_tooshort++; goto freeit; } /* * calculate the checksum */ #ifndef PULLDOWN_TEST icmp6 = (struct icmp6_hdr *)((caddr_t)ip6 + off); #else IP6_EXTHDR_GET(icmp6, struct icmp6_hdr *, m, off, sizeof(*icmp6)); if (icmp6 == NULL) { icmp6stat.icp6s_tooshort++; return IPPROTO_DONE; } #endif code = icmp6->icmp6_code; if ((sum = in6_cksum(m, IPPROTO_ICMPV6, off, icmp6len)) != 0) { nd6log((LOG_ERR, "ICMP6 checksum error(%d|%x) %s\n", icmp6->icmp6_type, sum, ip6_sprintf(&ip6->ip6_src))); icmp6stat.icp6s_checksum++; goto freeit; } if (faithprefix_p != NULL && (*faithprefix_p)(&ip6->ip6_dst)) { /* * Deliver very specific ICMP6 type only. * This is important to deilver TOOBIG. Otherwise PMTUD * will not work. */ switch (icmp6->icmp6_type) { case ICMP6_DST_UNREACH: case ICMP6_PACKET_TOO_BIG: case ICMP6_TIME_EXCEEDED: break; default: goto freeit; } } icmp6stat.icp6s_inhist[icmp6->icmp6_type]++; icmp6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_msg); if (icmp6->icmp6_type < ICMP6_INFOMSG_MASK) icmp6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_error); switch (icmp6->icmp6_type) { case ICMP6_DST_UNREACH: icmp6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_dstunreach); switch (code) { case ICMP6_DST_UNREACH_NOROUTE: code = PRC_UNREACH_NET; break; case ICMP6_DST_UNREACH_ADMIN: icmp6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_adminprohib); code = PRC_UNREACH_PROTOCOL; /* is this a good code? */ break; case ICMP6_DST_UNREACH_ADDR: code = PRC_HOSTDEAD; break; #ifdef COMPAT_RFC1885 case ICMP6_DST_UNREACH_NOTNEIGHBOR: code = PRC_UNREACH_SRCFAIL; break; #else case ICMP6_DST_UNREACH_BEYONDSCOPE: /* I mean "source address was incorrect." */ code = PRC_PARAMPROB; break; #endif case ICMP6_DST_UNREACH_NOPORT: code = PRC_UNREACH_PORT; break; default: goto badcode; } goto deliver; break; case ICMP6_PACKET_TOO_BIG: icmp6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_pkttoobig); if (code != 0) goto badcode; code = PRC_MSGSIZE; /* * Updating the path MTU will be done after examining * intermediate extension headers. */ goto deliver; break; case ICMP6_TIME_EXCEEDED: icmp6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_timeexceed); switch (code) { case ICMP6_TIME_EXCEED_TRANSIT: case ICMP6_TIME_EXCEED_REASSEMBLY: code += PRC_TIMXCEED_INTRANS; break; default: goto badcode; } goto deliver; break; case ICMP6_PARAM_PROB: icmp6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_paramprob); switch (code) { case ICMP6_PARAMPROB_NEXTHEADER: code = PRC_UNREACH_PROTOCOL; break; case ICMP6_PARAMPROB_HEADER: case ICMP6_PARAMPROB_OPTION: code = PRC_PARAMPROB; break; default: goto badcode; } goto deliver; break; case ICMP6_ECHO_REQUEST: icmp6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_echo); if (code != 0) goto badcode; if ((n = m_copy(m, 0, M_COPYALL)) == NULL) { /* Give up remote */ break; } if ((n->m_flags & M_EXT) != 0 || n->m_len < off + sizeof(struct icmp6_hdr)) { struct mbuf *n0 = n; const int maxlen = sizeof(*nip6) + sizeof(*nicmp6); int n0len; /* * Prepare an internal mbuf. m_pullup() doesn't * always copy the length we specified. */ if (maxlen >= MCLBYTES) { /* Give up remote */ m_freem(n0); break; } MGETHDR(n, M_DONTWAIT, n0->m_type); n0len = n0->m_pkthdr.len; /* save for use below */ if (n) M_MOVE_PKTHDR(n, n0); if (n && maxlen >= MHLEN) { MCLGET(n, M_DONTWAIT); if ((n->m_flags & M_EXT) == 0) { m_free(n); n = NULL; } } if (n == NULL) { /* Give up remote */ m_freem(n0); break; } /* * Copy IPv6 and ICMPv6 only. */ nip6 = mtod(n, struct ip6_hdr *); bcopy(ip6, nip6, sizeof(struct ip6_hdr)); nicmp6 = (struct icmp6_hdr *)(nip6 + 1); bcopy(icmp6, nicmp6, sizeof(struct icmp6_hdr)); noff = sizeof(struct ip6_hdr); /* new mbuf contains only ipv6+icmpv6 headers */ n->m_len = noff + sizeof(struct icmp6_hdr); /* * Adjust mbuf. ip6_plen will be adjusted in * ip6_output(). */ m_adj(n0, off + sizeof(struct icmp6_hdr)); /* recalculate complete packet size */ n->m_pkthdr.len = n0len + (noff - off); n->m_next = n0; } else { nip6 = mtod(n, struct ip6_hdr *); nicmp6 = (struct icmp6_hdr *)((caddr_t)nip6 + off); noff = off; } nicmp6->icmp6_type = ICMP6_ECHO_REPLY; nicmp6->icmp6_code = 0; if (n) { icmp6stat.icp6s_reflect++; icmp6stat.icp6s_outhist[ICMP6_ECHO_REPLY]++; icmp6_reflect(n, noff); } break; case ICMP6_ECHO_REPLY: icmp6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_echoreply); if (code != 0) goto badcode; break; case MLD_LISTENER_QUERY: case MLD_LISTENER_REPORT: if (icmp6len < sizeof(struct mld_hdr)) goto badlen; if (icmp6->icmp6_type == MLD_LISTENER_QUERY) /* XXX: ugly... */ icmp6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_mldquery); else icmp6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_mldreport); if ((n = m_copym(m, 0, M_COPYALL, M_DONTWAIT)) == NULL) { /* give up local */ mld6_input(m, off); m = NULL; goto freeit; } mld6_input(n, off); /* m stays. */ break; case MLD_LISTENER_DONE: icmp6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_mlddone); if (icmp6len < sizeof(struct mld_hdr)) /* necessary? */ goto badlen; break; /* nothing to be done in kernel */ case MLD_MTRACE_RESP: case MLD_MTRACE: /* XXX: these two are experimental. not officially defind. */ /* XXX: per-interface statistics? */ break; /* just pass it to applications */ case ICMP6_WRUREQUEST: /* ICMP6_FQDN_QUERY */ { enum { WRU, FQDN } mode; if (!icmp6_nodeinfo) break; if (icmp6len == sizeof(struct icmp6_hdr) + 4) mode = WRU; else if (icmp6len >= sizeof(struct icmp6_nodeinfo)) mode = FQDN; else goto badlen; #define hostnamelen strlen(hostname) if (mode == FQDN) { #ifndef PULLDOWN_TEST IP6_EXTHDR_CHECK(m, off, sizeof(struct icmp6_nodeinfo), IPPROTO_DONE); #endif n = m_copy(m, 0, M_COPYALL); if (n) n = ni6_input(n, off); /* XXX meaningless if n == NULL */ noff = sizeof(struct ip6_hdr); } else { u_char *p; int maxlen, maxhlen; if ((icmp6_nodeinfo & 5) != 5) break; if (code != 0) goto badcode; maxlen = sizeof(*nip6) + sizeof(*nicmp6) + 4; if (maxlen >= MCLBYTES) { /* Give up remote */ break; } MGETHDR(n, M_DONTWAIT, m->m_type); if (n && maxlen > MHLEN) { MCLGET(n, M_DONTWAIT); if ((n->m_flags & M_EXT) == 0) { m_free(n); n = NULL; } } if (!m_dup_pkthdr(n, m, M_DONTWAIT)) { /* * Previous code did a blind M_COPY_PKTHDR * and said "just for rcvif". If true, then * we could tolerate the dup failing (due to * the deep copy of the tag chain). For now * be conservative and just fail. */ m_free(n); n = NULL; } if (n == NULL) { /* Give up remote */ break; } n->m_pkthdr.rcvif = NULL; n->m_len = 0; maxhlen = M_TRAILINGSPACE(n) - maxlen; if (maxhlen > hostnamelen) maxhlen = hostnamelen; /* * Copy IPv6 and ICMPv6 only. */ nip6 = mtod(n, struct ip6_hdr *); bcopy(ip6, nip6, sizeof(struct ip6_hdr)); nicmp6 = (struct icmp6_hdr *)(nip6 + 1); bcopy(icmp6, nicmp6, sizeof(struct icmp6_hdr)); p = (u_char *)(nicmp6 + 1); bzero(p, 4); bcopy(hostname, p + 4, maxhlen); /* meaningless TTL */ noff = sizeof(struct ip6_hdr); n->m_pkthdr.len = n->m_len = sizeof(struct ip6_hdr) + sizeof(struct icmp6_hdr) + 4 + maxhlen; nicmp6->icmp6_type = ICMP6_WRUREPLY; nicmp6->icmp6_code = 0; } #undef hostnamelen if (n) { icmp6stat.icp6s_reflect++; icmp6stat.icp6s_outhist[ICMP6_WRUREPLY]++; icmp6_reflect(n, noff); } break; } case ICMP6_WRUREPLY: if (code != 0) goto badcode; break; case ND_ROUTER_SOLICIT: icmp6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_routersolicit); if (code != 0) goto badcode; if (icmp6len < sizeof(struct nd_router_solicit)) goto badlen; if ((n = m_copym(m, 0, M_COPYALL, M_DONTWAIT)) == NULL) { /* give up local */ nd6_rs_input(m, off, icmp6len); m = NULL; goto freeit; } nd6_rs_input(n, off, icmp6len); /* m stays. */ break; case ND_ROUTER_ADVERT: icmp6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_routeradvert); if (code != 0) goto badcode; if (icmp6len < sizeof(struct nd_router_advert)) goto badlen; if ((n = m_copym(m, 0, M_COPYALL, M_DONTWAIT)) == NULL) { /* give up local */ nd6_ra_input(m, off, icmp6len); m = NULL; goto freeit; } nd6_ra_input(n, off, icmp6len); /* m stays. */ break; case ND_NEIGHBOR_SOLICIT: icmp6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_neighborsolicit); if (code != 0) goto badcode; if (icmp6len < sizeof(struct nd_neighbor_solicit)) goto badlen; if ((n = m_copym(m, 0, M_COPYALL, M_DONTWAIT)) == NULL) { /* give up local */ nd6_ns_input(m, off, icmp6len); m = NULL; goto freeit; } nd6_ns_input(n, off, icmp6len); /* m stays. */ break; case ND_NEIGHBOR_ADVERT: icmp6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_neighboradvert); if (code != 0) goto badcode; if (icmp6len < sizeof(struct nd_neighbor_advert)) goto badlen; if ((n = m_copym(m, 0, M_COPYALL, M_DONTWAIT)) == NULL) { /* give up local */ nd6_na_input(m, off, icmp6len); m = NULL; goto freeit; } nd6_na_input(n, off, icmp6len); /* m stays. */ break; case ND_REDIRECT: icmp6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_redirect); if (code != 0) goto badcode; if (icmp6len < sizeof(struct nd_redirect)) goto badlen; if ((n = m_copym(m, 0, M_COPYALL, M_DONTWAIT)) == NULL) { /* give up local */ icmp6_redirect_input(m, off); m = NULL; goto freeit; } icmp6_redirect_input(n, off); /* m stays. */ break; case ICMP6_ROUTER_RENUMBERING: if (code != ICMP6_ROUTER_RENUMBERING_COMMAND && code != ICMP6_ROUTER_RENUMBERING_RESULT) goto badcode; if (icmp6len < sizeof(struct icmp6_router_renum)) goto badlen; break; default: nd6log((LOG_DEBUG, "icmp6_input: unknown type %d(src=%s, dst=%s, ifid=%d)\n", icmp6->icmp6_type, ip6_sprintf(&ip6->ip6_src), ip6_sprintf(&ip6->ip6_dst), m->m_pkthdr.rcvif ? m->m_pkthdr.rcvif->if_index : 0)); if (icmp6->icmp6_type < ICMP6_ECHO_REQUEST) { /* ICMPv6 error: MUST deliver it by spec... */ code = PRC_NCMDS; /* deliver */ } else { /* ICMPv6 informational: MUST not deliver */ break; } deliver: if (icmp6_notify_error(m, off, icmp6len, code)) { /* In this case, m should've been freed. */ return(IPPROTO_DONE); } break; badcode: icmp6stat.icp6s_badcode++; break; badlen: icmp6stat.icp6s_badlen++; break; } /* deliver the packet to appropriate sockets */ icmp6_rip6_input(&m, *offp); return IPPROTO_DONE; freeit: m_freem(m); return IPPROTO_DONE; } static int icmp6_notify_error(m, off, icmp6len, code) struct mbuf *m; int off, icmp6len, code; { struct icmp6_hdr *icmp6; struct ip6_hdr *eip6; u_int32_t notifymtu; struct sockaddr_in6 icmp6src, icmp6dst; if (icmp6len < sizeof(struct icmp6_hdr) + sizeof(struct ip6_hdr)) { icmp6stat.icp6s_tooshort++; goto freeit; } #ifndef PULLDOWN_TEST IP6_EXTHDR_CHECK(m, off, sizeof(struct icmp6_hdr) + sizeof(struct ip6_hdr), -1); icmp6 = (struct icmp6_hdr *)(mtod(m, caddr_t) + off); #else IP6_EXTHDR_GET(icmp6, struct icmp6_hdr *, m, off, sizeof(*icmp6) + sizeof(struct ip6_hdr)); if (icmp6 == NULL) { icmp6stat.icp6s_tooshort++; return(-1); } #endif eip6 = (struct ip6_hdr *)(icmp6 + 1); /* Detect the upper level protocol */ { void (*ctlfunc) __P((int, struct sockaddr *, void *)); u_int8_t nxt = eip6->ip6_nxt; int eoff = off + sizeof(struct icmp6_hdr) + sizeof(struct ip6_hdr); struct ip6ctlparam ip6cp; struct in6_addr *finaldst = NULL; int icmp6type = icmp6->icmp6_type; struct ip6_frag *fh; struct ip6_rthdr *rth; struct ip6_rthdr0 *rth0; int rthlen; while (1) { /* XXX: should avoid infinite loop explicitly? */ struct ip6_ext *eh; switch (nxt) { case IPPROTO_HOPOPTS: case IPPROTO_DSTOPTS: case IPPROTO_AH: #ifndef PULLDOWN_TEST IP6_EXTHDR_CHECK(m, 0, eoff + sizeof(struct ip6_ext), -1); eh = (struct ip6_ext *)(mtod(m, caddr_t) + eoff); #else IP6_EXTHDR_GET(eh, struct ip6_ext *, m, eoff, sizeof(*eh)); if (eh == NULL) { icmp6stat.icp6s_tooshort++; return(-1); } #endif if (nxt == IPPROTO_AH) eoff += (eh->ip6e_len + 2) << 2; else eoff += (eh->ip6e_len + 1) << 3; nxt = eh->ip6e_nxt; break; case IPPROTO_ROUTING: /* * When the erroneous packet contains a * routing header, we should examine the * header to determine the final destination. * Otherwise, we can't properly update * information that depends on the final * destination (e.g. path MTU). */ #ifndef PULLDOWN_TEST IP6_EXTHDR_CHECK(m, 0, eoff + sizeof(*rth), -1); rth = (struct ip6_rthdr *)(mtod(m, caddr_t) + eoff); #else IP6_EXTHDR_GET(rth, struct ip6_rthdr *, m, eoff, sizeof(*rth)); if (rth == NULL) { icmp6stat.icp6s_tooshort++; return(-1); } #endif rthlen = (rth->ip6r_len + 1) << 3; /* * XXX: currently there is no * officially defined type other * than type-0. * Note that if the segment left field * is 0, all intermediate hops must * have been passed. */ if (rth->ip6r_segleft && rth->ip6r_type == IPV6_RTHDR_TYPE_0) { int hops; #ifndef PULLDOWN_TEST IP6_EXTHDR_CHECK(m, 0, eoff + rthlen, -1); rth0 = (struct ip6_rthdr0 *)(mtod(m, caddr_t) + eoff); #else IP6_EXTHDR_GET(rth0, struct ip6_rthdr0 *, m, eoff, rthlen); if (rth0 == NULL) { icmp6stat.icp6s_tooshort++; return(-1); } #endif /* just ignore a bogus header */ if ((rth0->ip6r0_len % 2) == 0 && (hops = rth0->ip6r0_len/2)) finaldst = (struct in6_addr *)(rth0 + 1) + (hops - 1); } eoff += rthlen; nxt = rth->ip6r_nxt; break; case IPPROTO_FRAGMENT: #ifndef PULLDOWN_TEST IP6_EXTHDR_CHECK(m, 0, eoff + sizeof(struct ip6_frag), -1); fh = (struct ip6_frag *)(mtod(m, caddr_t) + eoff); #else IP6_EXTHDR_GET(fh, struct ip6_frag *, m, eoff, sizeof(*fh)); if (fh == NULL) { icmp6stat.icp6s_tooshort++; return(-1); } #endif /* * Data after a fragment header is meaningless * unless it is the first fragment, but * we'll go to the notify label for path MTU * discovery. */ if (fh->ip6f_offlg & IP6F_OFF_MASK) goto notify; eoff += sizeof(struct ip6_frag); nxt = fh->ip6f_nxt; break; default: /* * This case includes ESP and the No Next * Header. In such cases going to the notify * label does not have any meaning * (i.e. ctlfunc will be NULL), but we go * anyway since we might have to update * path MTU information. */ goto notify; } } notify: #ifndef PULLDOWN_TEST icmp6 = (struct icmp6_hdr *)(mtod(m, caddr_t) + off); #else IP6_EXTHDR_GET(icmp6, struct icmp6_hdr *, m, off, sizeof(*icmp6) + sizeof(struct ip6_hdr)); if (icmp6 == NULL) { icmp6stat.icp6s_tooshort++; return(-1); } #endif eip6 = (struct ip6_hdr *)(icmp6 + 1); bzero(&icmp6dst, sizeof(icmp6dst)); icmp6dst.sin6_len = sizeof(struct sockaddr_in6); icmp6dst.sin6_family = AF_INET6; if (finaldst == NULL) icmp6dst.sin6_addr = eip6->ip6_dst; else icmp6dst.sin6_addr = *finaldst; icmp6dst.sin6_scope_id = in6_addr2scopeid(m->m_pkthdr.rcvif, &icmp6dst.sin6_addr); #ifndef SCOPEDROUTING if (in6_embedscope(&icmp6dst.sin6_addr, &icmp6dst, NULL, NULL)) { /* should be impossbile */ nd6log((LOG_DEBUG, "icmp6_notify_error: in6_embedscope failed\n")); goto freeit; } #endif /* * retrieve parameters from the inner IPv6 header, and convert * them into sockaddr structures. */ bzero(&icmp6src, sizeof(icmp6src)); icmp6src.sin6_len = sizeof(struct sockaddr_in6); icmp6src.sin6_family = AF_INET6; icmp6src.sin6_addr = eip6->ip6_src; icmp6src.sin6_scope_id = in6_addr2scopeid(m->m_pkthdr.rcvif, &icmp6src.sin6_addr); #ifndef SCOPEDROUTING if (in6_embedscope(&icmp6src.sin6_addr, &icmp6src, NULL, NULL)) { /* should be impossbile */ nd6log((LOG_DEBUG, "icmp6_notify_error: in6_embedscope failed\n")); goto freeit; } #endif icmp6src.sin6_flowinfo = (eip6->ip6_flow & IPV6_FLOWLABEL_MASK); if (finaldst == NULL) finaldst = &eip6->ip6_dst; ip6cp.ip6c_m = m; ip6cp.ip6c_icmp6 = icmp6; ip6cp.ip6c_ip6 = (struct ip6_hdr *)(icmp6 + 1); ip6cp.ip6c_off = eoff; ip6cp.ip6c_finaldst = finaldst; ip6cp.ip6c_src = &icmp6src; ip6cp.ip6c_nxt = nxt; if (icmp6type == ICMP6_PACKET_TOO_BIG) { notifymtu = ntohl(icmp6->icmp6_mtu); ip6cp.ip6c_cmdarg = (void *)¬ifymtu; icmp6_mtudisc_update(&ip6cp, 1); /*XXX*/ } ctlfunc = (void (*) __P((int, struct sockaddr *, void *))) (inet6sw[ip6_protox[nxt]].pr_ctlinput); if (ctlfunc) { (void) (*ctlfunc)(code, (struct sockaddr *)&icmp6dst, &ip6cp); } } return(0); freeit: m_freem(m); return(-1); } void icmp6_mtudisc_update(ip6cp, validated) struct ip6ctlparam *ip6cp; int validated; { struct in6_addr *dst = ip6cp->ip6c_finaldst; struct icmp6_hdr *icmp6 = ip6cp->ip6c_icmp6; struct mbuf *m = ip6cp->ip6c_m; /* will be necessary for scope issue */ u_int mtu = ntohl(icmp6->icmp6_mtu); struct rtentry *rt = NULL; struct sockaddr_in6 sin6; if (!validated) return; bzero(&sin6, sizeof(sin6)); sin6.sin6_family = PF_INET6; sin6.sin6_len = sizeof(struct sockaddr_in6); sin6.sin6_addr = *dst; /* XXX normally, this won't happen */ if (IN6_IS_ADDR_LINKLOCAL(dst)) { sin6.sin6_addr.s6_addr16[1] = htons(m->m_pkthdr.rcvif->if_index); } /* sin6.sin6_scope_id = XXX: should be set if DST is a scoped addr */ rt = rtalloc1((struct sockaddr *)&sin6, 0, RTF_CLONING | RTF_PRCLONING); if (rt && (rt->rt_flags & RTF_HOST) && !(rt->rt_rmx.rmx_locks & RTV_MTU)) { if (mtu < IPV6_MMTU) { /* xxx */ rt->rt_rmx.rmx_locks |= RTV_MTU; } else if (mtu < rt->rt_ifp->if_mtu && rt->rt_rmx.rmx_mtu > mtu) { icmp6stat.icp6s_pmtuchg++; rt->rt_rmx.rmx_mtu = mtu; } } - if (rt) { /* XXX: need braces to avoid conflict with else in RTFREE. */ - RTFREE(rt); - } + if (rt) + rtfree(rt); } /* * Process a Node Information Query packet, based on * draft-ietf-ipngwg-icmp-name-lookups-07. * * Spec incompatibilities: * - IPv6 Subject address handling * - IPv4 Subject address handling support missing * - Proxy reply (answer even if it's not for me) * - joins NI group address at in6_ifattach() time only, does not cope * with hostname changes by sethostname(3) */ #define hostnamelen strlen(hostname) static struct mbuf * ni6_input(m, off) struct mbuf *m; int off; { struct icmp6_nodeinfo *ni6, *nni6; struct mbuf *n = NULL; u_int16_t qtype; int subjlen; int replylen = sizeof(struct ip6_hdr) + sizeof(struct icmp6_nodeinfo); struct ni_reply_fqdn *fqdn; int addrs; /* for NI_QTYPE_NODEADDR */ struct ifnet *ifp = NULL; /* for NI_QTYPE_NODEADDR */ struct sockaddr_in6 sin6; /* double meaning; ip6_dst and subjectaddr */ struct sockaddr_in6 sin6_d; /* XXX: we should retrieve this from m_aux */ struct ip6_hdr *ip6; int oldfqdn = 0; /* if 1, return pascal string (03 draft) */ char *subj = NULL; struct in6_ifaddr *ia6 = NULL; ip6 = mtod(m, struct ip6_hdr *); #ifndef PULLDOWN_TEST ni6 = (struct icmp6_nodeinfo *)(mtod(m, caddr_t) + off); #else IP6_EXTHDR_GET(ni6, struct icmp6_nodeinfo *, m, off, sizeof(*ni6)); if (ni6 == NULL) { /* m is already reclaimed */ return NULL; } #endif /* * Validate IPv6 destination address. * * The Responder must discard the Query without further processing * unless it is one of the Responder's unicast or anycast addresses, or * a link-local scope multicast address which the Responder has joined. * [icmp-name-lookups-07, Section 4.] */ bzero(&sin6, sizeof(sin6)); sin6.sin6_family = AF_INET6; sin6.sin6_len = sizeof(struct sockaddr_in6); bcopy(&ip6->ip6_dst, &sin6.sin6_addr, sizeof(sin6.sin6_addr)); /* XXX scopeid */ if ((ia6 = (struct in6_ifaddr *)ifa_ifwithaddr((struct sockaddr *)&sin6)) != NULL) { /* unicast/anycast, fine */ if ((ia6->ia6_flags & IN6_IFF_TEMPORARY) != 0 && (icmp6_nodeinfo & 4) == 0) { nd6log((LOG_DEBUG, "ni6_input: ignore node info to " "a temporary address in %s:%d", __FILE__, __LINE__)); goto bad; } } else if (IN6_IS_ADDR_MC_LINKLOCAL(&sin6.sin6_addr)) ; /* link-local multicast, fine */ else goto bad; /* validate query Subject field. */ qtype = ntohs(ni6->ni_qtype); subjlen = m->m_pkthdr.len - off - sizeof(struct icmp6_nodeinfo); switch (qtype) { case NI_QTYPE_NOOP: case NI_QTYPE_SUPTYPES: /* 07 draft */ if (ni6->ni_code == ICMP6_NI_SUBJ_FQDN && subjlen == 0) break; /* FALLTHROUGH */ case NI_QTYPE_FQDN: case NI_QTYPE_NODEADDR: switch (ni6->ni_code) { case ICMP6_NI_SUBJ_IPV6: #if ICMP6_NI_SUBJ_IPV6 != 0 case 0: #endif /* * backward compatibility - try to accept 03 draft * format, where no Subject is present. */ if (qtype == NI_QTYPE_FQDN && ni6->ni_code == 0 && subjlen == 0) { oldfqdn++; break; } #if ICMP6_NI_SUBJ_IPV6 != 0 if (ni6->ni_code != ICMP6_NI_SUBJ_IPV6) goto bad; #endif if (subjlen != sizeof(sin6.sin6_addr)) goto bad; /* * Validate Subject address. * * Not sure what exactly "address belongs to the node" * means in the spec, is it just unicast, or what? * * At this moment we consider Subject address as * "belong to the node" if the Subject address equals * to the IPv6 destination address; validation for * IPv6 destination address should have done enough * check for us. * * We do not do proxy at this moment. */ /* m_pulldown instead of copy? */ m_copydata(m, off + sizeof(struct icmp6_nodeinfo), subjlen, (caddr_t)&sin6.sin6_addr); sin6.sin6_scope_id = in6_addr2scopeid(m->m_pkthdr.rcvif, &sin6.sin6_addr); #ifndef SCOPEDROUTING in6_embedscope(&sin6.sin6_addr, &sin6, NULL, NULL); #endif bzero(&sin6_d, sizeof(sin6_d)); sin6_d.sin6_family = AF_INET6; /* not used, actually */ sin6_d.sin6_len = sizeof(sin6_d); /* ditto */ sin6_d.sin6_addr = ip6->ip6_dst; sin6_d.sin6_scope_id = in6_addr2scopeid(m->m_pkthdr.rcvif, &ip6->ip6_dst); #ifndef SCOPEDROUTING in6_embedscope(&sin6_d.sin6_addr, &sin6_d, NULL, NULL); #endif subj = (char *)&sin6; if (SA6_ARE_ADDR_EQUAL(&sin6, &sin6_d)) break; /* * XXX if we are to allow other cases, we should really * be careful about scope here. * basically, we should disallow queries toward IPv6 * destination X with subject Y, if scope(X) > scope(Y). * if we allow scope(X) > scope(Y), it will result in * information leakage across scope boundary. */ goto bad; case ICMP6_NI_SUBJ_FQDN: /* * Validate Subject name with gethostname(3). * * The behavior may need some debate, since: * - we are not sure if the node has FQDN as * hostname (returned by gethostname(3)). * - the code does wildcard match for truncated names. * however, we are not sure if we want to perform * wildcard match, if gethostname(3) side has * truncated hostname. */ n = ni6_nametodns(hostname, hostnamelen, 0); if (!n || n->m_next || n->m_len == 0) goto bad; IP6_EXTHDR_GET(subj, char *, m, off + sizeof(struct icmp6_nodeinfo), subjlen); if (subj == NULL) goto bad; if (!ni6_dnsmatch(subj, subjlen, mtod(n, const char *), n->m_len)) { goto bad; } m_freem(n); n = NULL; break; case ICMP6_NI_SUBJ_IPV4: /* XXX: to be implemented? */ default: goto bad; } break; } /* refuse based on configuration. XXX ICMP6_NI_REFUSED? */ switch (qtype) { case NI_QTYPE_FQDN: if ((icmp6_nodeinfo & 1) == 0) goto bad; break; case NI_QTYPE_NODEADDR: if ((icmp6_nodeinfo & 2) == 0) goto bad; break; } /* guess reply length */ switch (qtype) { case NI_QTYPE_NOOP: break; /* no reply data */ case NI_QTYPE_SUPTYPES: replylen += sizeof(u_int32_t); break; case NI_QTYPE_FQDN: /* XXX will append an mbuf */ replylen += offsetof(struct ni_reply_fqdn, ni_fqdn_namelen); break; case NI_QTYPE_NODEADDR: addrs = ni6_addrs(ni6, m, &ifp, subj); if ((replylen += addrs * (sizeof(struct in6_addr) + sizeof(u_int32_t))) > MCLBYTES) replylen = MCLBYTES; /* XXX: will truncate pkt later */ break; default: /* * XXX: We must return a reply with the ICMP6 code * `unknown Qtype' in this case. However we regard the case * as an FQDN query for backward compatibility. * Older versions set a random value to this field, * so it rarely varies in the defined qtypes. * But the mechanism is not reliable... * maybe we should obsolete older versions. */ qtype = NI_QTYPE_FQDN; /* XXX will append an mbuf */ replylen += offsetof(struct ni_reply_fqdn, ni_fqdn_namelen); oldfqdn++; break; } /* allocate an mbuf to reply. */ MGETHDR(n, M_DONTWAIT, m->m_type); if (n == NULL) { m_freem(m); return(NULL); } M_MOVE_PKTHDR(n, m); /* just for recvif */ if (replylen > MHLEN) { if (replylen > MCLBYTES) { /* * XXX: should we try to allocate more? But MCLBYTES * is probably much larger than IPV6_MMTU... */ goto bad; } MCLGET(n, M_DONTWAIT); if ((n->m_flags & M_EXT) == 0) { goto bad; } } n->m_pkthdr.len = n->m_len = replylen; /* copy mbuf header and IPv6 + Node Information base headers */ bcopy(mtod(m, caddr_t), mtod(n, caddr_t), sizeof(struct ip6_hdr)); nni6 = (struct icmp6_nodeinfo *)(mtod(n, struct ip6_hdr *) + 1); bcopy((caddr_t)ni6, (caddr_t)nni6, sizeof(struct icmp6_nodeinfo)); /* qtype dependent procedure */ switch (qtype) { case NI_QTYPE_NOOP: nni6->ni_code = ICMP6_NI_SUCCESS; nni6->ni_flags = 0; break; case NI_QTYPE_SUPTYPES: { u_int32_t v; nni6->ni_code = ICMP6_NI_SUCCESS; nni6->ni_flags = htons(0x0000); /* raw bitmap */ /* supports NOOP, SUPTYPES, FQDN, and NODEADDR */ v = (u_int32_t)htonl(0x0000000f); bcopy(&v, nni6 + 1, sizeof(u_int32_t)); break; } case NI_QTYPE_FQDN: nni6->ni_code = ICMP6_NI_SUCCESS; fqdn = (struct ni_reply_fqdn *)(mtod(n, caddr_t) + sizeof(struct ip6_hdr) + sizeof(struct icmp6_nodeinfo)); nni6->ni_flags = 0; /* XXX: meaningless TTL */ fqdn->ni_fqdn_ttl = 0; /* ditto. */ /* * XXX do we really have FQDN in variable "hostname"? */ n->m_next = ni6_nametodns(hostname, hostnamelen, oldfqdn); if (n->m_next == NULL) goto bad; /* XXX we assume that n->m_next is not a chain */ if (n->m_next->m_next != NULL) goto bad; n->m_pkthdr.len += n->m_next->m_len; break; case NI_QTYPE_NODEADDR: { int lenlim, copied; nni6->ni_code = ICMP6_NI_SUCCESS; n->m_pkthdr.len = n->m_len = sizeof(struct ip6_hdr) + sizeof(struct icmp6_nodeinfo); lenlim = M_TRAILINGSPACE(n); copied = ni6_store_addrs(ni6, nni6, ifp, lenlim); /* XXX: reset mbuf length */ n->m_pkthdr.len = n->m_len = sizeof(struct ip6_hdr) + sizeof(struct icmp6_nodeinfo) + copied; break; } default: break; /* XXX impossible! */ } nni6->ni_type = ICMP6_NI_REPLY; m_freem(m); return(n); bad: m_freem(m); if (n) m_freem(n); return(NULL); } #undef hostnamelen /* * make a mbuf with DNS-encoded string. no compression support. * * XXX names with less than 2 dots (like "foo" or "foo.section") will be * treated as truncated name (two \0 at the end). this is a wild guess. */ static struct mbuf * ni6_nametodns(name, namelen, old) const char *name; int namelen; int old; /* return pascal string if non-zero */ { struct mbuf *m; char *cp, *ep; const char *p, *q; int i, len, nterm; if (old) len = namelen + 1; else len = MCLBYTES; /* because MAXHOSTNAMELEN is usually 256, we use cluster mbuf */ MGET(m, M_DONTWAIT, MT_DATA); if (m && len > MLEN) { MCLGET(m, M_DONTWAIT); if ((m->m_flags & M_EXT) == 0) goto fail; } if (!m) goto fail; m->m_next = NULL; if (old) { m->m_len = len; *mtod(m, char *) = namelen; bcopy(name, mtod(m, char *) + 1, namelen); return m; } else { m->m_len = 0; cp = mtod(m, char *); ep = mtod(m, char *) + M_TRAILINGSPACE(m); /* if not certain about my name, return empty buffer */ if (namelen == 0) return m; /* * guess if it looks like shortened hostname, or FQDN. * shortened hostname needs two trailing "\0". */ i = 0; for (p = name; p < name + namelen; p++) { if (*p && *p == '.') i++; } if (i < 2) nterm = 2; else nterm = 1; p = name; while (cp < ep && p < name + namelen) { i = 0; for (q = p; q < name + namelen && *q && *q != '.'; q++) i++; /* result does not fit into mbuf */ if (cp + i + 1 >= ep) goto fail; /* * DNS label length restriction, RFC1035 page 8. * "i == 0" case is included here to avoid returning * 0-length label on "foo..bar". */ if (i <= 0 || i >= 64) goto fail; *cp++ = i; bcopy(p, cp, i); cp += i; p = q; if (p < name + namelen && *p == '.') p++; } /* termination */ if (cp + nterm >= ep) goto fail; while (nterm-- > 0) *cp++ = '\0'; m->m_len = cp - mtod(m, char *); return m; } panic("should not reach here"); /* NOTREACHED */ fail: if (m) m_freem(m); return NULL; } /* * check if two DNS-encoded string matches. takes care of truncated * form (with \0\0 at the end). no compression support. * XXX upper/lowercase match (see RFC2065) */ static int ni6_dnsmatch(a, alen, b, blen) const char *a; int alen; const char *b; int blen; { const char *a0, *b0; int l; /* simplest case - need validation? */ if (alen == blen && bcmp(a, b, alen) == 0) return 1; a0 = a; b0 = b; /* termination is mandatory */ if (alen < 2 || blen < 2) return 0; if (a0[alen - 1] != '\0' || b0[blen - 1] != '\0') return 0; alen--; blen--; while (a - a0 < alen && b - b0 < blen) { if (a - a0 + 1 > alen || b - b0 + 1 > blen) return 0; if ((signed char)a[0] < 0 || (signed char)b[0] < 0) return 0; /* we don't support compression yet */ if (a[0] >= 64 || b[0] >= 64) return 0; /* truncated case */ if (a[0] == 0 && a - a0 == alen - 1) return 1; if (b[0] == 0 && b - b0 == blen - 1) return 1; if (a[0] == 0 || b[0] == 0) return 0; if (a[0] != b[0]) return 0; l = a[0]; if (a - a0 + 1 + l > alen || b - b0 + 1 + l > blen) return 0; if (bcmp(a + 1, b + 1, l) != 0) return 0; a += 1 + l; b += 1 + l; } if (a - a0 == alen && b - b0 == blen) return 1; else return 0; } /* * calculate the number of addresses to be returned in the node info reply. */ static int ni6_addrs(ni6, m, ifpp, subj) struct icmp6_nodeinfo *ni6; struct mbuf *m; struct ifnet **ifpp; char *subj; { struct ifnet *ifp; struct in6_ifaddr *ifa6; struct ifaddr *ifa; struct sockaddr_in6 *subj_ip6 = NULL; /* XXX pedant */ int addrs = 0, addrsofif, iffound = 0; int niflags = ni6->ni_flags; if ((niflags & NI_NODEADDR_FLAG_ALL) == 0) { switch (ni6->ni_code) { case ICMP6_NI_SUBJ_IPV6: if (subj == NULL) /* must be impossible... */ return(0); subj_ip6 = (struct sockaddr_in6 *)subj; break; default: /* * XXX: we only support IPv6 subject address for * this Qtype. */ return(0); } } IFNET_RLOCK(); for (ifp = TAILQ_FIRST(&ifnet); ifp; ifp = TAILQ_NEXT(ifp, if_list)) { addrsofif = 0; TAILQ_FOREACH(ifa, &ifp->if_addrlist, ifa_list) { if (ifa->ifa_addr->sa_family != AF_INET6) continue; ifa6 = (struct in6_ifaddr *)ifa; if ((niflags & NI_NODEADDR_FLAG_ALL) == 0 && IN6_ARE_ADDR_EQUAL(&subj_ip6->sin6_addr, &ifa6->ia_addr.sin6_addr)) iffound = 1; /* * IPv4-mapped addresses can only be returned by a * Node Information proxy, since they represent * addresses of IPv4-only nodes, which perforce do * not implement this protocol. * [icmp-name-lookups-07, Section 5.4] * So we don't support NI_NODEADDR_FLAG_COMPAT in * this function at this moment. */ /* What do we have to do about ::1? */ switch (in6_addrscope(&ifa6->ia_addr.sin6_addr)) { case IPV6_ADDR_SCOPE_LINKLOCAL: if ((niflags & NI_NODEADDR_FLAG_LINKLOCAL) == 0) continue; break; case IPV6_ADDR_SCOPE_SITELOCAL: if ((niflags & NI_NODEADDR_FLAG_SITELOCAL) == 0) continue; break; case IPV6_ADDR_SCOPE_GLOBAL: if ((niflags & NI_NODEADDR_FLAG_GLOBAL) == 0) continue; break; default: continue; } /* * check if anycast is okay. * XXX: just experimental. not in the spec. */ if ((ifa6->ia6_flags & IN6_IFF_ANYCAST) != 0 && (niflags & NI_NODEADDR_FLAG_ANYCAST) == 0) continue; /* we need only unicast addresses */ if ((ifa6->ia6_flags & IN6_IFF_TEMPORARY) != 0 && (icmp6_nodeinfo & 4) == 0) { continue; } addrsofif++; /* count the address */ } if (iffound) { *ifpp = ifp; IFNET_RUNLOCK(); return(addrsofif); } addrs += addrsofif; } IFNET_RUNLOCK(); return(addrs); } static int ni6_store_addrs(ni6, nni6, ifp0, resid) struct icmp6_nodeinfo *ni6, *nni6; struct ifnet *ifp0; int resid; { struct ifnet *ifp = ifp0 ? ifp0 : TAILQ_FIRST(&ifnet); struct in6_ifaddr *ifa6; struct ifaddr *ifa; struct ifnet *ifp_dep = NULL; int copied = 0, allow_deprecated = 0; u_char *cp = (u_char *)(nni6 + 1); int niflags = ni6->ni_flags; u_int32_t ltime; if (ifp0 == NULL && !(niflags & NI_NODEADDR_FLAG_ALL)) return(0); /* needless to copy */ IFNET_RLOCK(); again: for (; ifp; ifp = TAILQ_NEXT(ifp, if_list)) { for (ifa = ifp->if_addrlist.tqh_first; ifa; ifa = ifa->ifa_list.tqe_next) { if (ifa->ifa_addr->sa_family != AF_INET6) continue; ifa6 = (struct in6_ifaddr *)ifa; if ((ifa6->ia6_flags & IN6_IFF_DEPRECATED) != 0 && allow_deprecated == 0) { /* * prefererred address should be put before * deprecated addresses. */ /* record the interface for later search */ if (ifp_dep == NULL) ifp_dep = ifp; continue; } else if ((ifa6->ia6_flags & IN6_IFF_DEPRECATED) == 0 && allow_deprecated != 0) continue; /* we now collect deprecated addrs */ /* What do we have to do about ::1? */ switch (in6_addrscope(&ifa6->ia_addr.sin6_addr)) { case IPV6_ADDR_SCOPE_LINKLOCAL: if ((niflags & NI_NODEADDR_FLAG_LINKLOCAL) == 0) continue; break; case IPV6_ADDR_SCOPE_SITELOCAL: if ((niflags & NI_NODEADDR_FLAG_SITELOCAL) == 0) continue; break; case IPV6_ADDR_SCOPE_GLOBAL: if ((niflags & NI_NODEADDR_FLAG_GLOBAL) == 0) continue; break; default: continue; } /* * check if anycast is okay. * XXX: just experimental. not in the spec. */ if ((ifa6->ia6_flags & IN6_IFF_ANYCAST) != 0 && (niflags & NI_NODEADDR_FLAG_ANYCAST) == 0) continue; if ((ifa6->ia6_flags & IN6_IFF_TEMPORARY) != 0 && (icmp6_nodeinfo & 4) == 0) { continue; } /* now we can copy the address */ if (resid < sizeof(struct in6_addr) + sizeof(u_int32_t)) { /* * We give up much more copy. * Set the truncate flag and return. */ nni6->ni_flags |= NI_NODEADDR_FLAG_TRUNCATE; IFNET_RUNLOCK(); return(copied); } /* * Set the TTL of the address. * The TTL value should be one of the following * according to the specification: * * 1. The remaining lifetime of a DHCP lease on the * address, or * 2. The remaining Valid Lifetime of a prefix from * which the address was derived through Stateless * Autoconfiguration. * * Note that we currently do not support stateful * address configuration by DHCPv6, so the former * case can't happen. */ if (ifa6->ia6_lifetime.ia6t_expire == 0) ltime = ND6_INFINITE_LIFETIME; else { if (ifa6->ia6_lifetime.ia6t_expire > time_second) ltime = htonl(ifa6->ia6_lifetime.ia6t_expire - time_second); else ltime = 0; } bcopy(<ime, cp, sizeof(u_int32_t)); cp += sizeof(u_int32_t); /* copy the address itself */ bcopy(&ifa6->ia_addr.sin6_addr, cp, sizeof(struct in6_addr)); /* XXX: KAME link-local hack; remove ifindex */ if (IN6_IS_ADDR_LINKLOCAL(&ifa6->ia_addr.sin6_addr)) ((struct in6_addr *)cp)->s6_addr16[1] = 0; cp += sizeof(struct in6_addr); resid -= (sizeof(struct in6_addr) + sizeof(u_int32_t)); copied += (sizeof(struct in6_addr) + sizeof(u_int32_t)); } if (ifp0) /* we need search only on the specified IF */ break; } if (allow_deprecated == 0 && ifp_dep != NULL) { ifp = ifp_dep; allow_deprecated = 1; goto again; } IFNET_RUNLOCK(); return(copied); } /* * XXX almost dup'ed code with rip6_input. */ static int icmp6_rip6_input(mp, off) struct mbuf **mp; int off; { struct mbuf *m = *mp; struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *); struct in6pcb *in6p; struct in6pcb *last = NULL; struct sockaddr_in6 rip6src; struct icmp6_hdr *icmp6; struct mbuf *opts = NULL; #ifndef PULLDOWN_TEST /* this is assumed to be safe. */ icmp6 = (struct icmp6_hdr *)((caddr_t)ip6 + off); #else IP6_EXTHDR_GET(icmp6, struct icmp6_hdr *, m, off, sizeof(*icmp6)); if (icmp6 == NULL) { /* m is already reclaimed */ return IPPROTO_DONE; } #endif bzero(&rip6src, sizeof(rip6src)); rip6src.sin6_len = sizeof(struct sockaddr_in6); rip6src.sin6_family = AF_INET6; /* KAME hack: recover scopeid */ (void)in6_recoverscope(&rip6src, &ip6->ip6_src, m->m_pkthdr.rcvif); LIST_FOREACH(in6p, &ripcb, inp_list) { if ((in6p->inp_vflag & INP_IPV6) == 0) continue; #ifdef HAVE_NRL_INPCB if (!(in6p->in6p_flags & INP_IPV6)) continue; #endif if (in6p->in6p_ip6_nxt != IPPROTO_ICMPV6) continue; if (!IN6_IS_ADDR_UNSPECIFIED(&in6p->in6p_laddr) && !IN6_ARE_ADDR_EQUAL(&in6p->in6p_laddr, &ip6->ip6_dst)) continue; if (!IN6_IS_ADDR_UNSPECIFIED(&in6p->in6p_faddr) && !IN6_ARE_ADDR_EQUAL(&in6p->in6p_faddr, &ip6->ip6_src)) continue; if (in6p->in6p_icmp6filt && ICMP6_FILTER_WILLBLOCK(icmp6->icmp6_type, in6p->in6p_icmp6filt)) continue; if (last) { struct mbuf *n; if ((n = m_copy(m, 0, (int)M_COPYALL)) != NULL) { if (last->in6p_flags & IN6P_CONTROLOPTS) ip6_savecontrol(last, &opts, ip6, n); /* strip intermediate headers */ m_adj(n, off); if (sbappendaddr(&last->in6p_socket->so_rcv, (struct sockaddr *)&rip6src, n, opts) == 0) { /* should notify about lost packet */ m_freem(n); if (opts) { m_freem(opts); } } else sorwakeup(last->in6p_socket); opts = NULL; } } last = in6p; } if (last) { if (last->in6p_flags & IN6P_CONTROLOPTS) ip6_savecontrol(last, &opts, ip6, m); /* strip intermediate headers */ m_adj(m, off); if (sbappendaddr(&last->in6p_socket->so_rcv, (struct sockaddr *)&rip6src, m, opts) == 0) { m_freem(m); if (opts) m_freem(opts); } else sorwakeup(last->in6p_socket); } else { m_freem(m); ip6stat.ip6s_delivered--; } return IPPROTO_DONE; } /* * Reflect the ip6 packet back to the source. * OFF points to the icmp6 header, counted from the top of the mbuf. */ void icmp6_reflect(m, off) struct mbuf *m; size_t off; { struct ip6_hdr *ip6; struct icmp6_hdr *icmp6; struct in6_ifaddr *ia; struct in6_addr t, *src = 0; int plen; int type, code; struct ifnet *outif = NULL; struct sockaddr_in6 sa6_src, sa6_dst; #ifdef COMPAT_RFC1885 int mtu = IPV6_MMTU; struct sockaddr_in6 *sin6 = &icmp6_reflect_rt.ro_dst; #endif /* too short to reflect */ if (off < sizeof(struct ip6_hdr)) { nd6log((LOG_DEBUG, "sanity fail: off=%lx, sizeof(ip6)=%lx in %s:%d\n", (u_long)off, (u_long)sizeof(struct ip6_hdr), __FILE__, __LINE__)); goto bad; } /* * If there are extra headers between IPv6 and ICMPv6, strip * off that header first. */ #ifdef DIAGNOSTIC if (sizeof(struct ip6_hdr) + sizeof(struct icmp6_hdr) > MHLEN) panic("assumption failed in icmp6_reflect"); #endif if (off > sizeof(struct ip6_hdr)) { size_t l; struct ip6_hdr nip6; l = off - sizeof(struct ip6_hdr); m_copydata(m, 0, sizeof(nip6), (caddr_t)&nip6); m_adj(m, l); l = sizeof(struct ip6_hdr) + sizeof(struct icmp6_hdr); if (m->m_len < l) { if ((m = m_pullup(m, l)) == NULL) return; } bcopy((caddr_t)&nip6, mtod(m, caddr_t), sizeof(nip6)); } else /* off == sizeof(struct ip6_hdr) */ { size_t l; l = sizeof(struct ip6_hdr) + sizeof(struct icmp6_hdr); if (m->m_len < l) { if ((m = m_pullup(m, l)) == NULL) return; } } plen = m->m_pkthdr.len - sizeof(struct ip6_hdr); ip6 = mtod(m, struct ip6_hdr *); ip6->ip6_nxt = IPPROTO_ICMPV6; icmp6 = (struct icmp6_hdr *)(ip6 + 1); type = icmp6->icmp6_type; /* keep type for statistics */ code = icmp6->icmp6_code; /* ditto. */ t = ip6->ip6_dst; /* * ip6_input() drops a packet if its src is multicast. * So, the src is never multicast. */ ip6->ip6_dst = ip6->ip6_src; /* * XXX: make sure to embed scope zone information, using * already embedded IDs or the received interface (if any). * Note that rcvif may be NULL. * TODO: scoped routing case (XXX). */ bzero(&sa6_src, sizeof(sa6_src)); sa6_src.sin6_family = AF_INET6; sa6_src.sin6_len = sizeof(sa6_src); sa6_src.sin6_addr = ip6->ip6_dst; in6_recoverscope(&sa6_src, &ip6->ip6_dst, m->m_pkthdr.rcvif); in6_embedscope(&ip6->ip6_dst, &sa6_src, NULL, NULL); bzero(&sa6_dst, sizeof(sa6_dst)); sa6_dst.sin6_family = AF_INET6; sa6_dst.sin6_len = sizeof(sa6_dst); sa6_dst.sin6_addr = t; in6_recoverscope(&sa6_dst, &t, m->m_pkthdr.rcvif); in6_embedscope(&t, &sa6_dst, NULL, NULL); #ifdef COMPAT_RFC1885 /* * xxx guess MTU * RFC 1885 requires that echo reply should be truncated if it * does not fit in with (return) path MTU, but the description was * removed in the new spec. */ if (icmp6_reflect_rt.ro_rt == 0 || ! (IN6_ARE_ADDR_EQUAL(&sin6->sin6_addr, &ip6->ip6_dst))) { if (icmp6_reflect_rt.ro_rt) { RTFREE(icmp6_reflect_rt.ro_rt); icmp6_reflect_rt.ro_rt = 0; } bzero(sin6, sizeof(*sin6)); sin6->sin6_family = PF_INET6; sin6->sin6_len = sizeof(struct sockaddr_in6); sin6->sin6_addr = ip6->ip6_dst; rtalloc_ign((struct route *)&icmp6_reflect_rt.ro_rt, RTF_PRCLONING); } if (icmp6_reflect_rt.ro_rt == 0) goto bad; if ((icmp6_reflect_rt.ro_rt->rt_flags & RTF_HOST) && mtu < icmp6_reflect_rt.ro_rt->rt_ifp->if_mtu) mtu = icmp6_reflect_rt.ro_rt->rt_rmx.rmx_mtu; if (mtu < m->m_pkthdr.len) { plen -= (m->m_pkthdr.len - mtu); m_adj(m, mtu - m->m_pkthdr.len); } #endif /* * If the incoming packet was addressed directly to us(i.e. unicast), * use dst as the src for the reply. * The IN6_IFF_NOTREADY case would be VERY rare, but is possible * (for example) when we encounter an error while forwarding procedure * destined to a duplicated address of ours. */ for (ia = in6_ifaddr; ia; ia = ia->ia_next) if (IN6_ARE_ADDR_EQUAL(&t, &ia->ia_addr.sin6_addr) && (ia->ia6_flags & (IN6_IFF_ANYCAST|IN6_IFF_NOTREADY)) == 0) { src = &t; break; } if (ia == NULL && IN6_IS_ADDR_LINKLOCAL(&t) && (m->m_flags & M_LOOP)) { /* * This is the case if the dst is our link-local address * and the sender is also ourselves. */ src = &t; } if (src == 0) { int e; struct route_in6 ro; /* * This case matches to multicasts, our anycast, or unicasts * that we do not own. Select a source address based on the * source address of the erroneous packet. */ bzero(&ro, sizeof(ro)); src = in6_selectsrc(&sa6_src, NULL, NULL, &ro, NULL, &e); if (ro.ro_rt) RTFREE(ro.ro_rt); /* XXX: we could use this */ if (src == NULL) { nd6log((LOG_DEBUG, "icmp6_reflect: source can't be determined: " "dst=%s, error=%d\n", ip6_sprintf(&sa6_src.sin6_addr), e)); goto bad; } } ip6->ip6_src = *src; ip6->ip6_flow = 0; ip6->ip6_vfc &= ~IPV6_VERSION_MASK; ip6->ip6_vfc |= IPV6_VERSION; ip6->ip6_nxt = IPPROTO_ICMPV6; if (m->m_pkthdr.rcvif) { /* XXX: This may not be the outgoing interface */ ip6->ip6_hlim = nd_ifinfo[m->m_pkthdr.rcvif->if_index].chlim; } else ip6->ip6_hlim = ip6_defhlim; icmp6->icmp6_cksum = 0; icmp6->icmp6_cksum = in6_cksum(m, IPPROTO_ICMPV6, sizeof(struct ip6_hdr), plen); /* * XXX option handling */ m->m_flags &= ~(M_BCAST|M_MCAST); #ifdef COMPAT_RFC1885 ip6_output(m, NULL, &icmp6_reflect_rt, 0, NULL, &outif, NULL); #else ip6_output(m, NULL, NULL, 0, NULL, &outif, NULL); #endif if (outif) icmp6_ifoutstat_inc(outif, type, code); return; bad: m_freem(m); return; } void icmp6_fasttimo() { mld6_fasttimeo(); } static const char * icmp6_redirect_diag(src6, dst6, tgt6) struct in6_addr *src6; struct in6_addr *dst6; struct in6_addr *tgt6; { static char buf[1024]; snprintf(buf, sizeof(buf), "(src=%s dst=%s tgt=%s)", ip6_sprintf(src6), ip6_sprintf(dst6), ip6_sprintf(tgt6)); return buf; } void icmp6_redirect_input(m, off) struct mbuf *m; int off; { struct ifnet *ifp = m->m_pkthdr.rcvif; struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *); struct nd_redirect *nd_rd; int icmp6len = ntohs(ip6->ip6_plen); char *lladdr = NULL; int lladdrlen = 0; u_char *redirhdr = NULL; int redirhdrlen = 0; struct rtentry *rt = NULL; int is_router; int is_onlink; struct in6_addr src6 = ip6->ip6_src; struct in6_addr redtgt6; struct in6_addr reddst6; union nd_opts ndopts; if (!m || !ifp) return; /* XXX if we are router, we don't update route by icmp6 redirect */ if (ip6_forwarding) goto freeit; if (!icmp6_rediraccept) goto freeit; #ifndef PULLDOWN_TEST IP6_EXTHDR_CHECK(m, off, icmp6len,); nd_rd = (struct nd_redirect *)((caddr_t)ip6 + off); #else IP6_EXTHDR_GET(nd_rd, struct nd_redirect *, m, off, icmp6len); if (nd_rd == NULL) { icmp6stat.icp6s_tooshort++; return; } #endif redtgt6 = nd_rd->nd_rd_target; reddst6 = nd_rd->nd_rd_dst; if (IN6_IS_ADDR_LINKLOCAL(&redtgt6)) redtgt6.s6_addr16[1] = htons(ifp->if_index); if (IN6_IS_ADDR_LINKLOCAL(&reddst6)) reddst6.s6_addr16[1] = htons(ifp->if_index); /* validation */ if (!IN6_IS_ADDR_LINKLOCAL(&src6)) { nd6log((LOG_ERR, "ICMP6 redirect sent from %s rejected; " "must be from linklocal\n", ip6_sprintf(&src6))); goto bad; } if (ip6->ip6_hlim != 255) { nd6log((LOG_ERR, "ICMP6 redirect sent from %s rejected; " "hlim=%d (must be 255)\n", ip6_sprintf(&src6), ip6->ip6_hlim)); goto bad; } { /* ip6->ip6_src must be equal to gw for icmp6->icmp6_reddst */ struct sockaddr_in6 sin6; struct in6_addr *gw6; bzero(&sin6, sizeof(sin6)); sin6.sin6_family = AF_INET6; sin6.sin6_len = sizeof(struct sockaddr_in6); bcopy(&reddst6, &sin6.sin6_addr, sizeof(reddst6)); rt = rtalloc1((struct sockaddr *)&sin6, 0, 0UL); if (rt) { if (rt->rt_gateway == NULL || rt->rt_gateway->sa_family != AF_INET6) { nd6log((LOG_ERR, "ICMP6 redirect rejected; no route " "with inet6 gateway found for redirect dst: %s\n", icmp6_redirect_diag(&src6, &reddst6, &redtgt6))); - RTFREE(rt); + RTFREE_LOCKED(rt); goto bad; } gw6 = &(((struct sockaddr_in6 *)rt->rt_gateway)->sin6_addr); if (bcmp(&src6, gw6, sizeof(struct in6_addr)) != 0) { nd6log((LOG_ERR, "ICMP6 redirect rejected; " "not equal to gw-for-src=%s (must be same): " "%s\n", ip6_sprintf(gw6), icmp6_redirect_diag(&src6, &reddst6, &redtgt6))); - RTFREE(rt); + RTFREE_LOCKED(rt); goto bad; } } else { nd6log((LOG_ERR, "ICMP6 redirect rejected; " "no route found for redirect dst: %s\n", icmp6_redirect_diag(&src6, &reddst6, &redtgt6))); goto bad; } - RTFREE(rt); + RTFREE_LOCKED(rt); rt = NULL; } if (IN6_IS_ADDR_MULTICAST(&reddst6)) { nd6log((LOG_ERR, "ICMP6 redirect rejected; " "redirect dst must be unicast: %s\n", icmp6_redirect_diag(&src6, &reddst6, &redtgt6))); goto bad; } is_router = is_onlink = 0; if (IN6_IS_ADDR_LINKLOCAL(&redtgt6)) is_router = 1; /* router case */ if (bcmp(&redtgt6, &reddst6, sizeof(redtgt6)) == 0) is_onlink = 1; /* on-link destination case */ if (!is_router && !is_onlink) { nd6log((LOG_ERR, "ICMP6 redirect rejected; " "neither router case nor onlink case: %s\n", icmp6_redirect_diag(&src6, &reddst6, &redtgt6))); goto bad; } /* validation passed */ icmp6len -= sizeof(*nd_rd); nd6_option_init(nd_rd + 1, icmp6len, &ndopts); if (nd6_options(&ndopts) < 0) { nd6log((LOG_INFO, "icmp6_redirect_input: " "invalid ND option, rejected: %s\n", icmp6_redirect_diag(&src6, &reddst6, &redtgt6))); /* nd6_options have incremented stats */ goto freeit; } if (ndopts.nd_opts_tgt_lladdr) { lladdr = (char *)(ndopts.nd_opts_tgt_lladdr + 1); lladdrlen = ndopts.nd_opts_tgt_lladdr->nd_opt_len << 3; } if (ndopts.nd_opts_rh) { redirhdrlen = ndopts.nd_opts_rh->nd_opt_rh_len; redirhdr = (u_char *)(ndopts.nd_opts_rh + 1); /* xxx */ } if (lladdr && ((ifp->if_addrlen + 2 + 7) & ~7) != lladdrlen) { nd6log((LOG_INFO, "icmp6_redirect_input: lladdrlen mismatch for %s " "(if %d, icmp6 packet %d): %s\n", ip6_sprintf(&redtgt6), ifp->if_addrlen, lladdrlen - 2, icmp6_redirect_diag(&src6, &reddst6, &redtgt6))); goto bad; } /* RFC 2461 8.3 */ nd6_cache_lladdr(ifp, &redtgt6, lladdr, lladdrlen, ND_REDIRECT, is_onlink ? ND_REDIRECT_ONLINK : ND_REDIRECT_ROUTER); if (!is_onlink) { /* better router case. perform rtredirect. */ /* perform rtredirect */ struct sockaddr_in6 sdst; struct sockaddr_in6 sgw; struct sockaddr_in6 ssrc; bzero(&sdst, sizeof(sdst)); bzero(&sgw, sizeof(sgw)); bzero(&ssrc, sizeof(ssrc)); sdst.sin6_family = sgw.sin6_family = ssrc.sin6_family = AF_INET6; sdst.sin6_len = sgw.sin6_len = ssrc.sin6_len = sizeof(struct sockaddr_in6); bcopy(&redtgt6, &sgw.sin6_addr, sizeof(struct in6_addr)); bcopy(&reddst6, &sdst.sin6_addr, sizeof(struct in6_addr)); bcopy(&src6, &ssrc.sin6_addr, sizeof(struct in6_addr)); rtredirect((struct sockaddr *)&sdst, (struct sockaddr *)&sgw, (struct sockaddr *)NULL, RTF_GATEWAY | RTF_HOST, - (struct sockaddr *)&ssrc, - (struct rtentry **)NULL); + (struct sockaddr *)&ssrc); } /* finally update cached route in each socket via pfctlinput */ { struct sockaddr_in6 sdst; bzero(&sdst, sizeof(sdst)); sdst.sin6_family = AF_INET6; sdst.sin6_len = sizeof(struct sockaddr_in6); bcopy(&reddst6, &sdst.sin6_addr, sizeof(struct in6_addr)); pfctlinput(PRC_REDIRECT_HOST, (struct sockaddr *)&sdst); #ifdef IPSEC key_sa_routechange((struct sockaddr *)&sdst); #endif } freeit: m_freem(m); return; bad: icmp6stat.icp6s_badredirect++; m_freem(m); } void icmp6_redirect_output(m0, rt) struct mbuf *m0; struct rtentry *rt; { struct ifnet *ifp; /* my outgoing interface */ struct in6_addr *ifp_ll6; struct in6_addr *router_ll6; struct ip6_hdr *sip6; /* m0 as struct ip6_hdr */ struct mbuf *m = NULL; /* newly allocated one */ struct ip6_hdr *ip6; /* m as struct ip6_hdr */ struct nd_redirect *nd_rd; size_t maxlen; u_char *p; struct ifnet *outif = NULL; struct sockaddr_in6 src_sa; icmp6_errcount(&icmp6stat.icp6s_outerrhist, ND_REDIRECT, 0); /* if we are not router, we don't send icmp6 redirect */ if (!ip6_forwarding || ip6_accept_rtadv) goto fail; /* sanity check */ if (!m0 || !rt || !(rt->rt_flags & RTF_UP) || !(ifp = rt->rt_ifp)) goto fail; /* * Address check: * the source address must identify a neighbor, and * the destination address must not be a multicast address * [RFC 2461, sec 8.2] */ sip6 = mtod(m0, struct ip6_hdr *); bzero(&src_sa, sizeof(src_sa)); src_sa.sin6_family = AF_INET6; src_sa.sin6_len = sizeof(src_sa); src_sa.sin6_addr = sip6->ip6_src; /* we don't currently use sin6_scope_id, but eventually use it */ src_sa.sin6_scope_id = in6_addr2scopeid(ifp, &sip6->ip6_src); if (nd6_is_addr_neighbor(&src_sa, ifp) == 0) goto fail; if (IN6_IS_ADDR_MULTICAST(&sip6->ip6_dst)) goto fail; /* what should we do here? */ /* rate limit */ if (icmp6_ratelimit(&sip6->ip6_src, ND_REDIRECT, 0)) goto fail; /* * Since we are going to append up to 1280 bytes (= IPV6_MMTU), * we almost always ask for an mbuf cluster for simplicity. * (MHLEN < IPV6_MMTU is almost always true) */ #if IPV6_MMTU >= MCLBYTES # error assumption failed about IPV6_MMTU and MCLBYTES #endif MGETHDR(m, M_DONTWAIT, MT_HEADER); if (m && IPV6_MMTU >= MHLEN) MCLGET(m, M_DONTWAIT); if (!m) goto fail; m->m_pkthdr.rcvif = NULL; m->m_len = 0; maxlen = M_TRAILINGSPACE(m); maxlen = min(IPV6_MMTU, maxlen); /* just for safety */ if (maxlen < sizeof(struct ip6_hdr) + sizeof(struct icmp6_hdr) + ((sizeof(struct nd_opt_hdr) + ifp->if_addrlen + 7) & ~7)) { goto fail; } { /* get ip6 linklocal address for ifp(my outgoing interface). */ struct in6_ifaddr *ia; if ((ia = in6ifa_ifpforlinklocal(ifp, IN6_IFF_NOTREADY| IN6_IFF_ANYCAST)) == NULL) goto fail; ifp_ll6 = &ia->ia_addr.sin6_addr; } /* get ip6 linklocal address for the router. */ if (rt->rt_gateway && (rt->rt_flags & RTF_GATEWAY)) { struct sockaddr_in6 *sin6; sin6 = (struct sockaddr_in6 *)rt->rt_gateway; router_ll6 = &sin6->sin6_addr; if (!IN6_IS_ADDR_LINKLOCAL(router_ll6)) router_ll6 = (struct in6_addr *)NULL; } else router_ll6 = (struct in6_addr *)NULL; /* ip6 */ ip6 = mtod(m, struct ip6_hdr *); ip6->ip6_flow = 0; ip6->ip6_vfc &= ~IPV6_VERSION_MASK; ip6->ip6_vfc |= IPV6_VERSION; /* ip6->ip6_plen will be set later */ ip6->ip6_nxt = IPPROTO_ICMPV6; ip6->ip6_hlim = 255; /* ip6->ip6_src must be linklocal addr for my outgoing if. */ bcopy(ifp_ll6, &ip6->ip6_src, sizeof(struct in6_addr)); bcopy(&sip6->ip6_src, &ip6->ip6_dst, sizeof(struct in6_addr)); /* ND Redirect */ nd_rd = (struct nd_redirect *)(ip6 + 1); nd_rd->nd_rd_type = ND_REDIRECT; nd_rd->nd_rd_code = 0; nd_rd->nd_rd_reserved = 0; if (rt->rt_flags & RTF_GATEWAY) { /* * nd_rd->nd_rd_target must be a link-local address in * better router cases. */ if (!router_ll6) goto fail; bcopy(router_ll6, &nd_rd->nd_rd_target, sizeof(nd_rd->nd_rd_target)); bcopy(&sip6->ip6_dst, &nd_rd->nd_rd_dst, sizeof(nd_rd->nd_rd_dst)); } else { /* make sure redtgt == reddst */ bcopy(&sip6->ip6_dst, &nd_rd->nd_rd_target, sizeof(nd_rd->nd_rd_target)); bcopy(&sip6->ip6_dst, &nd_rd->nd_rd_dst, sizeof(nd_rd->nd_rd_dst)); } p = (u_char *)(nd_rd + 1); if (!router_ll6) goto nolladdropt; { /* target lladdr option */ struct rtentry *rt_router = NULL; int len; struct sockaddr_dl *sdl; struct nd_opt_hdr *nd_opt; char *lladdr; rt_router = nd6_lookup(router_ll6, 0, ifp); if (!rt_router) goto nolladdropt; len = sizeof(*nd_opt) + ifp->if_addrlen; len = (len + 7) & ~7; /* round by 8 */ /* safety check */ if (len + (p - (u_char *)ip6) > maxlen) goto nolladdropt; if (!(rt_router->rt_flags & RTF_GATEWAY) && (rt_router->rt_flags & RTF_LLINFO) && (rt_router->rt_gateway->sa_family == AF_LINK) && (sdl = (struct sockaddr_dl *)rt_router->rt_gateway) && sdl->sdl_alen) { nd_opt = (struct nd_opt_hdr *)p; nd_opt->nd_opt_type = ND_OPT_TARGET_LINKADDR; nd_opt->nd_opt_len = len >> 3; lladdr = (char *)(nd_opt + 1); bcopy(LLADDR(sdl), lladdr, ifp->if_addrlen); p += len; } } nolladdropt:; m->m_pkthdr.len = m->m_len = p - (u_char *)ip6; /* just to be safe */ #ifdef M_DECRYPTED /*not openbsd*/ if (m0->m_flags & M_DECRYPTED) goto noredhdropt; #endif if (p - (u_char *)ip6 > maxlen) goto noredhdropt; { /* redirected header option */ int len; struct nd_opt_rd_hdr *nd_opt_rh; /* * compute the maximum size for icmp6 redirect header option. * XXX room for auth header? */ len = maxlen - (p - (u_char *)ip6); len &= ~7; /* This is just for simplicity. */ if (m0->m_pkthdr.len != m0->m_len) { if (m0->m_next) { m_freem(m0->m_next); m0->m_next = NULL; } m0->m_pkthdr.len = m0->m_len; } /* * Redirected header option spec (RFC2461 4.6.3) talks nothing * about padding/truncate rule for the original IP packet. * From the discussion on IPv6imp in Feb 1999, the consensus was: * - "attach as much as possible" is the goal * - pad if not aligned (original size can be guessed by original * ip6 header) * Following code adds the padding if it is simple enough, * and truncates if not. */ if (m0->m_next || m0->m_pkthdr.len != m0->m_len) panic("assumption failed in %s:%d", __FILE__, __LINE__); if (len - sizeof(*nd_opt_rh) < m0->m_pkthdr.len) { /* not enough room, truncate */ m0->m_pkthdr.len = m0->m_len = len - sizeof(*nd_opt_rh); } else { /* enough room, pad or truncate */ size_t extra; extra = m0->m_pkthdr.len % 8; if (extra) { /* pad if easy enough, truncate if not */ if (8 - extra <= M_TRAILINGSPACE(m0)) { /* pad */ m0->m_len += (8 - extra); m0->m_pkthdr.len += (8 - extra); } else { /* truncate */ m0->m_pkthdr.len -= extra; m0->m_len -= extra; } } len = m0->m_pkthdr.len + sizeof(*nd_opt_rh); m0->m_pkthdr.len = m0->m_len = len - sizeof(*nd_opt_rh); } nd_opt_rh = (struct nd_opt_rd_hdr *)p; bzero(nd_opt_rh, sizeof(*nd_opt_rh)); nd_opt_rh->nd_opt_rh_type = ND_OPT_REDIRECTED_HEADER; nd_opt_rh->nd_opt_rh_len = len >> 3; p += sizeof(*nd_opt_rh); m->m_pkthdr.len = m->m_len = p - (u_char *)ip6; /* connect m0 to m */ m_tag_delete_chain(m0, NULL); m0->m_flags &= ~M_PKTHDR; m->m_next = m0; m->m_pkthdr.len = m->m_len + m0->m_len; m0 = NULL; } noredhdropt:; if (m0) { m_freem(m0); m0 = NULL; } if (IN6_IS_ADDR_LINKLOCAL(&sip6->ip6_src)) sip6->ip6_src.s6_addr16[1] = 0; if (IN6_IS_ADDR_LINKLOCAL(&sip6->ip6_dst)) sip6->ip6_dst.s6_addr16[1] = 0; #if 0 if (IN6_IS_ADDR_LINKLOCAL(&ip6->ip6_src)) ip6->ip6_src.s6_addr16[1] = 0; if (IN6_IS_ADDR_LINKLOCAL(&ip6->ip6_dst)) ip6->ip6_dst.s6_addr16[1] = 0; #endif if (IN6_IS_ADDR_LINKLOCAL(&nd_rd->nd_rd_target)) nd_rd->nd_rd_target.s6_addr16[1] = 0; if (IN6_IS_ADDR_LINKLOCAL(&nd_rd->nd_rd_dst)) nd_rd->nd_rd_dst.s6_addr16[1] = 0; ip6->ip6_plen = htons(m->m_pkthdr.len - sizeof(struct ip6_hdr)); nd_rd->nd_rd_cksum = 0; nd_rd->nd_rd_cksum = in6_cksum(m, IPPROTO_ICMPV6, sizeof(*ip6), ntohs(ip6->ip6_plen)); /* send the packet to outside... */ ip6_output(m, NULL, NULL, 0, NULL, &outif, NULL); if (outif) { icmp6_ifstat_inc(outif, ifs6_out_msg); icmp6_ifstat_inc(outif, ifs6_out_redirect); } icmp6stat.icp6s_outhist[ND_REDIRECT]++; return; fail: if (m) m_freem(m); if (m0) m_freem(m0); } #ifdef HAVE_NRL_INPCB #define sotoin6pcb sotoinpcb #define in6pcb inpcb #define in6p_icmp6filt inp_icmp6filt #endif /* * ICMPv6 socket option processing. */ int icmp6_ctloutput(so, sopt) struct socket *so; struct sockopt *sopt; { int error = 0; int optlen; struct inpcb *inp = sotoinpcb(so); int level, op, optname; if (sopt) { level = sopt->sopt_level; op = sopt->sopt_dir; optname = sopt->sopt_name; optlen = sopt->sopt_valsize; } else level = op = optname = optlen = 0; if (level != IPPROTO_ICMPV6) { return EINVAL; } switch (op) { case PRCO_SETOPT: switch (optname) { case ICMP6_FILTER: { struct icmp6_filter *p; if (optlen != sizeof(*p)) { error = EMSGSIZE; break; } if (inp->in6p_icmp6filt == NULL) { error = EINVAL; break; } error = sooptcopyin(sopt, inp->in6p_icmp6filt, optlen, optlen); break; } default: error = ENOPROTOOPT; break; } break; case PRCO_GETOPT: switch (optname) { case ICMP6_FILTER: { if (inp->in6p_icmp6filt == NULL) { error = EINVAL; break; } error = sooptcopyout(sopt, inp->in6p_icmp6filt, sizeof(struct icmp6_filter)); break; } default: error = ENOPROTOOPT; break; } break; } return(error); } #ifdef HAVE_NRL_INPCB #undef sotoin6pcb #undef in6pcb #undef in6p_icmp6filt #endif #ifndef HAVE_PPSRATECHECK #ifndef timersub #define timersub(tvp, uvp, vvp) \ do { \ (vvp)->tv_sec = (tvp)->tv_sec - (uvp)->tv_sec; \ (vvp)->tv_usec = (tvp)->tv_usec - (uvp)->tv_usec; \ if ((vvp)->tv_usec < 0) { \ (vvp)->tv_sec--; \ (vvp)->tv_usec += 1000000; \ } \ } while (0) #endif /* * ppsratecheck(): packets (or events) per second limitation. */ static int ppsratecheck(lasttime, curpps, maxpps) struct timeval *lasttime; int *curpps; int maxpps; /* maximum pps allowed */ { struct timeval tv, delta; int s, rv; s = splclock(); microtime(&tv); splx(s); timersub(&tv, lasttime, &delta); /* * Check for 0,0 so that the message will be seen at least once. * If more than one second has passed since the last update of * lasttime, reset the counter. * * We do increment *curpps even in *curpps < maxpps case, as some may * try to use *curpps for stat purposes as well. */ if ((lasttime->tv_sec == 0 && lasttime->tv_usec == 0) || delta.tv_sec >= 1) { *lasttime = tv; *curpps = 0; rv = 1; } else if (maxpps < 0) rv = 1; else if (*curpps < maxpps) rv = 1; else rv = 0; #if 1 /* DIAGNOSTIC? */ /* be careful about wrap-around */ if (*curpps + 1 > *curpps) *curpps = *curpps + 1; #else /* * assume that there's not too many calls to this function. * not sure if the assumption holds, as it depends on *caller's* * behavior, not the behavior of this function. * IMHO it is wrong to make assumption on the caller's behavior, * so the above #if is #if 1, not #ifdef DIAGNOSTIC. */ *curpps = *curpps + 1; #endif return (rv); } #endif /* * Perform rate limit check. * Returns 0 if it is okay to send the icmp6 packet. * Returns 1 if the router SHOULD NOT send this icmp6 packet due to rate * limitation. * * XXX per-destination/type check necessary? */ static int icmp6_ratelimit(dst, type, code) const struct in6_addr *dst; /* not used at this moment */ const int type; /* not used at this moment */ const int code; /* not used at this moment */ { int ret; ret = 0; /* okay to send */ /* PPS limit */ if (!ppsratecheck(&icmp6errppslim_last, &icmp6errpps_count, icmp6errppslim)) { /* The packet is subject to rate limit */ ret++; } return ret; } Index: head/sys/netinet6/in6.c =================================================================== --- head/sys/netinet6/in6.c (revision 120726) +++ head/sys/netinet6/in6.c (revision 120727) @@ -1,2455 +1,2461 @@ /* $FreeBSD$ */ /* $KAME: in6.c,v 1.259 2002/01/21 11:37:50 keiichi Exp $ */ /* * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the project nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ /* * Copyright (c) 1982, 1986, 1991, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)in.c 8.2 (Berkeley) 11/15/93 */ #include "opt_inet.h" #include "opt_inet6.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifndef SCOPEDROUTING #include #include #include #endif #include #include #include #include #include #include #include #ifndef SCOPEDROUTING #include #endif #include MALLOC_DEFINE(M_IPMADDR, "in6_multi", "internet multicast address"); /* * Definitions of some costant IP6 addresses. */ const struct in6_addr in6addr_any = IN6ADDR_ANY_INIT; const struct in6_addr in6addr_loopback = IN6ADDR_LOOPBACK_INIT; const struct in6_addr in6addr_nodelocal_allnodes = IN6ADDR_NODELOCAL_ALLNODES_INIT; const struct in6_addr in6addr_linklocal_allnodes = IN6ADDR_LINKLOCAL_ALLNODES_INIT; const struct in6_addr in6addr_linklocal_allrouters = IN6ADDR_LINKLOCAL_ALLROUTERS_INIT; const struct in6_addr in6mask0 = IN6MASK0; const struct in6_addr in6mask32 = IN6MASK32; const struct in6_addr in6mask64 = IN6MASK64; const struct in6_addr in6mask96 = IN6MASK96; const struct in6_addr in6mask128 = IN6MASK128; const struct sockaddr_in6 sa6_any = {sizeof(sa6_any), AF_INET6, 0, 0, IN6ADDR_ANY_INIT, 0}; static int in6_lifaddr_ioctl __P((struct socket *, u_long, caddr_t, struct ifnet *, struct thread *)); static int in6_ifinit __P((struct ifnet *, struct in6_ifaddr *, struct sockaddr_in6 *, int)); static void in6_unlink_ifa __P((struct in6_ifaddr *, struct ifnet *)); struct in6_multihead in6_multihead; /* XXX BSS initialization */ int (*faithprefix_p)(struct in6_addr *); /* * Subroutine for in6_ifaddloop() and in6_ifremloop(). * This routine does actual work. */ static void in6_ifloop_request(int cmd, struct ifaddr *ifa) { struct sockaddr_in6 all1_sa; struct rtentry *nrt = NULL; int e; bzero(&all1_sa, sizeof(all1_sa)); all1_sa.sin6_family = AF_INET6; all1_sa.sin6_len = sizeof(struct sockaddr_in6); all1_sa.sin6_addr = in6mask128; /* * We specify the address itself as the gateway, and set the * RTF_LLINFO flag, so that the corresponding host route would have * the flag, and thus applications that assume traditional behavior * would be happy. Note that we assume the caller of the function * (probably implicitly) set nd6_rtrequest() to ifa->ifa_rtrequest, * which changes the outgoing interface to the loopback interface. */ e = rtrequest(cmd, ifa->ifa_addr, ifa->ifa_addr, (struct sockaddr *)&all1_sa, RTF_UP|RTF_HOST|RTF_LLINFO, &nrt); if (e != 0) { log(LOG_ERR, "in6_ifloop_request: " "%s operation failed for %s (errno=%d)\n", cmd == RTM_ADD ? "ADD" : "DELETE", ip6_sprintf(&((struct in6_ifaddr *)ifa)->ia_addr.sin6_addr), e); } - /* - * Make sure rt_ifa be equal to IFA, the second argument of the - * function. - * We need this because when we refer to rt_ifa->ia6_flags in - * ip6_input, we assume that the rt_ifa points to the address instead - * of the loopback address. - */ - if (cmd == RTM_ADD && nrt && ifa != nrt->rt_ifa) { - IFAFREE(nrt->rt_ifa); - IFAREF(ifa); - nrt->rt_ifa = ifa; - } - - /* - * Report the addition/removal of the address to the routing socket. - * XXX: since we called rtinit for a p2p interface with a destination, - * we end up reporting twice in such a case. Should we rather - * omit the second report? - */ if (nrt) { + RT_LOCK(nrt); + /* + * Make sure rt_ifa be equal to IFA, the second argument of + * the function. We need this because when we refer to + * rt_ifa->ia6_flags in ip6_input, we assume that the rt_ifa + * points to the address instead of the loopback address. + */ + if (cmd == RTM_ADD && ifa != nrt->rt_ifa) { + IFAFREE(nrt->rt_ifa); + IFAREF(ifa); + nrt->rt_ifa = ifa; + } + + /* + * Report the addition/removal of the address to the routing + * socket. + * + * XXX: since we called rtinit for a p2p interface with a + * destination, we end up reporting twice in such a case. + * Should we rather omit the second report? + */ rt_newaddrmsg(cmd, ifa, e, nrt); if (cmd == RTM_DELETE) { - RTFREE(nrt); + rtfree(nrt); } else { /* the cmd must be RTM_ADD here */ nrt->rt_refcnt--; + RT_UNLOCK(nrt); } } } /* * Add ownaddr as loopback rtentry. We previously add the route only if * necessary (ex. on a p2p link). However, since we now manage addresses * separately from prefixes, we should always add the route. We can't * rely on the cloning mechanism from the corresponding interface route * any more. */ static void in6_ifaddloop(struct ifaddr *ifa) { struct rtentry *rt; /* If there is no loopback entry, allocate one. */ rt = rtalloc1(ifa->ifa_addr, 0, 0); if (rt == NULL || (rt->rt_flags & RTF_HOST) == 0 || (rt->rt_ifp->if_flags & IFF_LOOPBACK) == 0) in6_ifloop_request(RTM_ADD, ifa); if (rt) - rt->rt_refcnt--; + rtfree(rt); } /* * Remove loopback rtentry of ownaddr generated by in6_ifaddloop(), * if it exists. */ static void in6_ifremloop(struct ifaddr *ifa) { struct in6_ifaddr *ia; struct rtentry *rt; int ia_count = 0; /* * Some of BSD variants do not remove cloned routes * from an interface direct route, when removing the direct route * (see comments in net/net_osdep.h). Even for variants that do remove * cloned routes, they could fail to remove the cloned routes when * we handle multple addresses that share a common prefix. * So, we should remove the route corresponding to the deleted address * regardless of the result of in6_is_ifloop_auto(). */ /* * Delete the entry only if exact one ifa exists. More than one ifa * can exist if we assign a same single address to multiple * (probably p2p) interfaces. * XXX: we should avoid such a configuration in IPv6... */ for (ia = in6_ifaddr; ia; ia = ia->ia_next) { if (IN6_ARE_ADDR_EQUAL(IFA_IN6(ifa), &ia->ia_addr.sin6_addr)) { ia_count++; if (ia_count > 1) break; } } if (ia_count == 1) { /* * Before deleting, check if a corresponding loopbacked host * route surely exists. With this check, we can avoid to * delete an interface direct route whose destination is same * as the address being removed. This can happen when remofing * a subnet-router anycast address on an interface attahced * to a shared medium. */ rt = rtalloc1(ifa->ifa_addr, 0, 0); - if (rt != NULL && (rt->rt_flags & RTF_HOST) != 0 && - (rt->rt_ifp->if_flags & IFF_LOOPBACK) != 0) { - rt->rt_refcnt--; - in6_ifloop_request(RTM_DELETE, ifa); + if (rt != NULL) { + if ((rt->rt_flags & RTF_HOST) != 0 && + (rt->rt_ifp->if_flags & IFF_LOOPBACK) != 0) { + rtfree(rt); + in6_ifloop_request(RTM_DELETE, ifa); + } else + RT_UNLOCK(rt); } } } int in6_ifindex2scopeid(idx) int idx; { struct ifnet *ifp; struct ifaddr *ifa; struct sockaddr_in6 *sin6; if (idx < 0 || if_index < idx) return -1; ifp = ifnet_byindex(idx); TAILQ_FOREACH(ifa, &ifp->if_addrlist, ifa_list) { if (ifa->ifa_addr->sa_family != AF_INET6) continue; sin6 = (struct sockaddr_in6 *)ifa->ifa_addr; if (IN6_IS_ADDR_SITELOCAL(&sin6->sin6_addr)) return sin6->sin6_scope_id & 0xffff; } return -1; } int in6_mask2len(mask, lim0) struct in6_addr *mask; u_char *lim0; { int x = 0, y; u_char *lim = lim0, *p; if (lim0 == NULL || lim0 - (u_char *)mask > sizeof(*mask)) /* ignore the scope_id part */ lim = (u_char *)mask + sizeof(*mask); for (p = (u_char *)mask; p < lim; x++, p++) { if (*p != 0xff) break; } y = 0; if (p < lim) { for (y = 0; y < 8; y++) { if ((*p & (0x80 >> y)) == 0) break; } } /* * when the limit pointer is given, do a stricter check on the * remaining bits. */ if (p < lim) { if (y != 0 && (*p & (0x00ff >> y)) != 0) return(-1); for (p = p + 1; p < lim; p++) if (*p != 0) return(-1); } return x * 8 + y; } void in6_len2mask(mask, len) struct in6_addr *mask; int len; { int i; bzero(mask, sizeof(*mask)); for (i = 0; i < len / 8; i++) mask->s6_addr8[i] = 0xff; if (len % 8) mask->s6_addr8[i] = (0xff00 >> (len % 8)) & 0xff; } #define ifa2ia6(ifa) ((struct in6_ifaddr *)(ifa)) #define ia62ifa(ia6) (&((ia6)->ia_ifa)) int in6_control(so, cmd, data, ifp, td) struct socket *so; u_long cmd; caddr_t data; struct ifnet *ifp; struct thread *td; { struct in6_ifreq *ifr = (struct in6_ifreq *)data; struct in6_ifaddr *ia = NULL; struct in6_aliasreq *ifra = (struct in6_aliasreq *)data; int privileged; privileged = 0; if (td == NULL || !suser(td)) privileged++; switch (cmd) { case SIOCGETSGCNT_IN6: case SIOCGETMIFCNT_IN6: return (mrt6_ioctl(cmd, data)); } if (ifp == NULL) return(EOPNOTSUPP); switch (cmd) { case SIOCSNDFLUSH_IN6: case SIOCSPFXFLUSH_IN6: case SIOCSRTRFLUSH_IN6: case SIOCSDEFIFACE_IN6: case SIOCSIFINFO_FLAGS: if (!privileged) return(EPERM); /* fall through */ case OSIOCGIFINFO_IN6: case SIOCGIFINFO_IN6: case SIOCGDRLST_IN6: case SIOCGPRLST_IN6: case SIOCGNBRINFO_IN6: case SIOCGDEFIFACE_IN6: return(nd6_ioctl(cmd, data, ifp)); } switch (cmd) { case SIOCSIFPREFIX_IN6: case SIOCDIFPREFIX_IN6: case SIOCAIFPREFIX_IN6: case SIOCCIFPREFIX_IN6: case SIOCSGIFPREFIX_IN6: case SIOCGIFPREFIX_IN6: log(LOG_NOTICE, "prefix ioctls are now invalidated. " "please use ifconfig.\n"); return(EOPNOTSUPP); } switch (cmd) { case SIOCSSCOPE6: if (!privileged) return(EPERM); return(scope6_set(ifp, ifr->ifr_ifru.ifru_scope_id)); break; case SIOCGSCOPE6: return(scope6_get(ifp, ifr->ifr_ifru.ifru_scope_id)); break; case SIOCGSCOPE6DEF: return(scope6_get_default(ifr->ifr_ifru.ifru_scope_id)); break; } switch (cmd) { case SIOCALIFADDR: case SIOCDLIFADDR: if (!privileged) return(EPERM); /* fall through */ case SIOCGLIFADDR: return in6_lifaddr_ioctl(so, cmd, data, ifp, td); } /* * Find address for this interface, if it exists. */ if (ifra->ifra_addr.sin6_family == AF_INET6) { /* XXX */ struct sockaddr_in6 *sa6 = (struct sockaddr_in6 *)&ifra->ifra_addr; if (IN6_IS_ADDR_LINKLOCAL(&sa6->sin6_addr)) { if (sa6->sin6_addr.s6_addr16[1] == 0) { /* link ID is not embedded by the user */ sa6->sin6_addr.s6_addr16[1] = htons(ifp->if_index); } else if (sa6->sin6_addr.s6_addr16[1] != htons(ifp->if_index)) { return(EINVAL); /* link ID contradicts */ } if (sa6->sin6_scope_id) { if (sa6->sin6_scope_id != (u_int32_t)ifp->if_index) return(EINVAL); sa6->sin6_scope_id = 0; /* XXX: good way? */ } } ia = in6ifa_ifpwithaddr(ifp, &ifra->ifra_addr.sin6_addr); } switch (cmd) { case SIOCSIFADDR_IN6: case SIOCSIFDSTADDR_IN6: case SIOCSIFNETMASK_IN6: /* * Since IPv6 allows a node to assign multiple addresses * on a single interface, SIOCSIFxxx ioctls are not suitable * and should be unused. */ /* we decided to obsolete this command (20000704) */ return(EINVAL); case SIOCDIFADDR_IN6: /* * for IPv4, we look for existing in_ifaddr here to allow * "ifconfig if0 delete" to remove first IPv4 address on the * interface. For IPv6, as the spec allow multiple interface * address from the day one, we consider "remove the first one" * semantics to be not preferable. */ if (ia == NULL) return(EADDRNOTAVAIL); /* FALLTHROUGH */ case SIOCAIFADDR_IN6: /* * We always require users to specify a valid IPv6 address for * the corresponding operation. */ if (ifra->ifra_addr.sin6_family != AF_INET6 || ifra->ifra_addr.sin6_len != sizeof(struct sockaddr_in6)) return(EAFNOSUPPORT); if (!privileged) return(EPERM); break; case SIOCGIFADDR_IN6: /* This interface is basically deprecated. use SIOCGIFCONF. */ /* fall through */ case SIOCGIFAFLAG_IN6: case SIOCGIFNETMASK_IN6: case SIOCGIFDSTADDR_IN6: case SIOCGIFALIFETIME_IN6: /* must think again about its semantics */ if (ia == NULL) return(EADDRNOTAVAIL); break; case SIOCSIFALIFETIME_IN6: { struct in6_addrlifetime *lt; if (!privileged) return(EPERM); if (ia == NULL) return(EADDRNOTAVAIL); /* sanity for overflow - beware unsigned */ lt = &ifr->ifr_ifru.ifru_lifetime; if (lt->ia6t_vltime != ND6_INFINITE_LIFETIME && lt->ia6t_vltime + time_second < time_second) { return EINVAL; } if (lt->ia6t_pltime != ND6_INFINITE_LIFETIME && lt->ia6t_pltime + time_second < time_second) { return EINVAL; } break; } } switch (cmd) { case SIOCGIFADDR_IN6: ifr->ifr_addr = ia->ia_addr; break; case SIOCGIFDSTADDR_IN6: if ((ifp->if_flags & IFF_POINTOPOINT) == 0) return(EINVAL); /* * XXX: should we check if ifa_dstaddr is NULL and return * an error? */ ifr->ifr_dstaddr = ia->ia_dstaddr; break; case SIOCGIFNETMASK_IN6: ifr->ifr_addr = ia->ia_prefixmask; break; case SIOCGIFAFLAG_IN6: ifr->ifr_ifru.ifru_flags6 = ia->ia6_flags; break; case SIOCGIFSTAT_IN6: if (ifp == NULL) return EINVAL; if (in6_ifstat == NULL || ifp->if_index >= in6_ifstatmax || in6_ifstat[ifp->if_index] == NULL) { /* return EAFNOSUPPORT? */ bzero(&ifr->ifr_ifru.ifru_stat, sizeof(ifr->ifr_ifru.ifru_stat)); } else ifr->ifr_ifru.ifru_stat = *in6_ifstat[ifp->if_index]; break; case SIOCGIFSTAT_ICMP6: if (ifp == NULL) return EINVAL; if (icmp6_ifstat == NULL || ifp->if_index >= icmp6_ifstatmax || icmp6_ifstat[ifp->if_index] == NULL) { /* return EAFNOSUPPORT? */ bzero(&ifr->ifr_ifru.ifru_stat, sizeof(ifr->ifr_ifru.ifru_icmp6stat)); } else ifr->ifr_ifru.ifru_icmp6stat = *icmp6_ifstat[ifp->if_index]; break; case SIOCGIFALIFETIME_IN6: ifr->ifr_ifru.ifru_lifetime = ia->ia6_lifetime; break; case SIOCSIFALIFETIME_IN6: ia->ia6_lifetime = ifr->ifr_ifru.ifru_lifetime; /* for sanity */ if (ia->ia6_lifetime.ia6t_vltime != ND6_INFINITE_LIFETIME) { ia->ia6_lifetime.ia6t_expire = time_second + ia->ia6_lifetime.ia6t_vltime; } else ia->ia6_lifetime.ia6t_expire = 0; if (ia->ia6_lifetime.ia6t_pltime != ND6_INFINITE_LIFETIME) { ia->ia6_lifetime.ia6t_preferred = time_second + ia->ia6_lifetime.ia6t_pltime; } else ia->ia6_lifetime.ia6t_preferred = 0; break; case SIOCAIFADDR_IN6: { int i, error = 0; struct nd_prefix pr0, *pr; /* * first, make or update the interface address structure, * and link it to the list. */ if ((error = in6_update_ifa(ifp, ifra, ia)) != 0) return(error); /* * then, make the prefix on-link on the interface. * XXX: we'd rather create the prefix before the address, but * we need at least one address to install the corresponding * interface route, so we configure the address first. */ /* * convert mask to prefix length (prefixmask has already * been validated in in6_update_ifa(). */ bzero(&pr0, sizeof(pr0)); pr0.ndpr_ifp = ifp; pr0.ndpr_plen = in6_mask2len(&ifra->ifra_prefixmask.sin6_addr, NULL); if (pr0.ndpr_plen == 128) break; /* we don't need to install a host route. */ pr0.ndpr_prefix = ifra->ifra_addr; pr0.ndpr_mask = ifra->ifra_prefixmask.sin6_addr; /* apply the mask for safety. */ for (i = 0; i < 4; i++) { pr0.ndpr_prefix.sin6_addr.s6_addr32[i] &= ifra->ifra_prefixmask.sin6_addr.s6_addr32[i]; } /* * XXX: since we don't have an API to set prefix (not address) * lifetimes, we just use the same lifetimes as addresses. * The (temporarily) installed lifetimes can be overridden by * later advertised RAs (when accept_rtadv is non 0), which is * an intended behavior. */ pr0.ndpr_raf_onlink = 1; /* should be configurable? */ pr0.ndpr_raf_auto = ((ifra->ifra_flags & IN6_IFF_AUTOCONF) != 0); pr0.ndpr_vltime = ifra->ifra_lifetime.ia6t_vltime; pr0.ndpr_pltime = ifra->ifra_lifetime.ia6t_pltime; /* add the prefix if there's one. */ if ((pr = nd6_prefix_lookup(&pr0)) == NULL) { /* * nd6_prelist_add will install the corresponding * interface route. */ if ((error = nd6_prelist_add(&pr0, NULL, &pr)) != 0) return(error); if (pr == NULL) { log(LOG_ERR, "nd6_prelist_add succedded but " "no prefix\n"); return(EINVAL); /* XXX panic here? */ } } if ((ia = in6ifa_ifpwithaddr(ifp, &ifra->ifra_addr.sin6_addr)) == NULL) { /* XXX: this should not happen! */ log(LOG_ERR, "in6_control: addition succeeded, but" " no ifaddr\n"); } else { if ((ia->ia6_flags & IN6_IFF_AUTOCONF) != 0 && ia->ia6_ndpr == NULL) { /* new autoconfed addr */ ia->ia6_ndpr = pr; pr->ndpr_refcnt++; /* * If this is the first autoconf address from * the prefix, create a temporary address * as well (when specified). */ if (ip6_use_tempaddr && pr->ndpr_refcnt == 1) { int e; if ((e = in6_tmpifadd(ia, 1)) != 0) { log(LOG_NOTICE, "in6_control: " "failed to create a " "temporary address, " "errno=%d\n", e); } } } /* * this might affect the status of autoconfigured * addresses, that is, this address might make * other addresses detached. */ pfxlist_onlink_check(); } break; } case SIOCDIFADDR_IN6: { int i = 0; struct nd_prefix pr0, *pr; /* * If the address being deleted is the only one that owns * the corresponding prefix, expire the prefix as well. * XXX: theoretically, we don't have to warry about such * relationship, since we separate the address management * and the prefix management. We do this, however, to provide * as much backward compatibility as possible in terms of * the ioctl operation. */ bzero(&pr0, sizeof(pr0)); pr0.ndpr_ifp = ifp; pr0.ndpr_plen = in6_mask2len(&ia->ia_prefixmask.sin6_addr, NULL); if (pr0.ndpr_plen == 128) goto purgeaddr; pr0.ndpr_prefix = ia->ia_addr; pr0.ndpr_mask = ia->ia_prefixmask.sin6_addr; for (i = 0; i < 4; i++) { pr0.ndpr_prefix.sin6_addr.s6_addr32[i] &= ia->ia_prefixmask.sin6_addr.s6_addr32[i]; } /* * The logic of the following condition is a bit complicated. * We expire the prefix when * 1. the address obeys autoconfiguration and it is the * only owner of the associated prefix, or * 2. the address does not obey autoconf and there is no * other owner of the prefix. */ if ((pr = nd6_prefix_lookup(&pr0)) != NULL && (((ia->ia6_flags & IN6_IFF_AUTOCONF) != 0 && pr->ndpr_refcnt == 1) || ((ia->ia6_flags & IN6_IFF_AUTOCONF) == 0 && pr->ndpr_refcnt == 0))) { pr->ndpr_expire = 1; /* XXX: just for expiration */ } purgeaddr: in6_purgeaddr(&ia->ia_ifa); break; } default: if (ifp == NULL || ifp->if_ioctl == 0) return(EOPNOTSUPP); return((*ifp->if_ioctl)(ifp, cmd, data)); } return(0); } /* * Update parameters of an IPv6 interface address. * If necessary, a new entry is created and linked into address chains. * This function is separated from in6_control(). * XXX: should this be performed under splnet()? */ int in6_update_ifa(ifp, ifra, ia) struct ifnet *ifp; struct in6_aliasreq *ifra; struct in6_ifaddr *ia; { int error = 0, hostIsNew = 0, plen = -1; struct in6_ifaddr *oia; struct sockaddr_in6 dst6; struct in6_addrlifetime *lt; /* Validate parameters */ if (ifp == NULL || ifra == NULL) /* this maybe redundant */ return(EINVAL); /* * The destination address for a p2p link must have a family * of AF_UNSPEC or AF_INET6. */ if ((ifp->if_flags & IFF_POINTOPOINT) != 0 && ifra->ifra_dstaddr.sin6_family != AF_INET6 && ifra->ifra_dstaddr.sin6_family != AF_UNSPEC) return(EAFNOSUPPORT); /* * validate ifra_prefixmask. don't check sin6_family, netmask * does not carry fields other than sin6_len. */ if (ifra->ifra_prefixmask.sin6_len > sizeof(struct sockaddr_in6)) return(EINVAL); /* * Because the IPv6 address architecture is classless, we require * users to specify a (non 0) prefix length (mask) for a new address. * We also require the prefix (when specified) mask is valid, and thus * reject a non-consecutive mask. */ if (ia == NULL && ifra->ifra_prefixmask.sin6_len == 0) return(EINVAL); if (ifra->ifra_prefixmask.sin6_len != 0) { plen = in6_mask2len(&ifra->ifra_prefixmask.sin6_addr, (u_char *)&ifra->ifra_prefixmask + ifra->ifra_prefixmask.sin6_len); if (plen <= 0) return(EINVAL); } else { /* * In this case, ia must not be NULL. We just use its prefix * length. */ plen = in6_mask2len(&ia->ia_prefixmask.sin6_addr, NULL); } /* * If the destination address on a p2p interface is specified, * and the address is a scoped one, validate/set the scope * zone identifier. */ dst6 = ifra->ifra_dstaddr; if ((ifp->if_flags & (IFF_POINTOPOINT|IFF_LOOPBACK)) && (dst6.sin6_family == AF_INET6)) { int scopeid; #ifndef SCOPEDROUTING if ((error = in6_recoverscope(&dst6, &ifra->ifra_dstaddr.sin6_addr, ifp)) != 0) return(error); #endif scopeid = in6_addr2scopeid(ifp, &dst6.sin6_addr); if (dst6.sin6_scope_id == 0) /* user omit to specify the ID. */ dst6.sin6_scope_id = scopeid; else if (dst6.sin6_scope_id != scopeid) return(EINVAL); /* scope ID mismatch. */ #ifndef SCOPEDROUTING if ((error = in6_embedscope(&dst6.sin6_addr, &dst6, NULL, NULL)) != 0) return(error); dst6.sin6_scope_id = 0; /* XXX */ #endif } /* * The destination address can be specified only for a p2p or a * loopback interface. If specified, the corresponding prefix length * must be 128. */ if (ifra->ifra_dstaddr.sin6_family == AF_INET6) { if ((ifp->if_flags & (IFF_POINTOPOINT|IFF_LOOPBACK)) == 0) { /* XXX: noisy message */ log(LOG_INFO, "in6_update_ifa: a destination can be " "specified for a p2p or a loopback IF only\n"); return(EINVAL); } if (plen != 128) { /* * The following message seems noisy, but we dare to * add it for diagnosis. */ log(LOG_INFO, "in6_update_ifa: prefixlen must be 128 " "when dstaddr is specified\n"); return(EINVAL); } } /* lifetime consistency check */ lt = &ifra->ifra_lifetime; if (lt->ia6t_vltime != ND6_INFINITE_LIFETIME && lt->ia6t_vltime + time_second < time_second) { return EINVAL; } if (lt->ia6t_vltime == 0) { /* * the following log might be noisy, but this is a typical * configuration mistake or a tool's bug. */ log(LOG_INFO, "in6_update_ifa: valid lifetime is 0 for %s\n", ip6_sprintf(&ifra->ifra_addr.sin6_addr)); } if (lt->ia6t_pltime != ND6_INFINITE_LIFETIME && lt->ia6t_pltime + time_second < time_second) { return EINVAL; } /* * If this is a new address, allocate a new ifaddr and link it * into chains. */ if (ia == NULL) { hostIsNew = 1; /* * When in6_update_ifa() is called in a process of a received * RA, it is called under splnet(). So, we should call malloc * with M_NOWAIT. */ ia = (struct in6_ifaddr *) malloc(sizeof(*ia), M_IFADDR, M_NOWAIT); if (ia == NULL) return (ENOBUFS); bzero((caddr_t)ia, sizeof(*ia)); /* Initialize the address and masks */ IFA_LOCK_INIT(&ia->ia_ifa); ia->ia_ifa.ifa_addr = (struct sockaddr *)&ia->ia_addr; ia->ia_addr.sin6_family = AF_INET6; ia->ia_addr.sin6_len = sizeof(ia->ia_addr); if ((ifp->if_flags & (IFF_POINTOPOINT | IFF_LOOPBACK)) != 0) { /* * XXX: some functions expect that ifa_dstaddr is not * NULL for p2p interfaces. */ ia->ia_ifa.ifa_dstaddr = (struct sockaddr *)&ia->ia_dstaddr; } else { ia->ia_ifa.ifa_dstaddr = NULL; } ia->ia_ifa.ifa_netmask = (struct sockaddr *)&ia->ia_prefixmask; ia->ia_ifp = ifp; if ((oia = in6_ifaddr) != NULL) { for ( ; oia->ia_next; oia = oia->ia_next) continue; oia->ia_next = ia; } else in6_ifaddr = ia; ia->ia_ifa.ifa_refcnt = 1; TAILQ_INSERT_TAIL(&ifp->if_addrlist, &ia->ia_ifa, ifa_list); } /* set prefix mask */ if (ifra->ifra_prefixmask.sin6_len) { /* * We prohibit changing the prefix length of an existing * address, because * + such an operation should be rare in IPv6, and * + the operation would confuse prefix management. */ if (ia->ia_prefixmask.sin6_len && in6_mask2len(&ia->ia_prefixmask.sin6_addr, NULL) != plen) { log(LOG_INFO, "in6_update_ifa: the prefix length of an" " existing (%s) address should not be changed\n", ip6_sprintf(&ia->ia_addr.sin6_addr)); error = EINVAL; goto unlink; } ia->ia_prefixmask = ifra->ifra_prefixmask; } /* * If a new destination address is specified, scrub the old one and * install the new destination. Note that the interface must be * p2p or loopback (see the check above.) */ if (dst6.sin6_family == AF_INET6 && !IN6_ARE_ADDR_EQUAL(&dst6.sin6_addr, &ia->ia_dstaddr.sin6_addr)) { int e; if ((ia->ia_flags & IFA_ROUTE) != 0 && (e = rtinit(&(ia->ia_ifa), (int)RTM_DELETE, RTF_HOST)) != 0) { log(LOG_ERR, "in6_update_ifa: failed to remove " "a route to the old destination: %s\n", ip6_sprintf(&ia->ia_addr.sin6_addr)); /* proceed anyway... */ } else ia->ia_flags &= ~IFA_ROUTE; ia->ia_dstaddr = dst6; } /* reset the interface and routing table appropriately. */ if ((error = in6_ifinit(ifp, ia, &ifra->ifra_addr, hostIsNew)) != 0) goto unlink; /* * Beyond this point, we should call in6_purgeaddr upon an error, * not just go to unlink. */ #if 0 /* disable this mechanism for now */ /* update prefix list */ if (hostIsNew && (ifra->ifra_flags & IN6_IFF_NOPFX) == 0) { /* XXX */ int iilen; iilen = (sizeof(ia->ia_prefixmask.sin6_addr) << 3) - plen; if ((error = in6_prefix_add_ifid(iilen, ia)) != 0) { in6_purgeaddr((struct ifaddr *)ia); return(error); } } #endif if ((ifp->if_flags & IFF_MULTICAST) != 0) { struct sockaddr_in6 mltaddr, mltmask; struct in6_multi *in6m; if (hostIsNew) { /* * join solicited multicast addr for new host id */ struct in6_addr llsol; bzero(&llsol, sizeof(struct in6_addr)); llsol.s6_addr16[0] = htons(0xff02); llsol.s6_addr16[1] = htons(ifp->if_index); llsol.s6_addr32[1] = 0; llsol.s6_addr32[2] = htonl(1); llsol.s6_addr32[3] = ifra->ifra_addr.sin6_addr.s6_addr32[3]; llsol.s6_addr8[12] = 0xff; (void)in6_addmulti(&llsol, ifp, &error); if (error != 0) { log(LOG_WARNING, "in6_update_ifa: addmulti failed for " "%s on %s (errno=%d)\n", ip6_sprintf(&llsol), if_name(ifp), error); in6_purgeaddr((struct ifaddr *)ia); return(error); } } bzero(&mltmask, sizeof(mltmask)); mltmask.sin6_len = sizeof(struct sockaddr_in6); mltmask.sin6_family = AF_INET6; mltmask.sin6_addr = in6mask32; /* * join link-local all-nodes address */ bzero(&mltaddr, sizeof(mltaddr)); mltaddr.sin6_len = sizeof(struct sockaddr_in6); mltaddr.sin6_family = AF_INET6; mltaddr.sin6_addr = in6addr_linklocal_allnodes; mltaddr.sin6_addr.s6_addr16[1] = htons(ifp->if_index); IN6_LOOKUP_MULTI(mltaddr.sin6_addr, ifp, in6m); if (in6m == NULL) { rtrequest(RTM_ADD, (struct sockaddr *)&mltaddr, (struct sockaddr *)&ia->ia_addr, (struct sockaddr *)&mltmask, RTF_UP|RTF_CLONING, /* xxx */ (struct rtentry **)0); (void)in6_addmulti(&mltaddr.sin6_addr, ifp, &error); if (error != 0) { log(LOG_WARNING, "in6_update_ifa: addmulti failed for " "%s on %s (errno=%d)\n", ip6_sprintf(&mltaddr.sin6_addr), if_name(ifp), error); } } /* * join node information group address */ #define hostnamelen strlen(hostname) if (in6_nigroup(ifp, hostname, hostnamelen, &mltaddr.sin6_addr) == 0) { IN6_LOOKUP_MULTI(mltaddr.sin6_addr, ifp, in6m); if (in6m == NULL && ia != NULL) { (void)in6_addmulti(&mltaddr.sin6_addr, ifp, &error); if (error != 0) { log(LOG_WARNING, "in6_update_ifa: " "addmulti failed for " "%s on %s (errno=%d)\n", ip6_sprintf(&mltaddr.sin6_addr), if_name(ifp), error); } } } #undef hostnamelen /* * join node-local all-nodes address, on loopback. * XXX: since "node-local" is obsoleted by interface-local, * we have to join the group on every interface with * some interface-boundary restriction. */ if (ifp->if_flags & IFF_LOOPBACK) { struct in6_ifaddr *ia_loop; struct in6_addr loop6 = in6addr_loopback; ia_loop = in6ifa_ifpwithaddr(ifp, &loop6); mltaddr.sin6_addr = in6addr_nodelocal_allnodes; IN6_LOOKUP_MULTI(mltaddr.sin6_addr, ifp, in6m); if (in6m == NULL && ia_loop != NULL) { rtrequest(RTM_ADD, (struct sockaddr *)&mltaddr, (struct sockaddr *)&ia_loop->ia_addr, (struct sockaddr *)&mltmask, RTF_UP, (struct rtentry **)0); (void)in6_addmulti(&mltaddr.sin6_addr, ifp, &error); if (error != 0) { log(LOG_WARNING, "in6_update_ifa: " "addmulti failed for %s on %s " "(errno=%d)\n", ip6_sprintf(&mltaddr.sin6_addr), if_name(ifp), error); } } } } ia->ia6_flags = ifra->ifra_flags; ia->ia6_flags &= ~IN6_IFF_DUPLICATED; /*safety*/ ia->ia6_flags &= ~IN6_IFF_NODAD; /* Mobile IPv6 */ ia->ia6_lifetime = ifra->ifra_lifetime; /* for sanity */ if (ia->ia6_lifetime.ia6t_vltime != ND6_INFINITE_LIFETIME) { ia->ia6_lifetime.ia6t_expire = time_second + ia->ia6_lifetime.ia6t_vltime; } else ia->ia6_lifetime.ia6t_expire = 0; if (ia->ia6_lifetime.ia6t_pltime != ND6_INFINITE_LIFETIME) { ia->ia6_lifetime.ia6t_preferred = time_second + ia->ia6_lifetime.ia6t_pltime; } else ia->ia6_lifetime.ia6t_preferred = 0; /* * make sure to initialize ND6 information. this is to workaround * issues with interfaces with IPv6 addresses, which have never brought * up. We are assuming that it is safe to nd6_ifattach multiple times. */ nd6_ifattach(ifp); /* * Perform DAD, if needed. * XXX It may be of use, if we can administratively * disable DAD. */ if (in6if_do_dad(ifp) && (ifra->ifra_flags & IN6_IFF_NODAD) == 0) { ia->ia6_flags |= IN6_IFF_TENTATIVE; nd6_dad_start((struct ifaddr *)ia, NULL); } return(error); unlink: /* * XXX: if a change of an existing address failed, keep the entry * anyway. */ if (hostIsNew) in6_unlink_ifa(ia, ifp); return(error); } void in6_purgeaddr(ifa) struct ifaddr *ifa; { struct ifnet *ifp = ifa->ifa_ifp; struct in6_ifaddr *ia = (struct in6_ifaddr *) ifa; /* stop DAD processing */ nd6_dad_stop(ifa); /* * delete route to the destination of the address being purged. * The interface must be p2p or loopback in this case. */ if ((ia->ia_flags & IFA_ROUTE) != 0 && ia->ia_dstaddr.sin6_len != 0) { int e; if ((e = rtinit(&(ia->ia_ifa), (int)RTM_DELETE, RTF_HOST)) != 0) { log(LOG_ERR, "in6_purgeaddr: failed to remove " "a route to the p2p destination: %s on %s, " "errno=%d\n", ip6_sprintf(&ia->ia_addr.sin6_addr), if_name(ifp), e); /* proceed anyway... */ } else ia->ia_flags &= ~IFA_ROUTE; } /* Remove ownaddr's loopback rtentry, if it exists. */ in6_ifremloop(&(ia->ia_ifa)); if (ifp->if_flags & IFF_MULTICAST) { /* * delete solicited multicast addr for deleting host id */ struct in6_multi *in6m; struct in6_addr llsol; bzero(&llsol, sizeof(struct in6_addr)); llsol.s6_addr16[0] = htons(0xff02); llsol.s6_addr16[1] = htons(ifp->if_index); llsol.s6_addr32[1] = 0; llsol.s6_addr32[2] = htonl(1); llsol.s6_addr32[3] = ia->ia_addr.sin6_addr.s6_addr32[3]; llsol.s6_addr8[12] = 0xff; IN6_LOOKUP_MULTI(llsol, ifp, in6m); if (in6m) in6_delmulti(in6m); } in6_unlink_ifa(ia, ifp); } static void in6_unlink_ifa(ia, ifp) struct in6_ifaddr *ia; struct ifnet *ifp; { int plen, iilen; struct in6_ifaddr *oia; int s = splnet(); TAILQ_REMOVE(&ifp->if_addrlist, &ia->ia_ifa, ifa_list); oia = ia; if (oia == (ia = in6_ifaddr)) in6_ifaddr = ia->ia_next; else { while (ia->ia_next && (ia->ia_next != oia)) ia = ia->ia_next; if (ia->ia_next) ia->ia_next = oia->ia_next; else { /* search failed */ printf("Couldn't unlink in6_ifaddr from in6_ifaddr\n"); } } if (oia->ia6_ifpr) { /* check for safety */ plen = in6_mask2len(&oia->ia_prefixmask.sin6_addr, NULL); iilen = (sizeof(oia->ia_prefixmask.sin6_addr) << 3) - plen; in6_prefix_remove_ifid(iilen, oia); } /* * When an autoconfigured address is being removed, release the * reference to the base prefix. Also, since the release might * affect the status of other (detached) addresses, call * pfxlist_onlink_check(). */ if ((oia->ia6_flags & IN6_IFF_AUTOCONF) != 0) { if (oia->ia6_ndpr == NULL) { log(LOG_NOTICE, "in6_unlink_ifa: autoconf'ed address " "%p has no prefix\n", oia); } else { oia->ia6_ndpr->ndpr_refcnt--; oia->ia6_flags &= ~IN6_IFF_AUTOCONF; oia->ia6_ndpr = NULL; } pfxlist_onlink_check(); } /* * release another refcnt for the link from in6_ifaddr. * Note that we should decrement the refcnt at least once for all *BSD. */ IFAFREE(&oia->ia_ifa); splx(s); } void in6_purgeif(ifp) struct ifnet *ifp; { struct ifaddr *ifa, *nifa; for (ifa = TAILQ_FIRST(&ifp->if_addrlist); ifa != NULL; ifa = nifa) { nifa = TAILQ_NEXT(ifa, ifa_list); if (ifa->ifa_addr->sa_family != AF_INET6) continue; in6_purgeaddr(ifa); } in6_ifdetach(ifp); } /* * SIOC[GAD]LIFADDR. * SIOCGLIFADDR: get first address. (?) * SIOCGLIFADDR with IFLR_PREFIX: * get first address that matches the specified prefix. * SIOCALIFADDR: add the specified address. * SIOCALIFADDR with IFLR_PREFIX: * add the specified prefix, filling hostid part from * the first link-local address. prefixlen must be <= 64. * SIOCDLIFADDR: delete the specified address. * SIOCDLIFADDR with IFLR_PREFIX: * delete the first address that matches the specified prefix. * return values: * EINVAL on invalid parameters * EADDRNOTAVAIL on prefix match failed/specified address not found * other values may be returned from in6_ioctl() * * NOTE: SIOCALIFADDR(with IFLR_PREFIX set) allows prefixlen less than 64. * this is to accomodate address naming scheme other than RFC2374, * in the future. * RFC2373 defines interface id to be 64bit, but it allows non-RFC2374 * address encoding scheme. (see figure on page 8) */ static int in6_lifaddr_ioctl(so, cmd, data, ifp, td) struct socket *so; u_long cmd; caddr_t data; struct ifnet *ifp; struct thread *td; { struct if_laddrreq *iflr = (struct if_laddrreq *)data; struct ifaddr *ifa; struct sockaddr *sa; /* sanity checks */ if (!data || !ifp) { panic("invalid argument to in6_lifaddr_ioctl"); /*NOTRECHED*/ } switch (cmd) { case SIOCGLIFADDR: /* address must be specified on GET with IFLR_PREFIX */ if ((iflr->flags & IFLR_PREFIX) == 0) break; /* FALLTHROUGH */ case SIOCALIFADDR: case SIOCDLIFADDR: /* address must be specified on ADD and DELETE */ sa = (struct sockaddr *)&iflr->addr; if (sa->sa_family != AF_INET6) return EINVAL; if (sa->sa_len != sizeof(struct sockaddr_in6)) return EINVAL; /* XXX need improvement */ sa = (struct sockaddr *)&iflr->dstaddr; if (sa->sa_family && sa->sa_family != AF_INET6) return EINVAL; if (sa->sa_len && sa->sa_len != sizeof(struct sockaddr_in6)) return EINVAL; break; default: /* shouldn't happen */ #if 0 panic("invalid cmd to in6_lifaddr_ioctl"); /* NOTREACHED */ #else return EOPNOTSUPP; #endif } if (sizeof(struct in6_addr) * 8 < iflr->prefixlen) return EINVAL; switch (cmd) { case SIOCALIFADDR: { struct in6_aliasreq ifra; struct in6_addr *hostid = NULL; int prefixlen; if ((iflr->flags & IFLR_PREFIX) != 0) { struct sockaddr_in6 *sin6; /* * hostid is to fill in the hostid part of the * address. hostid points to the first link-local * address attached to the interface. */ ifa = (struct ifaddr *)in6ifa_ifpforlinklocal(ifp, 0); if (!ifa) return EADDRNOTAVAIL; hostid = IFA_IN6(ifa); /* prefixlen must be <= 64. */ if (64 < iflr->prefixlen) return EINVAL; prefixlen = iflr->prefixlen; /* hostid part must be zero. */ sin6 = (struct sockaddr_in6 *)&iflr->addr; if (sin6->sin6_addr.s6_addr32[2] != 0 || sin6->sin6_addr.s6_addr32[3] != 0) { return EINVAL; } } else prefixlen = iflr->prefixlen; /* copy args to in6_aliasreq, perform ioctl(SIOCAIFADDR_IN6). */ bzero(&ifra, sizeof(ifra)); bcopy(iflr->iflr_name, ifra.ifra_name, sizeof(ifra.ifra_name)); bcopy(&iflr->addr, &ifra.ifra_addr, ((struct sockaddr *)&iflr->addr)->sa_len); if (hostid) { /* fill in hostid part */ ifra.ifra_addr.sin6_addr.s6_addr32[2] = hostid->s6_addr32[2]; ifra.ifra_addr.sin6_addr.s6_addr32[3] = hostid->s6_addr32[3]; } if (((struct sockaddr *)&iflr->dstaddr)->sa_family) { /*XXX*/ bcopy(&iflr->dstaddr, &ifra.ifra_dstaddr, ((struct sockaddr *)&iflr->dstaddr)->sa_len); if (hostid) { ifra.ifra_dstaddr.sin6_addr.s6_addr32[2] = hostid->s6_addr32[2]; ifra.ifra_dstaddr.sin6_addr.s6_addr32[3] = hostid->s6_addr32[3]; } } ifra.ifra_prefixmask.sin6_len = sizeof(struct sockaddr_in6); in6_len2mask(&ifra.ifra_prefixmask.sin6_addr, prefixlen); ifra.ifra_flags = iflr->flags & ~IFLR_PREFIX; return in6_control(so, SIOCAIFADDR_IN6, (caddr_t)&ifra, ifp, td); } case SIOCGLIFADDR: case SIOCDLIFADDR: { struct in6_ifaddr *ia; struct in6_addr mask, candidate, match; struct sockaddr_in6 *sin6; int cmp; bzero(&mask, sizeof(mask)); if (iflr->flags & IFLR_PREFIX) { /* lookup a prefix rather than address. */ in6_len2mask(&mask, iflr->prefixlen); sin6 = (struct sockaddr_in6 *)&iflr->addr; bcopy(&sin6->sin6_addr, &match, sizeof(match)); match.s6_addr32[0] &= mask.s6_addr32[0]; match.s6_addr32[1] &= mask.s6_addr32[1]; match.s6_addr32[2] &= mask.s6_addr32[2]; match.s6_addr32[3] &= mask.s6_addr32[3]; /* if you set extra bits, that's wrong */ if (bcmp(&match, &sin6->sin6_addr, sizeof(match))) return EINVAL; cmp = 1; } else { if (cmd == SIOCGLIFADDR) { /* on getting an address, take the 1st match */ cmp = 0; /* XXX */ } else { /* on deleting an address, do exact match */ in6_len2mask(&mask, 128); sin6 = (struct sockaddr_in6 *)&iflr->addr; bcopy(&sin6->sin6_addr, &match, sizeof(match)); cmp = 1; } } TAILQ_FOREACH(ifa, &ifp->if_addrlist, ifa_list) { if (ifa->ifa_addr->sa_family != AF_INET6) continue; if (!cmp) break; bcopy(IFA_IN6(ifa), &candidate, sizeof(candidate)); #ifndef SCOPEDROUTING /* * XXX: this is adhoc, but is necessary to allow * a user to specify fe80::/64 (not /10) for a * link-local address. */ if (IN6_IS_ADDR_LINKLOCAL(&candidate)) candidate.s6_addr16[1] = 0; #endif candidate.s6_addr32[0] &= mask.s6_addr32[0]; candidate.s6_addr32[1] &= mask.s6_addr32[1]; candidate.s6_addr32[2] &= mask.s6_addr32[2]; candidate.s6_addr32[3] &= mask.s6_addr32[3]; if (IN6_ARE_ADDR_EQUAL(&candidate, &match)) break; } if (!ifa) return EADDRNOTAVAIL; ia = ifa2ia6(ifa); if (cmd == SIOCGLIFADDR) { #ifndef SCOPEDROUTING struct sockaddr_in6 *s6; #endif /* fill in the if_laddrreq structure */ bcopy(&ia->ia_addr, &iflr->addr, ia->ia_addr.sin6_len); #ifndef SCOPEDROUTING /* XXX see above */ s6 = (struct sockaddr_in6 *)&iflr->addr; if (IN6_IS_ADDR_LINKLOCAL(&s6->sin6_addr)) { s6->sin6_addr.s6_addr16[1] = 0; s6->sin6_scope_id = in6_addr2scopeid(ifp, &s6->sin6_addr); } #endif if ((ifp->if_flags & IFF_POINTOPOINT) != 0) { bcopy(&ia->ia_dstaddr, &iflr->dstaddr, ia->ia_dstaddr.sin6_len); #ifndef SCOPEDROUTING /* XXX see above */ s6 = (struct sockaddr_in6 *)&iflr->dstaddr; if (IN6_IS_ADDR_LINKLOCAL(&s6->sin6_addr)) { s6->sin6_addr.s6_addr16[1] = 0; s6->sin6_scope_id = in6_addr2scopeid(ifp, &s6->sin6_addr); } #endif } else bzero(&iflr->dstaddr, sizeof(iflr->dstaddr)); iflr->prefixlen = in6_mask2len(&ia->ia_prefixmask.sin6_addr, NULL); iflr->flags = ia->ia6_flags; /* XXX */ return 0; } else { struct in6_aliasreq ifra; /* fill in6_aliasreq and do ioctl(SIOCDIFADDR_IN6) */ bzero(&ifra, sizeof(ifra)); bcopy(iflr->iflr_name, ifra.ifra_name, sizeof(ifra.ifra_name)); bcopy(&ia->ia_addr, &ifra.ifra_addr, ia->ia_addr.sin6_len); if ((ifp->if_flags & IFF_POINTOPOINT) != 0) { bcopy(&ia->ia_dstaddr, &ifra.ifra_dstaddr, ia->ia_dstaddr.sin6_len); } else { bzero(&ifra.ifra_dstaddr, sizeof(ifra.ifra_dstaddr)); } bcopy(&ia->ia_prefixmask, &ifra.ifra_dstaddr, ia->ia_prefixmask.sin6_len); ifra.ifra_flags = ia->ia6_flags; return in6_control(so, SIOCDIFADDR_IN6, (caddr_t)&ifra, ifp, td); } } } return EOPNOTSUPP; /* just for safety */ } /* * Initialize an interface's intetnet6 address * and routing table entry. */ static int in6_ifinit(ifp, ia, sin6, newhost) struct ifnet *ifp; struct in6_ifaddr *ia; struct sockaddr_in6 *sin6; int newhost; { int error = 0, plen, ifacount = 0; int s = splimp(); struct ifaddr *ifa; /* * Give the interface a chance to initialize * if this is its first address, * and to validate the address if necessary. */ TAILQ_FOREACH(ifa, &ifp->if_addrlist, ifa_list) { if (ifa->ifa_addr == NULL) continue; /* just for safety */ if (ifa->ifa_addr->sa_family != AF_INET6) continue; ifacount++; } ia->ia_addr = *sin6; if (ifacount <= 1 && ifp->if_ioctl && (error = (*ifp->if_ioctl)(ifp, SIOCSIFADDR, (caddr_t)ia))) { splx(s); return(error); } splx(s); ia->ia_ifa.ifa_metric = ifp->if_metric; /* we could do in(6)_socktrim here, but just omit it at this moment. */ /* * Special case: * If the destination address is specified for a point-to-point * interface, install a route to the destination as an interface * direct route. */ plen = in6_mask2len(&ia->ia_prefixmask.sin6_addr, NULL); /* XXX */ if (plen == 128 && ia->ia_dstaddr.sin6_family == AF_INET6) { if ((error = rtinit(&(ia->ia_ifa), (int)RTM_ADD, RTF_UP | RTF_HOST)) != 0) return(error); ia->ia_flags |= IFA_ROUTE; } if (plen < 128) { /* * The RTF_CLONING flag is necessary for in6_is_ifloop_auto(). */ ia->ia_ifa.ifa_flags |= RTF_CLONING; } /* Add ownaddr as loopback rtentry, if necessary (ex. on p2p link). */ if (newhost) { /* set the rtrequest function to create llinfo */ ia->ia_ifa.ifa_rtrequest = nd6_rtrequest; in6_ifaddloop(&(ia->ia_ifa)); } return(error); } /* * Add an address to the list of IP6 multicast addresses for a * given interface. */ struct in6_multi * in6_addmulti(maddr6, ifp, errorp) struct in6_addr *maddr6; struct ifnet *ifp; int *errorp; { struct in6_multi *in6m; struct sockaddr_in6 sin6; struct ifmultiaddr *ifma; int s = splnet(); *errorp = 0; /* * Call generic routine to add membership or increment * refcount. It wants addresses in the form of a sockaddr, * so we build one here (being careful to zero the unused bytes). */ bzero(&sin6, sizeof sin6); sin6.sin6_family = AF_INET6; sin6.sin6_len = sizeof sin6; sin6.sin6_addr = *maddr6; *errorp = if_addmulti(ifp, (struct sockaddr *)&sin6, &ifma); if (*errorp) { splx(s); return 0; } /* * If ifma->ifma_protospec is null, then if_addmulti() created * a new record. Otherwise, we are done. */ if (ifma->ifma_protospec != 0) return ifma->ifma_protospec; /* XXX - if_addmulti uses M_WAITOK. Can this really be called at interrupt time? If so, need to fix if_addmulti. XXX */ in6m = (struct in6_multi *)malloc(sizeof(*in6m), M_IPMADDR, M_NOWAIT); if (in6m == NULL) { splx(s); return (NULL); } bzero(in6m, sizeof *in6m); in6m->in6m_addr = *maddr6; in6m->in6m_ifp = ifp; in6m->in6m_ifma = ifma; ifma->ifma_protospec = in6m; LIST_INSERT_HEAD(&in6_multihead, in6m, in6m_entry); /* * Let MLD6 know that we have joined a new IP6 multicast * group. */ mld6_start_listening(in6m); splx(s); return(in6m); } /* * Delete a multicast address record. */ void in6_delmulti(in6m) struct in6_multi *in6m; { struct ifmultiaddr *ifma = in6m->in6m_ifma; int s = splnet(); if (ifma->ifma_refcount == 1) { /* * No remaining claims to this record; let MLD6 know * that we are leaving the multicast group. */ mld6_stop_listening(in6m); ifma->ifma_protospec = 0; LIST_REMOVE(in6m, in6m_entry); free(in6m, M_IPMADDR); } /* XXX - should be separate API for when we have an ifma? */ if_delmulti(ifma->ifma_ifp, ifma->ifma_addr); splx(s); } /* * Find an IPv6 interface link-local address specific to an interface. */ struct in6_ifaddr * in6ifa_ifpforlinklocal(ifp, ignoreflags) struct ifnet *ifp; int ignoreflags; { struct ifaddr *ifa; TAILQ_FOREACH(ifa, &ifp->if_addrlist, ifa_list) { if (ifa->ifa_addr == NULL) continue; /* just for safety */ if (ifa->ifa_addr->sa_family != AF_INET6) continue; if (IN6_IS_ADDR_LINKLOCAL(IFA_IN6(ifa))) { if ((((struct in6_ifaddr *)ifa)->ia6_flags & ignoreflags) != 0) continue; break; } } return((struct in6_ifaddr *)ifa); } /* * find the internet address corresponding to a given interface and address. */ struct in6_ifaddr * in6ifa_ifpwithaddr(ifp, addr) struct ifnet *ifp; struct in6_addr *addr; { struct ifaddr *ifa; TAILQ_FOREACH(ifa, &ifp->if_addrlist, ifa_list) { if (ifa->ifa_addr == NULL) continue; /* just for safety */ if (ifa->ifa_addr->sa_family != AF_INET6) continue; if (IN6_ARE_ADDR_EQUAL(addr, IFA_IN6(ifa))) break; } return((struct in6_ifaddr *)ifa); } /* * Convert IP6 address to printable (loggable) representation. */ static char digits[] = "0123456789abcdef"; static int ip6round = 0; char * ip6_sprintf(addr) const struct in6_addr *addr; { static char ip6buf[8][48]; int i; char *cp; const u_short *a = (const u_short *)addr; const u_char *d; int dcolon = 0; ip6round = (ip6round + 1) & 7; cp = ip6buf[ip6round]; for (i = 0; i < 8; i++) { if (dcolon == 1) { if (*a == 0) { if (i == 7) *cp++ = ':'; a++; continue; } else dcolon = 2; } if (*a == 0) { if (dcolon == 0 && *(a + 1) == 0) { if (i == 0) *cp++ = ':'; *cp++ = ':'; dcolon = 1; } else { *cp++ = '0'; *cp++ = ':'; } a++; continue; } d = (const u_char *)a; *cp++ = digits[*d >> 4]; *cp++ = digits[*d++ & 0xf]; *cp++ = digits[*d >> 4]; *cp++ = digits[*d & 0xf]; *cp++ = ':'; a++; } *--cp = 0; return(ip6buf[ip6round]); } int in6_localaddr(in6) struct in6_addr *in6; { struct in6_ifaddr *ia; if (IN6_IS_ADDR_LOOPBACK(in6) || IN6_IS_ADDR_LINKLOCAL(in6)) return 1; for (ia = in6_ifaddr; ia; ia = ia->ia_next) if (IN6_ARE_MASKED_ADDR_EQUAL(in6, &ia->ia_addr.sin6_addr, &ia->ia_prefixmask.sin6_addr)) return 1; return (0); } int in6_is_addr_deprecated(sa6) struct sockaddr_in6 *sa6; { struct in6_ifaddr *ia; for (ia = in6_ifaddr; ia; ia = ia->ia_next) { if (IN6_ARE_ADDR_EQUAL(&ia->ia_addr.sin6_addr, &sa6->sin6_addr) && #ifdef SCOPEDROUTING ia->ia_addr.sin6_scope_id == sa6->sin6_scope_id && #endif (ia->ia6_flags & IN6_IFF_DEPRECATED) != 0) return(1); /* true */ /* XXX: do we still have to go thru the rest of the list? */ } return(0); /* false */ } /* * return length of part which dst and src are equal * hard coding... */ int in6_matchlen(src, dst) struct in6_addr *src, *dst; { int match = 0; u_char *s = (u_char *)src, *d = (u_char *)dst; u_char *lim = s + 16, r; while (s < lim) if ((r = (*d++ ^ *s++)) != 0) { while (r < 128) { match++; r <<= 1; } break; } else match += 8; return match; } /* XXX: to be scope conscious */ int in6_are_prefix_equal(p1, p2, len) struct in6_addr *p1, *p2; int len; { int bytelen, bitlen; /* sanity check */ if (0 > len || len > 128) { log(LOG_ERR, "in6_are_prefix_equal: invalid prefix length(%d)\n", len); return(0); } bytelen = len / 8; bitlen = len % 8; if (bcmp(&p1->s6_addr, &p2->s6_addr, bytelen)) return(0); if (p1->s6_addr[bytelen] >> (8 - bitlen) != p2->s6_addr[bytelen] >> (8 - bitlen)) return(0); return(1); } void in6_prefixlen2mask(maskp, len) struct in6_addr *maskp; int len; { u_char maskarray[8] = {0x80, 0xc0, 0xe0, 0xf0, 0xf8, 0xfc, 0xfe, 0xff}; int bytelen, bitlen, i; /* sanity check */ if (0 > len || len > 128) { log(LOG_ERR, "in6_prefixlen2mask: invalid prefix length(%d)\n", len); return; } bzero(maskp, sizeof(*maskp)); bytelen = len / 8; bitlen = len % 8; for (i = 0; i < bytelen; i++) maskp->s6_addr[i] = 0xff; if (bitlen) maskp->s6_addr[bytelen] = maskarray[bitlen - 1]; } /* * return the best address out of the same scope */ struct in6_ifaddr * in6_ifawithscope(oifp, dst) struct ifnet *oifp; struct in6_addr *dst; { int dst_scope = in6_addrscope(dst), src_scope, best_scope = 0; int blen = -1; struct ifaddr *ifa; struct ifnet *ifp; struct in6_ifaddr *ifa_best = NULL; if (oifp == NULL) { #if 0 printf("in6_ifawithscope: output interface is not specified\n"); #endif return(NULL); } /* * We search for all addresses on all interfaces from the beginning. * Comparing an interface with the outgoing interface will be done * only at the final stage of tiebreaking. */ IFNET_RLOCK(); for (ifp = TAILQ_FIRST(&ifnet); ifp; ifp = TAILQ_NEXT(ifp, if_list)) { /* * We can never take an address that breaks the scope zone * of the destination. */ if (in6_addr2scopeid(ifp, dst) != in6_addr2scopeid(oifp, dst)) continue; TAILQ_FOREACH(ifa, &ifp->if_addrlist, ifa_list) { int tlen = -1, dscopecmp, bscopecmp, matchcmp; if (ifa->ifa_addr->sa_family != AF_INET6) continue; src_scope = in6_addrscope(IFA_IN6(ifa)); /* * Don't use an address before completing DAD * nor a duplicated address. */ if (((struct in6_ifaddr *)ifa)->ia6_flags & IN6_IFF_NOTREADY) continue; /* XXX: is there any case to allow anycasts? */ if (((struct in6_ifaddr *)ifa)->ia6_flags & IN6_IFF_ANYCAST) continue; if (((struct in6_ifaddr *)ifa)->ia6_flags & IN6_IFF_DETACHED) continue; /* * If this is the first address we find, * keep it anyway. */ if (ifa_best == NULL) goto replace; /* * ifa_best is never NULL beyond this line except * within the block labeled "replace". */ /* * If ifa_best has a smaller scope than dst and * the current address has a larger one than * (or equal to) dst, always replace ifa_best. * Also, if the current address has a smaller scope * than dst, ignore it unless ifa_best also has a * smaller scope. * Consequently, after the two if-clause below, * the followings must be satisfied: * (scope(src) < scope(dst) && * scope(best) < scope(dst)) * OR * (scope(best) >= scope(dst) && * scope(src) >= scope(dst)) */ if (IN6_ARE_SCOPE_CMP(best_scope, dst_scope) < 0 && IN6_ARE_SCOPE_CMP(src_scope, dst_scope) >= 0) goto replace; /* (A) */ if (IN6_ARE_SCOPE_CMP(src_scope, dst_scope) < 0 && IN6_ARE_SCOPE_CMP(best_scope, dst_scope) >= 0) continue; /* (B) */ /* * A deprecated address SHOULD NOT be used in new * communications if an alternate (non-deprecated) * address is available and has sufficient scope. * RFC 2462, Section 5.5.4. */ if (((struct in6_ifaddr *)ifa)->ia6_flags & IN6_IFF_DEPRECATED) { /* * Ignore any deprecated addresses if * specified by configuration. */ if (!ip6_use_deprecated) continue; /* * If we have already found a non-deprecated * candidate, just ignore deprecated addresses. */ if ((ifa_best->ia6_flags & IN6_IFF_DEPRECATED) == 0) continue; } /* * A non-deprecated address is always preferred * to a deprecated one regardless of scopes and * address matching (Note invariants ensured by the * conditions (A) and (B) above.) */ if ((ifa_best->ia6_flags & IN6_IFF_DEPRECATED) && (((struct in6_ifaddr *)ifa)->ia6_flags & IN6_IFF_DEPRECATED) == 0) goto replace; /* * When we use temporary addresses described in * RFC 3041, we prefer temporary addresses to * public autoconf addresses. Again, note the * invariants from (A) and (B). Also note that we * don't have any preference between static addresses * and autoconf addresses (despite of whether or not * the latter is temporary or public.) */ if (ip6_use_tempaddr) { struct in6_ifaddr *ifat; ifat = (struct in6_ifaddr *)ifa; if ((ifa_best->ia6_flags & (IN6_IFF_AUTOCONF|IN6_IFF_TEMPORARY)) == IN6_IFF_AUTOCONF && (ifat->ia6_flags & (IN6_IFF_AUTOCONF|IN6_IFF_TEMPORARY)) == (IN6_IFF_AUTOCONF|IN6_IFF_TEMPORARY)) { goto replace; } if ((ifa_best->ia6_flags & (IN6_IFF_AUTOCONF|IN6_IFF_TEMPORARY)) == (IN6_IFF_AUTOCONF|IN6_IFF_TEMPORARY) && (ifat->ia6_flags & (IN6_IFF_AUTOCONF|IN6_IFF_TEMPORARY)) == IN6_IFF_AUTOCONF) { continue; } } /* * At this point, we have two cases: * 1. we are looking at a non-deprecated address, * and ifa_best is also non-deprecated. * 2. we are looking at a deprecated address, * and ifa_best is also deprecated. * Also, we do not have to consider a case where * the scope of if_best is larger(smaller) than dst and * the scope of the current address is smaller(larger) * than dst. Such a case has already been covered. * Tiebreaking is done according to the following * items: * - the scope comparison between the address and * dst (dscopecmp) * - the scope comparison between the address and * ifa_best (bscopecmp) * - if the address match dst longer than ifa_best * (matchcmp) * - if the address is on the outgoing I/F (outI/F) * * Roughly speaking, the selection policy is * - the most important item is scope. The same scope * is best. Then search for a larger scope. * Smaller scopes are the last resort. * - A deprecated address is chosen only when we have * no address that has an enough scope, but is * prefered to any addresses of smaller scopes * (this must be already done above.) * - addresses on the outgoing I/F are preferred to * ones on other interfaces if none of above * tiebreaks. In the table below, the column "bI" * means if the best_ifa is on the outgoing * interface, and the column "sI" means if the ifa * is on the outgoing interface. * - If there is no other reasons to choose one, * longest address match against dst is considered. * * The precise decision table is as follows: * dscopecmp bscopecmp match bI oI | replace? * N/A equal N/A Y N | No (1) * N/A equal N/A N Y | Yes (2) * N/A equal larger N/A | Yes (3) * N/A equal !larger N/A | No (4) * larger larger N/A N/A | No (5) * larger smaller N/A N/A | Yes (6) * smaller larger N/A N/A | Yes (7) * smaller smaller N/A N/A | No (8) * equal smaller N/A N/A | Yes (9) * equal larger (already done at A above) */ dscopecmp = IN6_ARE_SCOPE_CMP(src_scope, dst_scope); bscopecmp = IN6_ARE_SCOPE_CMP(src_scope, best_scope); if (bscopecmp == 0) { struct ifnet *bifp = ifa_best->ia_ifp; if (bifp == oifp && ifp != oifp) /* (1) */ continue; if (bifp != oifp && ifp == oifp) /* (2) */ goto replace; /* * Both bifp and ifp are on the outgoing * interface, or both two are on a different * interface from the outgoing I/F. * now we need address matching against dst * for tiebreaking. */ tlen = in6_matchlen(IFA_IN6(ifa), dst); matchcmp = tlen - blen; if (matchcmp > 0) /* (3) */ goto replace; continue; /* (4) */ } if (dscopecmp > 0) { if (bscopecmp > 0) /* (5) */ continue; goto replace; /* (6) */ } if (dscopecmp < 0) { if (bscopecmp > 0) /* (7) */ goto replace; continue; /* (8) */ } /* now dscopecmp must be 0 */ if (bscopecmp < 0) goto replace; /* (9) */ replace: ifa_best = (struct in6_ifaddr *)ifa; blen = tlen >= 0 ? tlen : in6_matchlen(IFA_IN6(ifa), dst); best_scope = in6_addrscope(&ifa_best->ia_addr.sin6_addr); } } IFNET_RUNLOCK(); /* count statistics for future improvements */ if (ifa_best == NULL) ip6stat.ip6s_sources_none++; else { if (oifp == ifa_best->ia_ifp) ip6stat.ip6s_sources_sameif[best_scope]++; else ip6stat.ip6s_sources_otherif[best_scope]++; if (best_scope == dst_scope) ip6stat.ip6s_sources_samescope[best_scope]++; else ip6stat.ip6s_sources_otherscope[best_scope]++; if ((ifa_best->ia6_flags & IN6_IFF_DEPRECATED) != 0) ip6stat.ip6s_sources_deprecated[best_scope]++; } return(ifa_best); } /* * return the best address out of the same scope. if no address was * found, return the first valid address from designated IF. */ struct in6_ifaddr * in6_ifawithifp(ifp, dst) struct ifnet *ifp; struct in6_addr *dst; { int dst_scope = in6_addrscope(dst), blen = -1, tlen; struct ifaddr *ifa; struct in6_ifaddr *besta = 0; struct in6_ifaddr *dep[2]; /* last-resort: deprecated */ dep[0] = dep[1] = NULL; /* * We first look for addresses in the same scope. * If there is one, return it. * If two or more, return one which matches the dst longest. * If none, return one of global addresses assigned other ifs. */ TAILQ_FOREACH(ifa, &ifp->if_addrlist, ifa_list) { if (ifa->ifa_addr->sa_family != AF_INET6) continue; if (((struct in6_ifaddr *)ifa)->ia6_flags & IN6_IFF_ANYCAST) continue; /* XXX: is there any case to allow anycast? */ if (((struct in6_ifaddr *)ifa)->ia6_flags & IN6_IFF_NOTREADY) continue; /* don't use this interface */ if (((struct in6_ifaddr *)ifa)->ia6_flags & IN6_IFF_DETACHED) continue; if (((struct in6_ifaddr *)ifa)->ia6_flags & IN6_IFF_DEPRECATED) { if (ip6_use_deprecated) dep[0] = (struct in6_ifaddr *)ifa; continue; } if (dst_scope == in6_addrscope(IFA_IN6(ifa))) { /* * call in6_matchlen() as few as possible */ if (besta) { if (blen == -1) blen = in6_matchlen(&besta->ia_addr.sin6_addr, dst); tlen = in6_matchlen(IFA_IN6(ifa), dst); if (tlen > blen) { blen = tlen; besta = (struct in6_ifaddr *)ifa; } } else besta = (struct in6_ifaddr *)ifa; } } if (besta) return(besta); TAILQ_FOREACH(ifa, &ifp->if_addrlist, ifa_list) { if (ifa->ifa_addr->sa_family != AF_INET6) continue; if (((struct in6_ifaddr *)ifa)->ia6_flags & IN6_IFF_ANYCAST) continue; /* XXX: is there any case to allow anycast? */ if (((struct in6_ifaddr *)ifa)->ia6_flags & IN6_IFF_NOTREADY) continue; /* don't use this interface */ if (((struct in6_ifaddr *)ifa)->ia6_flags & IN6_IFF_DETACHED) continue; if (((struct in6_ifaddr *)ifa)->ia6_flags & IN6_IFF_DEPRECATED) { if (ip6_use_deprecated) dep[1] = (struct in6_ifaddr *)ifa; continue; } return (struct in6_ifaddr *)ifa; } /* use the last-resort values, that are, deprecated addresses */ if (dep[0]) return dep[0]; if (dep[1]) return dep[1]; return NULL; } /* * perform DAD when interface becomes IFF_UP. */ void in6_if_up(ifp) struct ifnet *ifp; { struct ifaddr *ifa; struct in6_ifaddr *ia; int dad_delay; /* delay ticks before DAD output */ /* * special cases, like 6to4, are handled in in6_ifattach */ in6_ifattach(ifp, NULL); dad_delay = 0; TAILQ_FOREACH(ifa, &ifp->if_addrlist, ifa_list) { if (ifa->ifa_addr->sa_family != AF_INET6) continue; ia = (struct in6_ifaddr *)ifa; if (ia->ia6_flags & IN6_IFF_TENTATIVE) nd6_dad_start(ifa, &dad_delay); } } int in6if_do_dad(ifp) struct ifnet *ifp; { if ((ifp->if_flags & IFF_LOOPBACK) != 0) return(0); switch (ifp->if_type) { #ifdef IFT_DUMMY case IFT_DUMMY: #endif case IFT_FAITH: /* * These interfaces do not have the IFF_LOOPBACK flag, * but loop packets back. We do not have to do DAD on such * interfaces. We should even omit it, because loop-backed * NS would confuse the DAD procedure. */ return(0); default: /* * Our DAD routine requires the interface up and running. * However, some interfaces can be up before the RUNNING * status. Additionaly, users may try to assign addresses * before the interface becomes up (or running). * We simply skip DAD in such a case as a work around. * XXX: we should rather mark "tentative" on such addresses, * and do DAD after the interface becomes ready. */ if ((ifp->if_flags & (IFF_UP|IFF_RUNNING)) != (IFF_UP|IFF_RUNNING)) return(0); return(1); } } /* * Calculate max IPv6 MTU through all the interfaces and store it * to in6_maxmtu. */ void in6_setmaxmtu() { unsigned long maxmtu = 0; struct ifnet *ifp; IFNET_RLOCK(); for (ifp = TAILQ_FIRST(&ifnet); ifp; ifp = TAILQ_NEXT(ifp, if_list)) { if ((ifp->if_flags & IFF_LOOPBACK) == 0 && nd_ifinfo[ifp->if_index].linkmtu > maxmtu) maxmtu = nd_ifinfo[ifp->if_index].linkmtu; } IFNET_RUNLOCK(); if (maxmtu) /* update only when maxmtu is positive */ in6_maxmtu = maxmtu; } /* * Convert sockaddr_in6 to sockaddr_in. Original sockaddr_in6 must be * v4 mapped addr or v4 compat addr */ void in6_sin6_2_sin(struct sockaddr_in *sin, struct sockaddr_in6 *sin6) { bzero(sin, sizeof(*sin)); sin->sin_len = sizeof(struct sockaddr_in); sin->sin_family = AF_INET; sin->sin_port = sin6->sin6_port; sin->sin_addr.s_addr = sin6->sin6_addr.s6_addr32[3]; } /* Convert sockaddr_in to sockaddr_in6 in v4 mapped addr format. */ void in6_sin_2_v4mapsin6(struct sockaddr_in *sin, struct sockaddr_in6 *sin6) { bzero(sin6, sizeof(*sin6)); sin6->sin6_len = sizeof(struct sockaddr_in6); sin6->sin6_family = AF_INET6; sin6->sin6_port = sin->sin_port; sin6->sin6_addr.s6_addr32[0] = 0; sin6->sin6_addr.s6_addr32[1] = 0; sin6->sin6_addr.s6_addr32[2] = IPV6_ADDR_INT32_SMP; sin6->sin6_addr.s6_addr32[3] = sin->sin_addr.s_addr; } /* Convert sockaddr_in6 into sockaddr_in. */ void in6_sin6_2_sin_in_sock(struct sockaddr *nam) { struct sockaddr_in *sin_p; struct sockaddr_in6 sin6; /* * Save original sockaddr_in6 addr and convert it * to sockaddr_in. */ sin6 = *(struct sockaddr_in6 *)nam; sin_p = (struct sockaddr_in *)nam; in6_sin6_2_sin(sin_p, &sin6); } /* Convert sockaddr_in into sockaddr_in6 in v4 mapped addr format. */ void in6_sin_2_v4mapsin6_in_sock(struct sockaddr **nam) { struct sockaddr_in *sin_p; struct sockaddr_in6 *sin6_p; MALLOC(sin6_p, struct sockaddr_in6 *, sizeof *sin6_p, M_SONAME, M_WAITOK); sin_p = (struct sockaddr_in *)*nam; in6_sin_2_v4mapsin6(sin_p, sin6_p); FREE(*nam, M_SONAME); *nam = (struct sockaddr *)sin6_p; } Index: head/sys/netinet6/in6_ifattach.c =================================================================== --- head/sys/netinet6/in6_ifattach.c (revision 120726) +++ head/sys/netinet6/in6_ifattach.c (revision 120727) @@ -1,1053 +1,1057 @@ /* $FreeBSD$ */ /* $KAME: in6_ifattach.c,v 1.118 2001/05/24 07:44:00 itojun Exp $ */ /* * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the project nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include struct in6_ifstat **in6_ifstat = NULL; struct icmp6_ifstat **icmp6_ifstat = NULL; size_t in6_ifstatmax = 0; size_t icmp6_ifstatmax = 0; unsigned long in6_maxmtu = 0; #ifdef IP6_AUTO_LINKLOCAL int ip6_auto_linklocal = IP6_AUTO_LINKLOCAL; #else int ip6_auto_linklocal = 1; /* enable by default */ #endif struct callout in6_tmpaddrtimer_ch; extern struct inpcbinfo udbinfo; extern struct inpcbinfo ripcbinfo; static int get_rand_ifid __P((struct ifnet *, struct in6_addr *)); static int generate_tmp_ifid __P((u_int8_t *, const u_int8_t *, u_int8_t *)); static int get_hw_ifid __P((struct ifnet *, struct in6_addr *)); static int get_ifid __P((struct ifnet *, struct ifnet *, struct in6_addr *)); static int in6_ifattach_linklocal __P((struct ifnet *, struct ifnet *)); static int in6_ifattach_loopback __P((struct ifnet *)); #define EUI64_GBIT 0x01 #define EUI64_UBIT 0x02 #define EUI64_TO_IFID(in6) do {(in6)->s6_addr[8] ^= EUI64_UBIT; } while (0) #define EUI64_GROUP(in6) ((in6)->s6_addr[8] & EUI64_GBIT) #define EUI64_INDIVIDUAL(in6) (!EUI64_GROUP(in6)) #define EUI64_LOCAL(in6) ((in6)->s6_addr[8] & EUI64_UBIT) #define EUI64_UNIVERSAL(in6) (!EUI64_LOCAL(in6)) #define IFID_LOCAL(in6) (!EUI64_LOCAL(in6)) #define IFID_UNIVERSAL(in6) (!EUI64_UNIVERSAL(in6)) /* * Generate a last-resort interface identifier, when the machine has no * IEEE802/EUI64 address sources. * The goal here is to get an interface identifier that is * (1) random enough and (2) does not change across reboot. * We currently use MD5(hostname) for it. */ static int get_rand_ifid(ifp, in6) struct ifnet *ifp; struct in6_addr *in6; /* upper 64bits are preserved */ { MD5_CTX ctxt; u_int8_t digest[16]; int hostnamelen = strlen(hostname); #if 0 /* we need at least several letters as seed for ifid */ if (hostnamelen < 3) return -1; #endif /* generate 8 bytes of pseudo-random value. */ bzero(&ctxt, sizeof(ctxt)); MD5Init(&ctxt); MD5Update(&ctxt, hostname, hostnamelen); MD5Final(digest, &ctxt); /* assumes sizeof(digest) > sizeof(ifid) */ bcopy(digest, &in6->s6_addr[8], 8); /* make sure to set "u" bit to local, and "g" bit to individual. */ in6->s6_addr[8] &= ~EUI64_GBIT; /* g bit to "individual" */ in6->s6_addr[8] |= EUI64_UBIT; /* u bit to "local" */ /* convert EUI64 into IPv6 interface identifier */ EUI64_TO_IFID(in6); return 0; } static int generate_tmp_ifid(seed0, seed1, ret) u_int8_t *seed0, *ret; const u_int8_t *seed1; { MD5_CTX ctxt; u_int8_t seed[16], digest[16], nullbuf[8]; u_int32_t val32; struct timeval tv; /* If there's no hisotry, start with a random seed. */ bzero(nullbuf, sizeof(nullbuf)); if (bcmp(nullbuf, seed0, sizeof(nullbuf)) == 0) { int i; for (i = 0; i < 2; i++) { microtime(&tv); val32 = random() ^ tv.tv_usec; bcopy(&val32, seed + sizeof(val32) * i, sizeof(val32)); } } else { bcopy(seed0, seed, 8); } /* copy the right-most 64-bits of the given address */ /* XXX assumption on the size of IFID */ bcopy(seed1, &seed[8], 8); if (0) { /* for debugging purposes only */ int i; printf("generate_tmp_ifid: new randomized ID from: "); for (i = 0; i < 16; i++) printf("%02x", seed[i]); printf(" "); } /* generate 16 bytes of pseudo-random value. */ bzero(&ctxt, sizeof(ctxt)); MD5Init(&ctxt); MD5Update(&ctxt, seed, sizeof(seed)); MD5Final(digest, &ctxt); /* * RFC 3041 3.2.1. (3) * Take the left-most 64-bits of the MD5 digest and set bit 6 (the * left-most bit is numbered 0) to zero. */ bcopy(digest, ret, 8); ret[0] &= ~EUI64_UBIT; /* * XXX: we'd like to ensure that the generated value is not zero * for simplicity. If the caclculated digest happens to be zero, * use a random non-zero value as the last resort. */ if (bcmp(nullbuf, ret, sizeof(nullbuf)) == 0) { log(LOG_INFO, "generate_tmp_ifid: computed MD5 value is zero.\n"); microtime(&tv); val32 = random() ^ tv.tv_usec; val32 = 1 + (val32 % (0xffffffff - 1)); } /* * RFC 3041 3.2.1. (4) * Take the rightmost 64-bits of the MD5 digest and save them in * stable storage as the history value to be used in the next * iteration of the algorithm. */ bcopy(&digest[8], seed0, 8); if (0) { /* for debugging purposes only */ int i; printf("to: "); for (i = 0; i < 16; i++) printf("%02x", digest[i]); printf("\n"); } return 0; } /* * Get interface identifier for the specified interface. * XXX assumes single sockaddr_dl (AF_LINK address) per an interface */ static int get_hw_ifid(ifp, in6) struct ifnet *ifp; struct in6_addr *in6; /* upper 64bits are preserved */ { struct ifaddr *ifa; struct sockaddr_dl *sdl; u_int8_t *addr; size_t addrlen; static u_int8_t allzero[8] = { 0, 0, 0, 0, 0, 0, 0, 0 }; static u_int8_t allone[8] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }; for (ifa = ifp->if_addrlist.tqh_first; ifa; ifa = ifa->ifa_list.tqe_next) { if (ifa->ifa_addr->sa_family != AF_LINK) continue; sdl = (struct sockaddr_dl *)ifa->ifa_addr; if (sdl == NULL) continue; if (sdl->sdl_alen == 0) continue; goto found; } return -1; found: addr = LLADDR(sdl); addrlen = sdl->sdl_alen; /* get EUI64 */ switch (ifp->if_type) { case IFT_ETHER: case IFT_FDDI: case IFT_ISO88025: case IFT_ATM: case IFT_IEEE1394: #ifdef IFT_IEEE80211 case IFT_IEEE80211: #endif /* IEEE802/EUI64 cases - what others? */ /* IEEE1394 uses 16byte length address starting with EUI64 */ if (addrlen > 8) addrlen = 8; /* look at IEEE802/EUI64 only */ if (addrlen != 8 && addrlen != 6) return -1; /* * check for invalid MAC address - on bsdi, we see it a lot * since wildboar configures all-zero MAC on pccard before * card insertion. */ if (bcmp(addr, allzero, addrlen) == 0) return -1; if (bcmp(addr, allone, addrlen) == 0) return -1; /* make EUI64 address */ if (addrlen == 8) bcopy(addr, &in6->s6_addr[8], 8); else if (addrlen == 6) { in6->s6_addr[8] = addr[0]; in6->s6_addr[9] = addr[1]; in6->s6_addr[10] = addr[2]; in6->s6_addr[11] = 0xff; in6->s6_addr[12] = 0xfe; in6->s6_addr[13] = addr[3]; in6->s6_addr[14] = addr[4]; in6->s6_addr[15] = addr[5]; } break; case IFT_ARCNET: if (addrlen != 1) return -1; if (!addr[0]) return -1; bzero(&in6->s6_addr[8], 8); in6->s6_addr[15] = addr[0]; /* * due to insufficient bitwidth, we mark it local. */ in6->s6_addr[8] &= ~EUI64_GBIT; /* g bit to "individual" */ in6->s6_addr[8] |= EUI64_UBIT; /* u bit to "local" */ break; case IFT_GIF: #ifdef IFT_STF case IFT_STF: #endif /* * RFC2893 says: "SHOULD use IPv4 address as ifid source". * however, IPv4 address is not very suitable as unique * identifier source (can be renumbered). * we don't do this. */ return -1; default: return -1; } /* sanity check: g bit must not indicate "group" */ if (EUI64_GROUP(in6)) return -1; /* convert EUI64 into IPv6 interface identifier */ EUI64_TO_IFID(in6); /* * sanity check: ifid must not be all zero, avoid conflict with * subnet router anycast */ if ((in6->s6_addr[8] & ~(EUI64_GBIT | EUI64_UBIT)) == 0x00 && bcmp(&in6->s6_addr[9], allzero, 7) == 0) { return -1; } return 0; } /* * Get interface identifier for the specified interface. If it is not * available on ifp0, borrow interface identifier from other information * sources. */ static int get_ifid(ifp0, altifp, in6) struct ifnet *ifp0; struct ifnet *altifp; /* secondary EUI64 source */ struct in6_addr *in6; { struct ifnet *ifp; /* first, try to get it from the interface itself */ if (get_hw_ifid(ifp0, in6) == 0) { nd6log((LOG_DEBUG, "%s: got interface identifier from itself\n", if_name(ifp0))); goto success; } /* try secondary EUI64 source. this basically is for ATM PVC */ if (altifp && get_hw_ifid(altifp, in6) == 0) { nd6log((LOG_DEBUG, "%s: got interface identifier from %s\n", if_name(ifp0), if_name(altifp))); goto success; } /* next, try to get it from some other hardware interface */ IFNET_RLOCK(); for (ifp = ifnet.tqh_first; ifp; ifp = ifp->if_list.tqe_next) { if (ifp == ifp0) continue; if (get_hw_ifid(ifp, in6) != 0) continue; /* * to borrow ifid from other interface, ifid needs to be * globally unique */ if (IFID_UNIVERSAL(in6)) { nd6log((LOG_DEBUG, "%s: borrow interface identifier from %s\n", if_name(ifp0), if_name(ifp))); IFNET_RUNLOCK(); goto success; } } IFNET_RUNLOCK(); /* last resort: get from random number source */ if (get_rand_ifid(ifp, in6) == 0) { nd6log((LOG_DEBUG, "%s: interface identifier generated by random number\n", if_name(ifp0))); goto success; } printf("%s: failed to get interface identifier\n", if_name(ifp0)); return -1; success: nd6log((LOG_INFO, "%s: ifid: " "%02x:%02x:%02x:%02x:%02x:%02x:%02x:%02x\n", if_name(ifp0), in6->s6_addr[8], in6->s6_addr[9], in6->s6_addr[10], in6->s6_addr[11], in6->s6_addr[12], in6->s6_addr[13], in6->s6_addr[14], in6->s6_addr[15])); return 0; } static int in6_ifattach_linklocal(ifp, altifp) struct ifnet *ifp; struct ifnet *altifp; /* secondary EUI64 source */ { struct in6_ifaddr *ia; struct in6_aliasreq ifra; struct nd_prefix pr0; int i, error; /* * configure link-local address. */ bzero(&ifra, sizeof(ifra)); /* * in6_update_ifa() does not use ifra_name, but we accurately set it * for safety. */ strncpy(ifra.ifra_name, if_name(ifp), sizeof(ifra.ifra_name)); ifra.ifra_addr.sin6_family = AF_INET6; ifra.ifra_addr.sin6_len = sizeof(struct sockaddr_in6); ifra.ifra_addr.sin6_addr.s6_addr16[0] = htons(0xfe80); #ifdef SCOPEDROUTING ifra.ifra_addr.sin6_addr.s6_addr16[1] = 0 #else ifra.ifra_addr.sin6_addr.s6_addr16[1] = htons(ifp->if_index); /* XXX */ #endif ifra.ifra_addr.sin6_addr.s6_addr32[1] = 0; if ((ifp->if_flags & IFF_LOOPBACK) != 0) { ifra.ifra_addr.sin6_addr.s6_addr32[2] = 0; ifra.ifra_addr.sin6_addr.s6_addr32[3] = htonl(1); } else { if (get_ifid(ifp, altifp, &ifra.ifra_addr.sin6_addr) != 0) { nd6log((LOG_ERR, "%s: no ifid available\n", if_name(ifp))); return -1; } } #ifdef SCOPEDROUTING ifra.ifra_addr.sin6_scope_id = in6_addr2scopeid(ifp, &ifra.ifra_addr.sin6_addr); #endif ifra.ifra_prefixmask.sin6_len = sizeof(struct sockaddr_in6); ifra.ifra_prefixmask.sin6_family = AF_INET6; ifra.ifra_prefixmask.sin6_addr = in6mask64; #ifdef SCOPEDROUTING /* take into accound the sin6_scope_id field for routing */ ifra.ifra_prefixmask.sin6_scope_id = 0xffffffff; #endif /* link-local addresses should NEVER expire. */ ifra.ifra_lifetime.ia6t_vltime = ND6_INFINITE_LIFETIME; ifra.ifra_lifetime.ia6t_pltime = ND6_INFINITE_LIFETIME; /* * Do not let in6_update_ifa() do DAD, since we need a random delay * before sending an NS at the first time the interface becomes up. * Instead, in6_if_up() will start DAD with a proper random delay. */ ifra.ifra_flags |= IN6_IFF_NODAD; /* * Now call in6_update_ifa() to do a bunch of procedures to configure * a link-local address. We can set NULL to the 3rd argument, because * we know there's no other link-local address on the interface * and therefore we are adding one (instead of updating one). */ if ((error = in6_update_ifa(ifp, &ifra, NULL)) != 0) { /* * XXX: When the interface does not support IPv6, this call * would fail in the SIOCSIFADDR ioctl. I believe the * notification is rather confusing in this case, so just * supress it. (jinmei@kame.net 20010130) */ if (error != EAFNOSUPPORT) log(LOG_NOTICE, "in6_ifattach_linklocal: failed to " "configure a link-local address on %s " "(errno=%d)\n", if_name(ifp), error); return(-1); } /* * Adjust ia6_flags so that in6_if_up will perform DAD. * XXX: Some P2P interfaces seem not to send packets just after * becoming up, so we skip p2p interfaces for safety. */ ia = in6ifa_ifpforlinklocal(ifp, 0); /* ia must not be NULL */ #ifdef DIAGNOSTIC if (!ia) { panic("ia == NULL in in6_ifattach_linklocal"); /* NOTREACHED */ } #endif if (in6if_do_dad(ifp) && (ifp->if_flags & IFF_POINTOPOINT) == 0) { ia->ia6_flags &= ~IN6_IFF_NODAD; ia->ia6_flags |= IN6_IFF_TENTATIVE; } /* * Make the link-local prefix (fe80::/64%link) as on-link. * Since we'd like to manage prefixes separately from addresses, * we make an ND6 prefix structure for the link-local prefix, * and add it to the prefix list as a never-expire prefix. * XXX: this change might affect some existing code base... */ bzero(&pr0, sizeof(pr0)); pr0.ndpr_ifp = ifp; /* this should be 64 at this moment. */ pr0.ndpr_plen = in6_mask2len(&ifra.ifra_prefixmask.sin6_addr, NULL); pr0.ndpr_mask = ifra.ifra_prefixmask.sin6_addr; pr0.ndpr_prefix = ifra.ifra_addr; /* apply the mask for safety. (nd6_prelist_add will apply it again) */ for (i = 0; i < 4; i++) { pr0.ndpr_prefix.sin6_addr.s6_addr32[i] &= in6mask64.s6_addr32[i]; } /* * Initialize parameters. The link-local prefix must always be * on-link, and its lifetimes never expire. */ pr0.ndpr_raf_onlink = 1; pr0.ndpr_raf_auto = 1; /* probably meaningless */ pr0.ndpr_vltime = ND6_INFINITE_LIFETIME; pr0.ndpr_pltime = ND6_INFINITE_LIFETIME; /* * Since there is no other link-local addresses, nd6_prefix_lookup() * probably returns NULL. However, we cannot always expect the result. * For example, if we first remove the (only) existing link-local * address, and then reconfigure another one, the prefix is still * valid with referring to the old link-local address. */ if (nd6_prefix_lookup(&pr0) == NULL) { if ((error = nd6_prelist_add(&pr0, NULL, NULL)) != 0) return(error); } return 0; } static int in6_ifattach_loopback(ifp) struct ifnet *ifp; /* must be IFT_LOOP */ { struct in6_aliasreq ifra; int error; bzero(&ifra, sizeof(ifra)); /* * in6_update_ifa() does not use ifra_name, but we accurately set it * for safety. */ strncpy(ifra.ifra_name, if_name(ifp), sizeof(ifra.ifra_name)); ifra.ifra_prefixmask.sin6_len = sizeof(struct sockaddr_in6); ifra.ifra_prefixmask.sin6_family = AF_INET6; ifra.ifra_prefixmask.sin6_addr = in6mask128; /* * Always initialize ia_dstaddr (= broadcast address) to loopback * address. Follows IPv4 practice - see in_ifinit(). */ ifra.ifra_dstaddr.sin6_len = sizeof(struct sockaddr_in6); ifra.ifra_dstaddr.sin6_family = AF_INET6; ifra.ifra_dstaddr.sin6_addr = in6addr_loopback; ifra.ifra_addr.sin6_len = sizeof(struct sockaddr_in6); ifra.ifra_addr.sin6_family = AF_INET6; ifra.ifra_addr.sin6_addr = in6addr_loopback; /* the loopback address should NEVER expire. */ ifra.ifra_lifetime.ia6t_vltime = ND6_INFINITE_LIFETIME; ifra.ifra_lifetime.ia6t_pltime = ND6_INFINITE_LIFETIME; /* we don't need to perform DAD on loopback interfaces. */ ifra.ifra_flags |= IN6_IFF_NODAD; /* skip registration to the prefix list. XXX should be temporary. */ ifra.ifra_flags |= IN6_IFF_NOPFX; /* * We are sure that this is a newly assigned address, so we can set * NULL to the 3rd arg. */ if ((error = in6_update_ifa(ifp, &ifra, NULL)) != 0) { log(LOG_ERR, "in6_ifattach_loopback: failed to configure " "the loopback address on %s (errno=%d)\n", if_name(ifp), error); return(-1); } return 0; } /* * compute NI group address, based on the current hostname setting. * see draft-ietf-ipngwg-icmp-name-lookup-* (04 and later). * * when ifp == NULL, the caller is responsible for filling scopeid. */ int in6_nigroup(ifp, name, namelen, in6) struct ifnet *ifp; const char *name; int namelen; struct in6_addr *in6; { const char *p; u_char *q; MD5_CTX ctxt; u_int8_t digest[16]; char l; char n[64]; /* a single label must not exceed 63 chars */ if (!namelen || !name) return -1; p = name; while (p && *p && *p != '.' && p - name < namelen) p++; if (p - name > sizeof(n) - 1) return -1; /* label too long */ l = p - name; strncpy(n, name, l); n[(int)l] = '\0'; for (q = n; *q; q++) { if ('A' <= *q && *q <= 'Z') *q = *q - 'A' + 'a'; } /* generate 8 bytes of pseudo-random value. */ bzero(&ctxt, sizeof(ctxt)); MD5Init(&ctxt); MD5Update(&ctxt, &l, sizeof(l)); MD5Update(&ctxt, n, l); MD5Final(digest, &ctxt); bzero(in6, sizeof(*in6)); in6->s6_addr16[0] = htons(0xff02); if (ifp) in6->s6_addr16[1] = htons(ifp->if_index); in6->s6_addr8[11] = 2; bcopy(digest, &in6->s6_addr32[3], sizeof(in6->s6_addr32[3])); return 0; } void in6_nigroup_attach(name, namelen) const char *name; int namelen; { struct ifnet *ifp; struct sockaddr_in6 mltaddr; struct in6_multi *in6m; int error; bzero(&mltaddr, sizeof(mltaddr)); mltaddr.sin6_family = AF_INET6; mltaddr.sin6_len = sizeof(struct sockaddr_in6); if (in6_nigroup(NULL, name, namelen, &mltaddr.sin6_addr) != 0) return; IFNET_RLOCK(); for (ifp = ifnet.tqh_first; ifp; ifp = ifp->if_list.tqe_next) { mltaddr.sin6_addr.s6_addr16[1] = htons(ifp->if_index); IN6_LOOKUP_MULTI(mltaddr.sin6_addr, ifp, in6m); if (!in6m) { if (!in6_addmulti(&mltaddr.sin6_addr, ifp, &error)) { nd6log((LOG_ERR, "%s: failed to join %s " "(errno=%d)\n", if_name(ifp), ip6_sprintf(&mltaddr.sin6_addr), error)); } } } IFNET_RUNLOCK(); } void in6_nigroup_detach(name, namelen) const char *name; int namelen; { struct ifnet *ifp; struct sockaddr_in6 mltaddr; struct in6_multi *in6m; bzero(&mltaddr, sizeof(mltaddr)); mltaddr.sin6_family = AF_INET6; mltaddr.sin6_len = sizeof(struct sockaddr_in6); if (in6_nigroup(NULL, name, namelen, &mltaddr.sin6_addr) != 0) return; IFNET_RLOCK(); for (ifp = ifnet.tqh_first; ifp; ifp = ifp->if_list.tqe_next) { mltaddr.sin6_addr.s6_addr16[1] = htons(ifp->if_index); IN6_LOOKUP_MULTI(mltaddr.sin6_addr, ifp, in6m); if (in6m) in6_delmulti(in6m); } IFNET_RUNLOCK(); } /* * XXX multiple loopback interface needs more care. for instance, * nodelocal address needs to be configured onto only one of them. * XXX multiple link-local address case */ void in6_ifattach(ifp, altifp) struct ifnet *ifp; struct ifnet *altifp; /* secondary EUI64 source */ { static size_t if_indexlim = 8; struct in6_ifaddr *ia; struct in6_addr in6; /* some of the interfaces are inherently not IPv6 capable */ switch (ifp->if_type) { #ifdef IFT_BRIDGE /*OpenBSD 2.8*/ case IFT_BRIDGE: return; #endif } /* * We have some arrays that should be indexed by if_index. * since if_index will grow dynamically, they should grow too. * struct in6_ifstat **in6_ifstat * struct icmp6_ifstat **icmp6_ifstat */ if (in6_ifstat == NULL || icmp6_ifstat == NULL || if_index >= if_indexlim) { size_t n; caddr_t q; size_t olim; olim = if_indexlim; while (if_index >= if_indexlim) if_indexlim <<= 1; /* grow in6_ifstat */ n = if_indexlim * sizeof(struct in6_ifstat *); q = (caddr_t)malloc(n, M_IFADDR, M_WAITOK); bzero(q, n); if (in6_ifstat) { bcopy((caddr_t)in6_ifstat, q, olim * sizeof(struct in6_ifstat *)); free((caddr_t)in6_ifstat, M_IFADDR); } in6_ifstat = (struct in6_ifstat **)q; in6_ifstatmax = if_indexlim; /* grow icmp6_ifstat */ n = if_indexlim * sizeof(struct icmp6_ifstat *); q = (caddr_t)malloc(n, M_IFADDR, M_WAITOK); bzero(q, n); if (icmp6_ifstat) { bcopy((caddr_t)icmp6_ifstat, q, olim * sizeof(struct icmp6_ifstat *)); free((caddr_t)icmp6_ifstat, M_IFADDR); } icmp6_ifstat = (struct icmp6_ifstat **)q; icmp6_ifstatmax = if_indexlim; } /* initialize scope identifiers */ scope6_ifattach(ifp); /* * quirks based on interface type */ switch (ifp->if_type) { #ifdef IFT_STF case IFT_STF: /* * 6to4 interface is a very special kind of beast. * no multicast, no linklocal. RFC2529 specifies how to make * linklocals for 6to4 interface, but there's no use and * it is rather harmful to have one. */ goto statinit; #endif default: break; } /* * usually, we require multicast capability to the interface */ if ((ifp->if_flags & IFF_MULTICAST) == 0) { log(LOG_INFO, "in6_ifattach: " "%s is not multicast capable, IPv6 not enabled\n", if_name(ifp)); return; } /* * assign loopback address for loopback interface. * XXX multiple loopback interface case. */ if ((ifp->if_flags & IFF_LOOPBACK) != 0) { in6 = in6addr_loopback; if (in6ifa_ifpwithaddr(ifp, &in6) == NULL) { if (in6_ifattach_loopback(ifp) != 0) return; } } /* * assign a link-local address, if there's none. */ if (ip6_auto_linklocal) { ia = in6ifa_ifpforlinklocal(ifp, 0); if (ia == NULL) { if (in6_ifattach_linklocal(ifp, altifp) == 0) { /* linklocal address assigned */ } else { /* failed to assign linklocal address. bark? */ } } } #ifdef IFT_STF /* XXX */ statinit: #endif /* update dynamically. */ if (in6_maxmtu < ifp->if_mtu) in6_maxmtu = ifp->if_mtu; if (in6_ifstat[ifp->if_index] == NULL) { in6_ifstat[ifp->if_index] = (struct in6_ifstat *) malloc(sizeof(struct in6_ifstat), M_IFADDR, M_WAITOK); bzero(in6_ifstat[ifp->if_index], sizeof(struct in6_ifstat)); } if (icmp6_ifstat[ifp->if_index] == NULL) { icmp6_ifstat[ifp->if_index] = (struct icmp6_ifstat *) malloc(sizeof(struct icmp6_ifstat), M_IFADDR, M_WAITOK); bzero(icmp6_ifstat[ifp->if_index], sizeof(struct icmp6_ifstat)); } /* initialize NDP variables */ nd6_ifattach(ifp); } /* * NOTE: in6_ifdetach() does not support loopback if at this moment. * We don't need this function in bsdi, because interfaces are never removed * from the ifnet list in bsdi. */ void in6_ifdetach(ifp) struct ifnet *ifp; { struct in6_ifaddr *ia, *oia; struct ifaddr *ifa, *next; struct rtentry *rt; short rtflags; struct sockaddr_in6 sin6; struct in6_multi *in6m; struct in6_multi *in6m_next; /* nuke prefix list. this may try to remove some of ifaddrs as well */ in6_purgeprefix(ifp); /* remove neighbor management table */ nd6_purge(ifp); /* nuke any of IPv6 addresses we have */ for (ifa = ifp->if_addrlist.tqh_first; ifa; ifa = next) { next = ifa->ifa_list.tqe_next; if (ifa->ifa_addr->sa_family != AF_INET6) continue; in6_purgeaddr(ifa); } /* undo everything done by in6_ifattach(), just in case */ for (ifa = ifp->if_addrlist.tqh_first; ifa; ifa = next) { next = ifa->ifa_list.tqe_next; if (ifa->ifa_addr->sa_family != AF_INET6 || !IN6_IS_ADDR_LINKLOCAL(&satosin6(&ifa->ifa_addr)->sin6_addr)) { continue; } ia = (struct in6_ifaddr *)ifa; /* remove from the routing table */ if ((ia->ia_flags & IFA_ROUTE) && (rt = rtalloc1((struct sockaddr *)&ia->ia_addr, 0, 0UL))) { rtflags = rt->rt_flags; rtfree(rt); rtrequest(RTM_DELETE, (struct sockaddr *)&ia->ia_addr, (struct sockaddr *)&ia->ia_addr, (struct sockaddr *)&ia->ia_prefixmask, rtflags, (struct rtentry **)0); } /* remove from the linked list */ TAILQ_REMOVE(&ifp->if_addrlist, (struct ifaddr *)ia, ifa_list); IFAFREE(&ia->ia_ifa); /* also remove from the IPv6 address chain(itojun&jinmei) */ oia = ia; if (oia == (ia = in6_ifaddr)) in6_ifaddr = ia->ia_next; else { while (ia->ia_next && (ia->ia_next != oia)) ia = ia->ia_next; if (ia->ia_next) ia->ia_next = oia->ia_next; else { nd6log((LOG_ERR, "%s: didn't unlink in6ifaddr from " "list\n", if_name(ifp))); } } IFAFREE(&oia->ia_ifa); } /* leave from all multicast groups joined */ if (udbinfo.listhead != NULL) in6_pcbpurgeif0(LIST_FIRST(udbinfo.listhead), ifp); if (ripcbinfo.listhead != NULL) in6_pcbpurgeif0(LIST_FIRST(ripcbinfo.listhead), ifp); for (in6m = LIST_FIRST(&in6_multihead); in6m; in6m = in6m_next) { in6m_next = LIST_NEXT(in6m, in6m_entry); if (in6m->in6m_ifp != ifp) continue; in6_delmulti(in6m); in6m = NULL; } /* * remove neighbor management table. we call it twice just to make * sure we nuke everything. maybe we need just one call. * XXX: since the first call did not release addresses, some prefixes * might remain. We should call nd6_purge() again to release the * prefixes after removing all addresses above. * (Or can we just delay calling nd6_purge until at this point?) */ nd6_purge(ifp); /* remove route to link-local allnodes multicast (ff02::1) */ bzero(&sin6, sizeof(sin6)); sin6.sin6_len = sizeof(struct sockaddr_in6); sin6.sin6_family = AF_INET6; sin6.sin6_addr = in6addr_linklocal_allnodes; sin6.sin6_addr.s6_addr16[1] = htons(ifp->if_index); rt = rtalloc1((struct sockaddr *)&sin6, 0, 0UL); - if (rt && rt->rt_ifp == ifp) { - rtrequest(RTM_DELETE, (struct sockaddr *)rt_key(rt), - rt->rt_gateway, rt_mask(rt), rt->rt_flags, 0); - rtfree(rt); + if (rt) { + if (rt->rt_ifp == ifp) { + RT_UNLOCK(rt); + rtrequest(RTM_DELETE, (struct sockaddr *)rt_key(rt), + rt->rt_gateway, rt_mask(rt), rt->rt_flags, 0); + RTFREE(rt); + } else + rtfree(rt); } } void in6_get_tmpifid(ifp, retbuf, baseid, generate) struct ifnet *ifp; u_int8_t *retbuf; const u_int8_t *baseid; int generate; { u_int8_t nullbuf[8]; struct nd_ifinfo *ndi = &nd_ifinfo[ifp->if_index]; bzero(nullbuf, sizeof(nullbuf)); if (bcmp(ndi->randomid, nullbuf, sizeof(nullbuf)) == 0) { /* we've never created a random ID. Create a new one. */ generate = 1; } if (generate) { bcopy(baseid, ndi->randomseed1, sizeof(ndi->randomseed1)); /* generate_tmp_ifid will update seedn and buf */ (void)generate_tmp_ifid(ndi->randomseed0, ndi->randomseed1, ndi->randomid); } bcopy(ndi->randomid, retbuf, 8); } void in6_tmpaddrtimer(ignored_arg) void *ignored_arg; { int i; struct nd_ifinfo *ndi; u_int8_t nullbuf[8]; int s = splnet(); callout_reset(&in6_tmpaddrtimer_ch, (ip6_temp_preferred_lifetime - ip6_desync_factor - ip6_temp_regen_advance) * hz, in6_tmpaddrtimer, NULL); bzero(nullbuf, sizeof(nullbuf)); for (i = 1; i < if_index + 1; i++) { ndi = &nd_ifinfo[i]; if (bcmp(ndi->randomid, nullbuf, sizeof(nullbuf)) != 0) { /* * We've been generating a random ID on this interface. * Create a new one. */ (void)generate_tmp_ifid(ndi->randomseed0, ndi->randomseed1, ndi->randomid); } } splx(s); } Index: head/sys/netinet6/in6_pcb.c =================================================================== --- head/sys/netinet6/in6_pcb.c (revision 120726) +++ head/sys/netinet6/in6_pcb.c (revision 120727) @@ -1,1163 +1,1167 @@ /* $FreeBSD$ */ /* $KAME: in6_pcb.c,v 1.31 2001/05/21 05:45:10 jinmei Exp $ */ /* * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the project nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * */ /* * Copyright (c) 1982, 1986, 1991, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)in_pcb.c 8.2 (Berkeley) 1/4/94 */ #include "opt_inet.h" #include "opt_inet6.h" #include "opt_ipsec.h" #include "opt_random_ip_id.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef IPSEC #include #ifdef INET6 #include #endif #include #ifdef INET6 #include #endif #include #endif /* IPSEC */ #ifdef FAST_IPSEC #include #include #include #define IPSEC #endif /* FAST_IPSEC */ struct in6_addr zeroin6_addr; int in6_pcbbind(inp, nam, td) register struct inpcb *inp; struct sockaddr *nam; struct thread *td; { struct socket *so = inp->inp_socket; struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)NULL; struct inpcbinfo *pcbinfo = inp->inp_pcbinfo; u_short lport = 0; int wild = 0, reuseport = (so->so_options & SO_REUSEPORT); if (!in6_ifaddr) /* XXX broken! */ return (EADDRNOTAVAIL); if (inp->inp_lport || !IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr)) return(EINVAL); if ((so->so_options & (SO_REUSEADDR|SO_REUSEPORT)) == 0) wild = 1; if (nam) { sin6 = (struct sockaddr_in6 *)nam; if (nam->sa_len != sizeof(*sin6)) return(EINVAL); /* * family check. */ if (nam->sa_family != AF_INET6) return(EAFNOSUPPORT); /* KAME hack: embed scopeid */ if (in6_embedscope(&sin6->sin6_addr, sin6, inp, NULL) != 0) return EINVAL; /* this must be cleared for ifa_ifwithaddr() */ sin6->sin6_scope_id = 0; lport = sin6->sin6_port; if (IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr)) { /* * Treat SO_REUSEADDR as SO_REUSEPORT for multicast; * allow compepte duplication of binding if * SO_REUSEPORT is set, or if SO_REUSEADDR is set * and a multicast address is bound on both * new and duplicated sockets. */ if (so->so_options & SO_REUSEADDR) reuseport = SO_REUSEADDR|SO_REUSEPORT; } else if (!IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) { struct ifaddr *ia = NULL; sin6->sin6_port = 0; /* yech... */ if ((ia = ifa_ifwithaddr((struct sockaddr *)sin6)) == 0) return(EADDRNOTAVAIL); /* * XXX: bind to an anycast address might accidentally * cause sending a packet with anycast source address. * We should allow to bind to a deprecated address, since * the application dare to use it. */ if (ia && ((struct in6_ifaddr *)ia)->ia6_flags & (IN6_IFF_ANYCAST|IN6_IFF_NOTREADY|IN6_IFF_DETACHED)) { return(EADDRNOTAVAIL); } } if (lport) { struct inpcb *t; /* GROSS */ if (ntohs(lport) < IPV6PORT_RESERVED && td && suser_cred(td->td_ucred, PRISON_ROOT)) return(EACCES); if (so->so_cred->cr_uid != 0 && !IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr)) { t = in6_pcblookup_local(pcbinfo, &sin6->sin6_addr, lport, INPLOOKUP_WILDCARD); if (t && (t->inp_vflag & INP_TIMEWAIT)) { if ((!IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr) || !IN6_IS_ADDR_UNSPECIFIED(&t->in6p_laddr) || !(intotw(t)->tw_so_options & SO_REUSEPORT)) && so->so_cred->cr_uid != intotw(t)->tw_cred->cr_uid) return (EADDRINUSE); } else if (t && (!IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr) || !IN6_IS_ADDR_UNSPECIFIED(&t->in6p_laddr) || (t->inp_socket->so_options & SO_REUSEPORT) == 0) && (so->so_cred->cr_uid != t->inp_socket->so_cred->cr_uid)) return (EADDRINUSE); if ((inp->inp_flags & IN6P_IPV6_V6ONLY) == 0 && IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) { struct sockaddr_in sin; in6_sin6_2_sin(&sin, sin6); t = in_pcblookup_local(pcbinfo, sin.sin_addr, lport, INPLOOKUP_WILDCARD); if (t && (t->inp_vflag & INP_TIMEWAIT)) { if (so->so_cred->cr_uid != intotw(t)->tw_cred->cr_uid && (ntohl(t->inp_laddr.s_addr) != INADDR_ANY || ((inp->inp_vflag & INP_IPV6PROTO) == (t->inp_vflag & INP_IPV6PROTO)))) return (EADDRINUSE); } else if (t && (so->so_cred->cr_uid != t->inp_socket->so_cred->cr_uid) && (ntohl(t->inp_laddr.s_addr) != INADDR_ANY || INP_SOCKAF(so) == INP_SOCKAF(t->inp_socket))) return (EADDRINUSE); } } t = in6_pcblookup_local(pcbinfo, &sin6->sin6_addr, lport, wild); if (t && (reuseport & ((t->inp_vflag & INP_TIMEWAIT) ? intotw(t)->tw_so_options : t->inp_socket->so_options)) == 0) return(EADDRINUSE); if ((inp->inp_flags & IN6P_IPV6_V6ONLY) == 0 && IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) { struct sockaddr_in sin; in6_sin6_2_sin(&sin, sin6); t = in_pcblookup_local(pcbinfo, sin.sin_addr, lport, wild); if (t && t->inp_vflag & INP_TIMEWAIT) { if ((reuseport & intotw(t)->tw_so_options) == 0 && (ntohl(t->inp_laddr.s_addr) != INADDR_ANY || ((inp->inp_vflag & INP_IPV6PROTO) == (t->inp_vflag & INP_IPV6PROTO)))) return (EADDRINUSE); } else if (t && (reuseport & t->inp_socket->so_options) == 0 && (ntohl(t->inp_laddr.s_addr) != INADDR_ANY || INP_SOCKAF(so) == INP_SOCKAF(t->inp_socket))) return (EADDRINUSE); } } inp->in6p_laddr = sin6->sin6_addr; } if (lport == 0) { int e; if ((e = in6_pcbsetport(&inp->in6p_laddr, inp, td)) != 0) return(e); } else { inp->inp_lport = lport; if (in_pcbinshash(inp) != 0) { inp->in6p_laddr = in6addr_any; inp->inp_lport = 0; return (EAGAIN); } } return(0); } /* * Transform old in6_pcbconnect() into an inner subroutine for new * in6_pcbconnect(): Do some validity-checking on the remote * address (in mbuf 'nam') and then determine local host address * (i.e., which interface) to use to access that remote host. * * This preserves definition of in6_pcbconnect(), while supporting a * slightly different version for T/TCP. (This is more than * a bit of a kludge, but cleaning up the internal interfaces would * have forced minor changes in every protocol). */ int in6_pcbladdr(inp, nam, plocal_addr6) register struct inpcb *inp; struct sockaddr *nam; struct in6_addr **plocal_addr6; { register struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)nam; struct ifnet *ifp = NULL; int error = 0; if (nam->sa_len != sizeof (*sin6)) return (EINVAL); if (sin6->sin6_family != AF_INET6) return (EAFNOSUPPORT); if (sin6->sin6_port == 0) return (EADDRNOTAVAIL); /* KAME hack: embed scopeid */ if (in6_embedscope(&sin6->sin6_addr, sin6, inp, &ifp) != 0) return EINVAL; if (in6_ifaddr) { /* * If the destination address is UNSPECIFIED addr, * use the loopback addr, e.g ::1. */ if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) sin6->sin6_addr = in6addr_loopback; } { /* * XXX: in6_selectsrc might replace the bound local address * with the address specified by setsockopt(IPV6_PKTINFO). * Is it the intended behavior? */ *plocal_addr6 = in6_selectsrc(sin6, inp->in6p_outputopts, inp->in6p_moptions, &inp->in6p_route, &inp->in6p_laddr, &error); if (*plocal_addr6 == 0) { if (error == 0) error = EADDRNOTAVAIL; return(error); } /* * Don't do pcblookup call here; return interface in * plocal_addr6 * and exit to caller, that will do the lookup. */ } if (inp->in6p_route.ro_rt) ifp = inp->in6p_route.ro_rt->rt_ifp; return(0); } /* * Outer subroutine: * Connect from a socket to a specified address. * Both address and port must be specified in argument sin. * If don't have a local address for this socket yet, * then pick one. */ int in6_pcbconnect(inp, nam, td) register struct inpcb *inp; struct sockaddr *nam; struct thread *td; { struct in6_addr *addr6; register struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)nam; int error; /* * Call inner routine, to assign local interface address. * in6_pcbladdr() may automatically fill in sin6_scope_id. */ if ((error = in6_pcbladdr(inp, nam, &addr6)) != 0) return(error); if (in6_pcblookup_hash(inp->inp_pcbinfo, &sin6->sin6_addr, sin6->sin6_port, IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr) ? addr6 : &inp->in6p_laddr, inp->inp_lport, 0, NULL) != NULL) { return (EADDRINUSE); } if (IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr)) { if (inp->inp_lport == 0) { error = in6_pcbbind(inp, (struct sockaddr *)0, td); if (error) return (error); } inp->in6p_laddr = *addr6; } inp->in6p_faddr = sin6->sin6_addr; inp->inp_fport = sin6->sin6_port; /* update flowinfo - draft-itojun-ipv6-flowlabel-api-00 */ inp->in6p_flowinfo &= ~IPV6_FLOWLABEL_MASK; if (inp->in6p_flags & IN6P_AUTOFLOWLABEL) inp->in6p_flowinfo |= #ifdef RANDOM_IP_ID (htonl(ip6_randomflowlabel()) & IPV6_FLOWLABEL_MASK); #else (htonl(ip6_flow_seq++) & IPV6_FLOWLABEL_MASK); #endif in_pcbrehash(inp); return (0); } #if 0 /* * Return an IPv6 address, which is the most appropriate for given * destination and user specified options. * If necessary, this function lookups the routing table and return * an entry to the caller for later use. */ struct in6_addr * in6_selectsrc(dstsock, opts, mopts, ro, laddr, errorp) struct sockaddr_in6 *dstsock; struct ip6_pktopts *opts; struct ip6_moptions *mopts; struct route_in6 *ro; struct in6_addr *laddr; int *errorp; { struct in6_addr *dst; struct in6_ifaddr *ia6 = 0; struct in6_pktinfo *pi = NULL; dst = &dstsock->sin6_addr; *errorp = 0; /* * If the source address is explicitly specified by the caller, * use it. */ if (opts && (pi = opts->ip6po_pktinfo) && !IN6_IS_ADDR_UNSPECIFIED(&pi->ipi6_addr)) return(&pi->ipi6_addr); /* * If the source address is not specified but the socket(if any) * is already bound, use the bound address. */ if (laddr && !IN6_IS_ADDR_UNSPECIFIED(laddr)) return(laddr); /* * If the caller doesn't specify the source address but * the outgoing interface, use an address associated with * the interface. */ if (pi && pi->ipi6_ifindex) { /* XXX boundary check is assumed to be already done. */ ia6 = in6_ifawithscope(ifnet_byindex(pi->ipi6_ifindex), dst); if (ia6 == 0) { *errorp = EADDRNOTAVAIL; return(0); } return(&satosin6(&ia6->ia_addr)->sin6_addr); } /* * If the destination address is a link-local unicast address or * a multicast address, and if the outgoing interface is specified * by the sin6_scope_id filed, use an address associated with the * interface. * XXX: We're now trying to define more specific semantics of * sin6_scope_id field, so this part will be rewritten in * the near future. */ if ((IN6_IS_ADDR_LINKLOCAL(dst) || IN6_IS_ADDR_MULTICAST(dst)) && dstsock->sin6_scope_id) { /* * I'm not sure if boundary check for scope_id is done * somewhere... */ if (dstsock->sin6_scope_id < 0 || if_index < dstsock->sin6_scope_id) { *errorp = ENXIO; /* XXX: better error? */ return(0); } ia6 = in6_ifawithscope(ifnet_byindex(dstsock->sin6_scope_id), dst); if (ia6 == 0) { *errorp = EADDRNOTAVAIL; return(0); } return(&satosin6(&ia6->ia_addr)->sin6_addr); } /* * If the destination address is a multicast address and * the outgoing interface for the address is specified * by the caller, use an address associated with the interface. * There is a sanity check here; if the destination has node-local * scope, the outgoing interfacde should be a loopback address. * Even if the outgoing interface is not specified, we also * choose a loopback interface as the outgoing interface. */ if (IN6_IS_ADDR_MULTICAST(dst)) { struct ifnet *ifp = mopts ? mopts->im6o_multicast_ifp : NULL; if (ifp == NULL && IN6_IS_ADDR_MC_NODELOCAL(dst)) { ifp = &loif[0]; } if (ifp) { ia6 = in6_ifawithscope(ifp, dst); if (ia6 == 0) { *errorp = EADDRNOTAVAIL; return(0); } return(&ia6->ia_addr.sin6_addr); } } /* * If the next hop address for the packet is specified * by caller, use an address associated with the route * to the next hop. */ { struct sockaddr_in6 *sin6_next; struct rtentry *rt; if (opts && opts->ip6po_nexthop) { sin6_next = satosin6(opts->ip6po_nexthop); rt = nd6_lookup(&sin6_next->sin6_addr, 1, NULL); if (rt) { ia6 = in6_ifawithscope(rt->rt_ifp, dst); if (ia6 == 0) ia6 = ifatoia6(rt->rt_ifa); } if (ia6 == 0) { *errorp = EADDRNOTAVAIL; return(0); } return(&satosin6(&ia6->ia_addr)->sin6_addr); } } /* * If route is known or can be allocated now, * our src addr is taken from the i/f, else punt. */ if (ro) { if (ro->ro_rt && !IN6_ARE_ADDR_EQUAL(&satosin6(&ro->ro_dst)->sin6_addr, dst)) { RTFREE(ro->ro_rt); ro->ro_rt = (struct rtentry *)0; } if (ro->ro_rt == (struct rtentry *)0 || ro->ro_rt->rt_ifp == (struct ifnet *)0) { struct sockaddr_in6 *dst6; /* No route yet, so try to acquire one */ bzero(&ro->ro_dst, sizeof(struct sockaddr_in6)); dst6 = (struct sockaddr_in6 *)&ro->ro_dst; dst6->sin6_family = AF_INET6; dst6->sin6_len = sizeof(struct sockaddr_in6); dst6->sin6_addr = *dst; if (IN6_IS_ADDR_MULTICAST(dst)) { ro->ro_rt = rtalloc1(&((struct route *)ro) ->ro_dst, 0, 0UL); + RT_UNLOCK(ro->ro_rt); } else { rtalloc((struct route *)ro); } } /* * in_pcbconnect() checks out IFF_LOOPBACK to skip using * the address. But we don't know why it does so. * It is necessary to ensure the scope even for lo0 * so doesn't check out IFF_LOOPBACK. */ if (ro->ro_rt) { ia6 = in6_ifawithscope(ro->ro_rt->rt_ifa->ifa_ifp, dst); if (ia6 == 0) /* xxx scope error ?*/ ia6 = ifatoia6(ro->ro_rt->rt_ifa); } if (ia6 == 0) { *errorp = EHOSTUNREACH; /* no route */ return(0); } return(&satosin6(&ia6->ia_addr)->sin6_addr); } *errorp = EADDRNOTAVAIL; return(0); } /* * Default hop limit selection. The precedence is as follows: * 1. Hoplimit valued specified via ioctl. * 2. (If the outgoing interface is detected) the current * hop limit of the interface specified by router advertisement. * 3. The system default hoplimit. */ int in6_selecthlim(in6p, ifp) struct in6pcb *in6p; struct ifnet *ifp; { if (in6p && in6p->in6p_hops >= 0) return(in6p->in6p_hops); else if (ifp) return(nd_ifinfo[ifp->if_index].chlim); else return(ip6_defhlim); } #endif void in6_pcbdisconnect(inp) struct inpcb *inp; { bzero((caddr_t)&inp->in6p_faddr, sizeof(inp->in6p_faddr)); inp->inp_fport = 0; /* clear flowinfo - draft-itojun-ipv6-flowlabel-api-00 */ inp->in6p_flowinfo &= ~IPV6_FLOWLABEL_MASK; in_pcbrehash(inp); if (inp->inp_socket->so_state & SS_NOFDREF) in6_pcbdetach(inp); } void in6_pcbdetach(inp) struct inpcb *inp; { struct socket *so = inp->inp_socket; struct inpcbinfo *ipi = inp->inp_pcbinfo; #ifdef IPSEC if (inp->in6p_sp != NULL) ipsec6_delete_pcbpolicy(inp); #endif /* IPSEC */ inp->inp_gencnt = ++ipi->ipi_gencnt; in_pcbremlists(inp); if (so) { so->so_pcb = NULL; sotryfree(so); } if (inp->in6p_options) m_freem(inp->in6p_options); ip6_freepcbopts(inp->in6p_outputopts); ip6_freemoptions(inp->in6p_moptions); if (inp->in6p_route.ro_rt) - rtfree(inp->in6p_route.ro_rt); + RTFREE(inp->in6p_route.ro_rt); /* Check and free IPv4 related resources in case of mapped addr */ if (inp->inp_options) (void)m_free(inp->inp_options); ip_freemoptions(inp->inp_moptions); inp->inp_vflag = 0; INP_LOCK_DESTROY(inp); uma_zfree(ipi->ipi_zone, inp); } struct sockaddr * in6_sockaddr(port, addr_p) in_port_t port; struct in6_addr *addr_p; { struct sockaddr_in6 *sin6; MALLOC(sin6, struct sockaddr_in6 *, sizeof *sin6, M_SONAME, M_WAITOK); bzero(sin6, sizeof *sin6); sin6->sin6_family = AF_INET6; sin6->sin6_len = sizeof(*sin6); sin6->sin6_port = port; sin6->sin6_addr = *addr_p; if (IN6_IS_SCOPE_LINKLOCAL(&sin6->sin6_addr)) sin6->sin6_scope_id = ntohs(sin6->sin6_addr.s6_addr16[1]); else sin6->sin6_scope_id = 0; /*XXX*/ if (IN6_IS_SCOPE_LINKLOCAL(&sin6->sin6_addr)) sin6->sin6_addr.s6_addr16[1] = 0; return (struct sockaddr *)sin6; } struct sockaddr * in6_v4mapsin6_sockaddr(port, addr_p) in_port_t port; struct in_addr *addr_p; { struct sockaddr_in sin; struct sockaddr_in6 *sin6_p; bzero(&sin, sizeof sin); sin.sin_family = AF_INET; sin.sin_len = sizeof(sin); sin.sin_port = port; sin.sin_addr = *addr_p; MALLOC(sin6_p, struct sockaddr_in6 *, sizeof *sin6_p, M_SONAME, M_WAITOK); in6_sin_2_v4mapsin6(&sin, sin6_p); return (struct sockaddr *)sin6_p; } /* * The calling convention of in6_setsockaddr() and in6_setpeeraddr() was * modified to match the pru_sockaddr() and pru_peeraddr() entry points * in struct pr_usrreqs, so that protocols can just reference then directly * without the need for a wrapper function. The socket must have a valid * (i.e., non-nil) PCB, but it should be impossible to get an invalid one * except through a kernel programming error, so it is acceptable to panic * (or in this case trap) if the PCB is invalid. (Actually, we don't trap * because there actually /is/ a programming error somewhere... XXX) */ int in6_setsockaddr(so, nam) struct socket *so; struct sockaddr **nam; { int s; register struct inpcb *inp; struct in6_addr addr; in_port_t port; s = splnet(); inp = sotoinpcb(so); if (!inp) { splx(s); return EINVAL; } port = inp->inp_lport; addr = inp->in6p_laddr; splx(s); *nam = in6_sockaddr(port, &addr); return 0; } int in6_setpeeraddr(so, nam) struct socket *so; struct sockaddr **nam; { int s; struct inpcb *inp; struct in6_addr addr; in_port_t port; s = splnet(); inp = sotoinpcb(so); if (!inp) { splx(s); return EINVAL; } port = inp->inp_fport; addr = inp->in6p_faddr; splx(s); *nam = in6_sockaddr(port, &addr); return 0; } int in6_mapped_sockaddr(struct socket *so, struct sockaddr **nam) { struct inpcb *inp = sotoinpcb(so); int error; if (inp == NULL) return EINVAL; if (inp->inp_vflag & INP_IPV4) { error = in_setsockaddr(so, nam, &tcbinfo); if (error == 0) in6_sin_2_v4mapsin6_in_sock(nam); } else /* scope issues will be handled in in6_setsockaddr(). */ error = in6_setsockaddr(so, nam); return error; } int in6_mapped_peeraddr(struct socket *so, struct sockaddr **nam) { struct inpcb *inp = sotoinpcb(so); int error; if (inp == NULL) return EINVAL; if (inp->inp_vflag & INP_IPV4) { error = in_setpeeraddr(so, nam, &tcbinfo); if (error == 0) in6_sin_2_v4mapsin6_in_sock(nam); } else /* scope issues will be handled in in6_setpeeraddr(). */ error = in6_setpeeraddr(so, nam); return error; } /* * Pass some notification to all connections of a protocol * associated with address dst. The local address and/or port numbers * may be specified to limit the search. The "usual action" will be * taken, depending on the ctlinput cmd. The caller must filter any * cmds that are uninteresting (e.g., no error in the map). * Call the protocol specific routine (if any) to report * any errors for each matching socket. * * Must be called at splnet. */ void in6_pcbnotify(head, dst, fport_arg, src, lport_arg, cmd, notify) struct inpcbhead *head; struct sockaddr *dst; const struct sockaddr *src; u_int fport_arg, lport_arg; int cmd; struct inpcb *(*notify) __P((struct inpcb *, int)); { struct inpcb *inp, *ninp; struct sockaddr_in6 sa6_src, *sa6_dst; u_short fport = fport_arg, lport = lport_arg; u_int32_t flowinfo; int errno, s; if ((unsigned)cmd >= PRC_NCMDS || dst->sa_family != AF_INET6) return; sa6_dst = (struct sockaddr_in6 *)dst; if (IN6_IS_ADDR_UNSPECIFIED(&sa6_dst->sin6_addr)) return; /* * note that src can be NULL when we get notify by local fragmentation. */ sa6_src = (src == NULL) ? sa6_any : *(const struct sockaddr_in6 *)src; flowinfo = sa6_src.sin6_flowinfo; /* * Redirects go to all references to the destination, * and use in6_rtchange to invalidate the route cache. * Dead host indications: also use in6_rtchange to invalidate * the cache, and deliver the error to all the sockets. * Otherwise, if we have knowledge of the local port and address, * deliver only to that socket. */ if (PRC_IS_REDIRECT(cmd) || cmd == PRC_HOSTDEAD) { fport = 0; lport = 0; bzero((caddr_t)&sa6_src.sin6_addr, sizeof(sa6_src.sin6_addr)); if (cmd != PRC_HOSTDEAD) notify = in6_rtchange; } errno = inet6ctlerrmap[cmd]; s = splnet(); for (inp = LIST_FIRST(head); inp != NULL; inp = ninp) { ninp = LIST_NEXT(inp, inp_list); if ((inp->inp_vflag & INP_IPV6) == 0) continue; /* * Detect if we should notify the error. If no source and * destination ports are specifed, but non-zero flowinfo and * local address match, notify the error. This is the case * when the error is delivered with an encrypted buffer * by ESP. Otherwise, just compare addresses and ports * as usual. */ if (lport == 0 && fport == 0 && flowinfo && inp->inp_socket != NULL && flowinfo == (inp->in6p_flowinfo & IPV6_FLOWLABEL_MASK) && IN6_ARE_ADDR_EQUAL(&inp->in6p_laddr, &sa6_src.sin6_addr)) goto do_notify; else if (!IN6_ARE_ADDR_EQUAL(&inp->in6p_faddr, &sa6_dst->sin6_addr) || inp->inp_socket == 0 || (lport && inp->inp_lport != lport) || (!IN6_IS_ADDR_UNSPECIFIED(&sa6_src.sin6_addr) && !IN6_ARE_ADDR_EQUAL(&inp->in6p_laddr, &sa6_src.sin6_addr)) || (fport && inp->inp_fport != fport)) continue; do_notify: if (notify) (*notify)(inp, errno); } splx(s); } /* * Lookup a PCB based on the local address and port. */ struct inpcb * in6_pcblookup_local(pcbinfo, laddr, lport_arg, wild_okay) struct inpcbinfo *pcbinfo; struct in6_addr *laddr; u_int lport_arg; int wild_okay; { register struct inpcb *inp; int matchwild = 3, wildcard; u_short lport = lport_arg; if (!wild_okay) { struct inpcbhead *head; /* * Look for an unconnected (wildcard foreign addr) PCB that * matches the local address and port we're looking for. */ head = &pcbinfo->hashbase[INP_PCBHASH(INADDR_ANY, lport, 0, pcbinfo->hashmask)]; LIST_FOREACH(inp, head, inp_hash) { if ((inp->inp_vflag & INP_IPV6) == 0) continue; if (IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_faddr) && IN6_ARE_ADDR_EQUAL(&inp->in6p_laddr, laddr) && inp->inp_lport == lport) { /* * Found. */ return (inp); } } /* * Not found. */ return (NULL); } else { struct inpcbporthead *porthash; struct inpcbport *phd; struct inpcb *match = NULL; /* * Best fit PCB lookup. * * First see if this local port is in use by looking on the * port hash list. */ porthash = &pcbinfo->porthashbase[INP_PCBPORTHASH(lport, pcbinfo->porthashmask)]; LIST_FOREACH(phd, porthash, phd_hash) { if (phd->phd_port == lport) break; } if (phd != NULL) { /* * Port is in use by one or more PCBs. Look for best * fit. */ LIST_FOREACH(inp, &phd->phd_pcblist, inp_portlist) { wildcard = 0; if ((inp->inp_vflag & INP_IPV6) == 0) continue; if (!IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_faddr)) wildcard++; if (!IN6_IS_ADDR_UNSPECIFIED( &inp->in6p_laddr)) { if (IN6_IS_ADDR_UNSPECIFIED(laddr)) wildcard++; else if (!IN6_ARE_ADDR_EQUAL( &inp->in6p_laddr, laddr)) continue; } else { if (!IN6_IS_ADDR_UNSPECIFIED(laddr)) wildcard++; } if (wildcard < matchwild) { match = inp; matchwild = wildcard; if (matchwild == 0) { break; } } } } return (match); } } void in6_pcbpurgeif0(head, ifp) struct in6pcb *head; struct ifnet *ifp; { struct in6pcb *in6p; struct ip6_moptions *im6o; struct in6_multi_mship *imm, *nimm; for (in6p = head; in6p != NULL; in6p = LIST_NEXT(in6p, inp_list)) { im6o = in6p->in6p_moptions; if ((in6p->inp_vflag & INP_IPV6) && im6o) { /* * Unselect the outgoing interface if it is being * detached. */ if (im6o->im6o_multicast_ifp == ifp) im6o->im6o_multicast_ifp = NULL; /* * Drop multicast group membership if we joined * through the interface being detached. * XXX controversial - is it really legal for kernel * to force this? */ for (imm = im6o->im6o_memberships.lh_first; imm != NULL; imm = nimm) { nimm = imm->i6mm_chain.le_next; if (imm->i6mm_maddr->in6m_ifp == ifp) { LIST_REMOVE(imm, i6mm_chain); in6_delmulti(imm->i6mm_maddr); free(imm, M_IPMADDR); } } } } } /* * Check for alternatives when higher level complains * about service problems. For now, invalidate cached * routing information. If the route was created dynamically * (by a redirect), time to try a default gateway again. */ void in6_losing(in6p) struct inpcb *in6p; { struct rtentry *rt; struct rt_addrinfo info; if ((rt = in6p->in6p_route.ro_rt) != NULL) { + RT_LOCK(rt); + in6p->in6p_route.ro_rt = NULL; bzero((caddr_t)&info, sizeof(info)); info.rti_flags = rt->rt_flags; info.rti_info[RTAX_DST] = rt_key(rt); info.rti_info[RTAX_GATEWAY] = rt->rt_gateway; info.rti_info[RTAX_NETMASK] = rt_mask(rt); rt_missmsg(RTM_LOSING, &info, rt->rt_flags, 0); - if (rt->rt_flags & RTF_DYNAMIC) + if (rt->rt_flags & RTF_DYNAMIC) { + RT_UNLOCK(rt); /* XXX refcnt? */ (void)rtrequest1(RTM_DELETE, &info, NULL); - in6p->in6p_route.ro_rt = NULL; - rtfree(rt); + } else + rtfree(rt); /* * A new route can be allocated * the next time output is attempted. */ } } /* * After a routing change, flush old routing * and allocate a (hopefully) better one. */ struct inpcb * in6_rtchange(inp, errno) struct inpcb *inp; int errno; { if (inp->in6p_route.ro_rt) { - rtfree(inp->in6p_route.ro_rt); + RTFREE(inp->in6p_route.ro_rt); inp->in6p_route.ro_rt = 0; /* * A new route can be allocated the next time * output is attempted. */ } return inp; } /* * Lookup PCB in hash list. */ struct inpcb * in6_pcblookup_hash(pcbinfo, faddr, fport_arg, laddr, lport_arg, wildcard, ifp) struct inpcbinfo *pcbinfo; struct in6_addr *faddr, *laddr; u_int fport_arg, lport_arg; int wildcard; struct ifnet *ifp; { struct inpcbhead *head; register struct inpcb *inp; u_short fport = fport_arg, lport = lport_arg; int faith; if (faithprefix_p != NULL) faith = (*faithprefix_p)(laddr); else faith = 0; /* * First look for an exact match. */ head = &pcbinfo->hashbase[INP_PCBHASH(faddr->s6_addr32[3] /* XXX */, lport, fport, pcbinfo->hashmask)]; LIST_FOREACH(inp, head, inp_hash) { if ((inp->inp_vflag & INP_IPV6) == 0) continue; if (IN6_ARE_ADDR_EQUAL(&inp->in6p_faddr, faddr) && IN6_ARE_ADDR_EQUAL(&inp->in6p_laddr, laddr) && inp->inp_fport == fport && inp->inp_lport == lport) { /* * Found. */ return (inp); } } if (wildcard) { struct inpcb *local_wild = NULL; head = &pcbinfo->hashbase[INP_PCBHASH(INADDR_ANY, lport, 0, pcbinfo->hashmask)]; LIST_FOREACH(inp, head, inp_hash) { if ((inp->inp_vflag & INP_IPV6) == 0) continue; if (IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_faddr) && inp->inp_lport == lport) { if (faith && (inp->inp_flags & INP_FAITH) == 0) continue; if (IN6_ARE_ADDR_EQUAL(&inp->in6p_laddr, laddr)) return (inp); else if (IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr)) local_wild = inp; } } return (local_wild); } /* * Not found. */ return (NULL); } void init_sin6(struct sockaddr_in6 *sin6, struct mbuf *m) { struct ip6_hdr *ip; ip = mtod(m, struct ip6_hdr *); bzero(sin6, sizeof(*sin6)); sin6->sin6_len = sizeof(*sin6); sin6->sin6_family = AF_INET6; sin6->sin6_addr = ip->ip6_src; if (IN6_IS_SCOPE_LINKLOCAL(&sin6->sin6_addr)) sin6->sin6_addr.s6_addr16[1] = 0; sin6->sin6_scope_id = (m->m_pkthdr.rcvif && IN6_IS_SCOPE_LINKLOCAL(&sin6->sin6_addr)) ? m->m_pkthdr.rcvif->if_index : 0; return; } Index: head/sys/netinet6/in6_rmx.c =================================================================== --- head/sys/netinet6/in6_rmx.c (revision 120726) +++ head/sys/netinet6/in6_rmx.c (revision 120727) @@ -1,487 +1,490 @@ /* $FreeBSD$ */ /* $KAME: in6_rmx.c,v 1.11 2001/07/26 06:53:16 jinmei Exp $ */ /* * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the project nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ /* * Copyright 1994, 1995 Massachusetts Institute of Technology * * Permission to use, copy, modify, and distribute this software and * its documentation for any purpose and without fee is hereby * granted, provided that both the above copyright notice and this * permission notice appear in all copies, that both the above * copyright notice and this permission notice appear in all * supporting documentation, and that the name of M.I.T. not be used * in advertising or publicity pertaining to distribution of the * software without specific, written prior permission. M.I.T. makes * no representations about the suitability of this software for any * purpose. It is provided "as is" without express or implied * warranty. * * THIS SOFTWARE IS PROVIDED BY M.I.T. ``AS IS''. M.I.T. DISCLAIMS * ALL EXPRESS OR IMPLIED WARRANTIES WITH REGARD TO THIS SOFTWARE, * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. IN NO EVENT * SHALL M.I.T. BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * */ /* * This code does two things necessary for the enhanced TCP metrics to * function in a useful manner: * 1) It marks all non-host routes as `cloning', thus ensuring that * every actual reference to such a route actually gets turned * into a reference to a host route to the specific destination * requested. * 2) When such routes lose all their references, it arranges for them * to be deleted in some random collection of circumstances, so that * a large quantity of stale routing data is not kept in kernel memory * indefinitely. See in6_rtqtimo() below for the exact mechanism. */ #include #include #include #include #include #include #include #include #include +#include #include #include #include #include #include #include #include #include #include #include #include #include extern int in6_inithead __P((void **head, int off)); #define RTPRF_OURS RTF_PROTO3 /* set on routes we manage */ /* * Do what we need to do when inserting a route. */ static struct radix_node * in6_addroute(void *v_arg, void *n_arg, struct radix_node_head *head, struct radix_node *treenodes) { struct rtentry *rt = (struct rtentry *)treenodes; struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)rt_key(rt); struct radix_node *ret; /* * For IPv6, all unicast non-host routes are automatically cloning. */ if (IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr)) rt->rt_flags |= RTF_MULTICAST; if (!(rt->rt_flags & (RTF_HOST | RTF_CLONING | RTF_MULTICAST))) { rt->rt_flags |= RTF_PRCLONING; } /* * A little bit of help for both IPv6 output and input: * For local addresses, we make sure that RTF_LOCAL is set, * with the thought that this might one day be used to speed up * ip_input(). * * We also mark routes to multicast addresses as such, because * it's easy to do and might be useful (but this is much more * dubious since it's so easy to inspect the address). (This * is done above.) * * XXX * should elaborate the code. */ if (rt->rt_flags & RTF_HOST) { if (IN6_ARE_ADDR_EQUAL(&satosin6(rt->rt_ifa->ifa_addr) ->sin6_addr, &sin6->sin6_addr)) { rt->rt_flags |= RTF_LOCAL; } } if (!rt->rt_rmx.rmx_mtu && !(rt->rt_rmx.rmx_locks & RTV_MTU) && rt->rt_ifp) rt->rt_rmx.rmx_mtu = rt->rt_ifp->if_mtu; ret = rn_addroute(v_arg, n_arg, head, treenodes); if (ret == NULL && rt->rt_flags & RTF_HOST) { struct rtentry *rt2; /* * We are trying to add a host route, but can't. * Find out if it is because of an * ARP entry and delete it if so. */ rt2 = rtalloc1((struct sockaddr *)sin6, 0, RTF_CLONING | RTF_PRCLONING); if (rt2) { if (rt2->rt_flags & RTF_LLINFO && rt2->rt_flags & RTF_HOST && rt2->rt_gateway && rt2->rt_gateway->sa_family == AF_LINK) { + /* NB: must unlock to avoid recursion */ + RT_UNLOCK(rt2); rtrequest(RTM_DELETE, (struct sockaddr *)rt_key(rt2), rt2->rt_gateway, rt_mask(rt2), rt2->rt_flags, 0); ret = rn_addroute(v_arg, n_arg, head, treenodes); + RT_LOCK(rt2); } - RTFREE(rt2); + RTFREE_LOCKED(rt2); } } else if (ret == NULL && rt->rt_flags & RTF_CLONING) { struct rtentry *rt2; /* * We are trying to add a net route, but can't. * The following case should be allowed, so we'll make a * special check for this: * Two IPv6 addresses with the same prefix is assigned * to a single interrface. * # ifconfig if0 inet6 3ffe:0501::1 prefix 64 alias (*1) * # ifconfig if0 inet6 3ffe:0501::2 prefix 64 alias (*2) * In this case, (*1) and (*2) want to add the same * net route entry, 3ffe:0501:: -> if0. * This case should not raise an error. */ rt2 = rtalloc1((struct sockaddr *)sin6, 0, RTF_CLONING | RTF_PRCLONING); if (rt2) { if ((rt2->rt_flags & (RTF_CLONING|RTF_HOST|RTF_GATEWAY)) == RTF_CLONING && rt2->rt_gateway && rt2->rt_gateway->sa_family == AF_LINK && rt2->rt_ifp == rt->rt_ifp) { ret = rt2->rt_nodes; } - RTFREE(rt2); + RTFREE_LOCKED(rt2); } } return ret; } /* * This code is the inverse of in6_clsroute: on first reference, if we * were managing the route, stop doing so and set the expiration timer * back off again. */ static struct radix_node * in6_matroute(void *v_arg, struct radix_node_head *head) { struct radix_node *rn = rn_match(v_arg, head); struct rtentry *rt = (struct rtentry *)rn; if (rt && rt->rt_refcnt == 0) { /* this is first reference */ if (rt->rt_flags & RTPRF_OURS) { rt->rt_flags &= ~RTPRF_OURS; rt->rt_rmx.rmx_expire = 0; } } return rn; } SYSCTL_DECL(_net_inet6_ip6); static int rtq_reallyold = 60*60; /* one hour is ``really old'' */ SYSCTL_INT(_net_inet6_ip6, IPV6CTL_RTEXPIRE, rtexpire, CTLFLAG_RW, &rtq_reallyold , 0, ""); static int rtq_minreallyold = 10; /* never automatically crank down to less */ SYSCTL_INT(_net_inet6_ip6, IPV6CTL_RTMINEXPIRE, rtminexpire, CTLFLAG_RW, &rtq_minreallyold , 0, ""); static int rtq_toomany = 128; /* 128 cached routes is ``too many'' */ SYSCTL_INT(_net_inet6_ip6, IPV6CTL_RTMAXCACHE, rtmaxcache, CTLFLAG_RW, &rtq_toomany , 0, ""); /* * On last reference drop, mark the route as belong to us so that it can be * timed out. */ static void in6_clsroute(struct radix_node *rn, struct radix_node_head *head) { struct rtentry *rt = (struct rtentry *)rn; + RT_LOCK_ASSERT(rt); + if (!(rt->rt_flags & RTF_UP)) return; /* prophylactic measures */ if ((rt->rt_flags & (RTF_LLINFO | RTF_HOST)) != RTF_HOST) return; if ((rt->rt_flags & (RTF_WASCLONED | RTPRF_OURS)) != RTF_WASCLONED) return; /* * As requested by David Greenman: * If rtq_reallyold is 0, just delete the route without * waiting for a timeout cycle to kill it. */ if (rtq_reallyold != 0) { rt->rt_flags |= RTPRF_OURS; rt->rt_rmx.rmx_expire = time_second + rtq_reallyold; } else { + /* NB: must unlock to avoid recursion */ + RT_UNLOCK(rt); rtrequest(RTM_DELETE, (struct sockaddr *)rt_key(rt), rt->rt_gateway, rt_mask(rt), rt->rt_flags, 0); + RT_LOCK(rt); } } struct rtqk_arg { struct radix_node_head *rnh; int mode; int updating; int draining; int killed; int found; time_t nextstop; }; /* * Get rid of old routes. When draining, this deletes everything, even when * the timeout is not expired yet. When updating, this makes sure that * nothing has a timeout longer than the current value of rtq_reallyold. */ static int in6_rtqkill(struct radix_node *rn, void *rock) { struct rtqk_arg *ap = rock; struct rtentry *rt = (struct rtentry *)rn; int err; if (rt->rt_flags & RTPRF_OURS) { ap->found++; if (ap->draining || rt->rt_rmx.rmx_expire <= time_second) { if (rt->rt_refcnt > 0) panic("rtqkill route really not free"); err = rtrequest(RTM_DELETE, (struct sockaddr *)rt_key(rt), rt->rt_gateway, rt_mask(rt), rt->rt_flags, 0); if (err) { log(LOG_WARNING, "in6_rtqkill: error %d", err); } else { ap->killed++; } } else { if (ap->updating && (rt->rt_rmx.rmx_expire - time_second > rtq_reallyold)) { rt->rt_rmx.rmx_expire = time_second + rtq_reallyold; } ap->nextstop = lmin(ap->nextstop, rt->rt_rmx.rmx_expire); } } return 0; } #define RTQ_TIMEOUT 60*10 /* run no less than once every ten minutes */ static int rtq_timeout = RTQ_TIMEOUT; +static struct callout rtq_timer; static void in6_rtqtimo(void *rock) { struct radix_node_head *rnh = rock; struct rtqk_arg arg; struct timeval atv; static time_t last_adjusted_timeout = 0; - int s; arg.found = arg.killed = 0; arg.rnh = rnh; arg.nextstop = time_second + rtq_timeout; arg.draining = arg.updating = 0; - s = splnet(); RADIX_NODE_HEAD_LOCK(rnh); rnh->rnh_walktree(rnh, in6_rtqkill, &arg); RADIX_NODE_HEAD_UNLOCK(rnh); - splx(s); /* * Attempt to be somewhat dynamic about this: * If there are ``too many'' routes sitting around taking up space, * then crank down the timeout, and see if we can't make some more * go away. However, we make sure that we will never adjust more * than once in rtq_timeout seconds, to keep from cranking down too * hard. */ if ((arg.found - arg.killed > rtq_toomany) && (time_second - last_adjusted_timeout >= rtq_timeout) && rtq_reallyold > rtq_minreallyold) { rtq_reallyold = 2*rtq_reallyold / 3; if (rtq_reallyold < rtq_minreallyold) { rtq_reallyold = rtq_minreallyold; } last_adjusted_timeout = time_second; #ifdef DIAGNOSTIC log(LOG_DEBUG, "in6_rtqtimo: adjusted rtq_reallyold to %d", rtq_reallyold); #endif arg.found = arg.killed = 0; arg.updating = 1; - s = splnet(); RADIX_NODE_HEAD_LOCK(rnh); rnh->rnh_walktree(rnh, in6_rtqkill, &arg); RADIX_NODE_HEAD_UNLOCK(rnh); - splx(s); } atv.tv_usec = 0; atv.tv_sec = arg.nextstop; - timeout(in6_rtqtimo, rock, tvtohz(&atv)); + callout_reset(&rtq_timer, tvtohz(&atv), in6_rtqtimo, rock); } /* * Age old PMTUs. */ struct mtuex_arg { struct radix_node_head *rnh; time_t nextstop; }; +static struct callout rtq_mtutimer; static int in6_mtuexpire(struct radix_node *rn, void *rock) { struct rtentry *rt = (struct rtentry *)rn; struct mtuex_arg *ap = rock; /* sanity */ if (!rt) panic("rt == NULL in in6_mtuexpire"); if (rt->rt_rmx.rmx_expire && !(rt->rt_flags & RTF_PROBEMTU)) { if (rt->rt_rmx.rmx_expire <= time_second) { rt->rt_flags |= RTF_PROBEMTU; } else { ap->nextstop = lmin(ap->nextstop, rt->rt_rmx.rmx_expire); } } return 0; } #define MTUTIMO_DEFAULT (60*1) static void in6_mtutimo(void *rock) { struct radix_node_head *rnh = rock; struct mtuex_arg arg; struct timeval atv; - int s; arg.rnh = rnh; arg.nextstop = time_second + MTUTIMO_DEFAULT; - s = splnet(); RADIX_NODE_HEAD_LOCK(rnh); rnh->rnh_walktree(rnh, in6_mtuexpire, &arg); RADIX_NODE_HEAD_UNLOCK(rnh); - splx(s); atv.tv_usec = 0; atv.tv_sec = arg.nextstop; if (atv.tv_sec < time_second) { printf("invalid mtu expiration time on routing table\n"); arg.nextstop = time_second + 30; /* last resort */ } - timeout(in6_mtutimo, rock, tvtohz(&atv)); + callout_reset(&rtq_mtutimer, tvtohz(&atv), in6_mtutimo, rock); } #if 0 void in6_rtqdrain() { struct radix_node_head *rnh = rt_tables[AF_INET6]; struct rtqk_arg arg; - int s; + arg.found = arg.killed = 0; arg.rnh = rnh; arg.nextstop = 0; arg.draining = 1; arg.updating = 0; - s = splnet(); RADIX_NODE_HEAD_LOCK(rnh); rnh->rnh_walktree(rnh, in6_rtqkill, &arg); RADIX_NODE_HEAD_UNLOCK(rnh); - splx(s); } #endif /* * Initialize our routing tree. */ int in6_inithead(void **head, int off) { struct radix_node_head *rnh; if (!rn_inithead(head, off)) return 0; if (head != (void **)&rt_tables[AF_INET6]) /* BOGUS! */ return 1; /* only do this for the real routing table */ rnh = *head; rnh->rnh_addaddr = in6_addroute; rnh->rnh_matchaddr = in6_matroute; rnh->rnh_close = in6_clsroute; + callout_init(&rtq_timer, CALLOUT_MPSAFE); in6_rtqtimo(rnh); /* kick off timeout first time */ + callout_init(&rtq_mtutimer, CALLOUT_MPSAFE); in6_mtutimo(rnh); /* kick off timeout first time */ return 1; } Index: head/sys/netinet6/in6_src.c =================================================================== --- head/sys/netinet6/in6_src.c (revision 120726) +++ head/sys/netinet6/in6_src.c (revision 120727) @@ -1,558 +1,559 @@ /* $FreeBSD$ */ /* $KAME: in6_src.c,v 1.37 2001/03/29 05:34:31 itojun Exp $ */ /* * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the project nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ /* * Copyright (c) 1982, 1986, 1991, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)in_pcb.c 8.2 (Berkeley) 1/4/94 */ #include "opt_inet.h" #include "opt_inet6.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef ENABLE_DEFAULT_SCOPE #include #endif #include /* * Return an IPv6 address, which is the most appropriate for a given * destination and user specified options. * If necessary, this function lookups the routing table and returns * an entry to the caller for later use. */ struct in6_addr * in6_selectsrc(dstsock, opts, mopts, ro, laddr, errorp) struct sockaddr_in6 *dstsock; struct ip6_pktopts *opts; struct ip6_moptions *mopts; struct route_in6 *ro; struct in6_addr *laddr; int *errorp; { struct in6_addr *dst; struct in6_ifaddr *ia6 = 0; struct in6_pktinfo *pi = NULL; dst = &dstsock->sin6_addr; *errorp = 0; /* * If the source address is explicitly specified by the caller, * use it. */ if (opts && (pi = opts->ip6po_pktinfo) && !IN6_IS_ADDR_UNSPECIFIED(&pi->ipi6_addr)) return(&pi->ipi6_addr); /* * If the source address is not specified but the socket(if any) * is already bound, use the bound address. */ if (laddr && !IN6_IS_ADDR_UNSPECIFIED(laddr)) return(laddr); /* * If the caller doesn't specify the source address but * the outgoing interface, use an address associated with * the interface. */ if (pi && pi->ipi6_ifindex) { /* XXX boundary check is assumed to be already done. */ ia6 = in6_ifawithscope(ifnet_byindex(pi->ipi6_ifindex), dst); if (ia6 == 0) { *errorp = EADDRNOTAVAIL; return(0); } return(&satosin6(&ia6->ia_addr)->sin6_addr); } /* * If the destination address is a link-local unicast address or * a multicast address, and if the outgoing interface is specified * by the sin6_scope_id filed, use an address associated with the * interface. * XXX: We're now trying to define more specific semantics of * sin6_scope_id field, so this part will be rewritten in * the near future. */ if ((IN6_IS_ADDR_LINKLOCAL(dst) || IN6_IS_ADDR_MULTICAST(dst)) && dstsock->sin6_scope_id) { /* * I'm not sure if boundary check for scope_id is done * somewhere... */ if (dstsock->sin6_scope_id < 0 || if_index < dstsock->sin6_scope_id) { *errorp = ENXIO; /* XXX: better error? */ return(0); } ia6 = in6_ifawithscope(ifnet_byindex(dstsock->sin6_scope_id), dst); if (ia6 == 0) { *errorp = EADDRNOTAVAIL; return(0); } return(&satosin6(&ia6->ia_addr)->sin6_addr); } /* * If the destination address is a multicast address and * the outgoing interface for the address is specified * by the caller, use an address associated with the interface. * There is a sanity check here; if the destination has node-local * scope, the outgoing interfacde should be a loopback address. * Even if the outgoing interface is not specified, we also * choose a loopback interface as the outgoing interface. */ if (IN6_IS_ADDR_MULTICAST(dst)) { struct ifnet *ifp = mopts ? mopts->im6o_multicast_ifp : NULL; if (ifp == NULL && IN6_IS_ADDR_MC_NODELOCAL(dst)) { ifp = &loif[0]; } if (ifp) { ia6 = in6_ifawithscope(ifp, dst); if (ia6 == 0) { *errorp = EADDRNOTAVAIL; return(0); } return(&satosin6(&ia6->ia_addr)->sin6_addr); } } /* * If the next hop address for the packet is specified * by caller, use an address associated with the route * to the next hop. */ { struct sockaddr_in6 *sin6_next; struct rtentry *rt; if (opts && opts->ip6po_nexthop) { sin6_next = satosin6(opts->ip6po_nexthop); rt = nd6_lookup(&sin6_next->sin6_addr, 1, NULL); if (rt) { ia6 = in6_ifawithscope(rt->rt_ifp, dst); if (ia6 == 0) ia6 = ifatoia6(rt->rt_ifa); } if (ia6 == 0) { *errorp = EADDRNOTAVAIL; return(0); } return(&satosin6(&ia6->ia_addr)->sin6_addr); } } /* * If route is known or can be allocated now, * our src addr is taken from the i/f, else punt. */ if (ro) { if (ro->ro_rt && (!(ro->ro_rt->rt_flags & RTF_UP) || satosin6(&ro->ro_dst)->sin6_family != AF_INET6 || !IN6_ARE_ADDR_EQUAL(&satosin6(&ro->ro_dst)->sin6_addr, dst))) { RTFREE(ro->ro_rt); ro->ro_rt = (struct rtentry *)0; } if (ro->ro_rt == (struct rtentry *)0 || ro->ro_rt->rt_ifp == (struct ifnet *)0) { struct sockaddr_in6 *sa6; /* No route yet, so try to acquire one */ bzero(&ro->ro_dst, sizeof(struct sockaddr_in6)); sa6 = (struct sockaddr_in6 *)&ro->ro_dst; sa6->sin6_family = AF_INET6; sa6->sin6_len = sizeof(struct sockaddr_in6); sa6->sin6_addr = *dst; sa6->sin6_scope_id = dstsock->sin6_scope_id; if (IN6_IS_ADDR_MULTICAST(dst)) { ro->ro_rt = rtalloc1(&((struct route *)ro) ->ro_dst, 0, 0UL); + RT_UNLOCK(ro->ro_rt); } else { rtalloc((struct route *)ro); } } /* * in_pcbconnect() checks out IFF_LOOPBACK to skip using * the address. But we don't know why it does so. * It is necessary to ensure the scope even for lo0 * so doesn't check out IFF_LOOPBACK. */ if (ro->ro_rt) { ia6 = in6_ifawithscope(ro->ro_rt->rt_ifa->ifa_ifp, dst); if (ia6 == 0) /* xxx scope error ?*/ ia6 = ifatoia6(ro->ro_rt->rt_ifa); } #if 0 /* * xxx The followings are necessary? (kazu) * I don't think so. * It's for SO_DONTROUTE option in IPv4.(jinmei) */ if (ia6 == 0) { struct sockaddr_in6 sin6 = {sizeof(sin6), AF_INET6, 0}; sin6->sin6_addr = *dst; ia6 = ifatoia6(ifa_ifwithdstaddr(sin6tosa(&sin6))); if (ia6 == 0) ia6 = ifatoia6(ifa_ifwithnet(sin6tosa(&sin6))); if (ia6 == 0) return(0); return(&satosin6(&ia6->ia_addr)->sin6_addr); } #endif /* 0 */ if (ia6 == 0) { *errorp = EHOSTUNREACH; /* no route */ return(0); } return(&satosin6(&ia6->ia_addr)->sin6_addr); } *errorp = EADDRNOTAVAIL; return(0); } /* * Default hop limit selection. The precedence is as follows: * 1. Hoplimit value specified via ioctl. * 2. (If the outgoing interface is detected) the current * hop limit of the interface specified by router advertisement. * 3. The system default hoplimit. */ int in6_selecthlim(in6p, ifp) struct in6pcb *in6p; struct ifnet *ifp; { if (in6p && in6p->in6p_hops >= 0) return(in6p->in6p_hops); else if (ifp) return(nd_ifinfo[ifp->if_index].chlim); else return(ip6_defhlim); } /* * XXX: this is borrowed from in6_pcbbind(). If possible, we should * share this function by all *bsd*... */ int in6_pcbsetport(laddr, inp, td) struct in6_addr *laddr; struct inpcb *inp; struct thread *td; { struct socket *so = inp->inp_socket; u_int16_t lport = 0, first, last, *lastport; int count, error = 0, wild = 0; struct inpcbinfo *pcbinfo = inp->inp_pcbinfo; /* XXX: this is redundant when called from in6_pcbbind */ if ((so->so_options & (SO_REUSEADDR|SO_REUSEPORT)) == 0) wild = INPLOOKUP_WILDCARD; inp->inp_flags |= INP_ANONPORT; if (inp->inp_flags & INP_HIGHPORT) { first = ipport_hifirstauto; /* sysctl */ last = ipport_hilastauto; lastport = &pcbinfo->lasthi; } else if (inp->inp_flags & INP_LOWPORT) { if (td && (error = suser(td))) return error; first = ipport_lowfirstauto; /* 1023 */ last = ipport_lowlastauto; /* 600 */ lastport = &pcbinfo->lastlow; } else { first = ipport_firstauto; /* sysctl */ last = ipport_lastauto; lastport = &pcbinfo->lastport; } /* * Simple check to ensure all ports are not used up causing * a deadlock here. * * We split the two cases (up and down) so that the direction * is not being tested on each round of the loop. */ if (first > last) { /* * counting down */ count = first - last; do { if (count-- < 0) { /* completely used? */ /* * Undo any address bind that may have * occurred above. */ inp->in6p_laddr = in6addr_any; return (EAGAIN); } --*lastport; if (*lastport > first || *lastport < last) *lastport = first; lport = htons(*lastport); } while (in6_pcblookup_local(pcbinfo, &inp->in6p_laddr, lport, wild)); } else { /* * counting up */ count = last - first; do { if (count-- < 0) { /* completely used? */ /* * Undo any address bind that may have * occurred above. */ inp->in6p_laddr = in6addr_any; return (EAGAIN); } ++*lastport; if (*lastport < first || *lastport > last) *lastport = first; lport = htons(*lastport); } while (in6_pcblookup_local(pcbinfo, &inp->in6p_laddr, lport, wild)); } inp->inp_lport = lport; if (in_pcbinshash(inp) != 0) { inp->in6p_laddr = in6addr_any; inp->inp_lport = 0; return (EAGAIN); } return(0); } /* * generate kernel-internal form (scopeid embedded into s6_addr16[1]). * If the address scope of is link-local, embed the interface index in the * address. The routine determines our precedence * between advanced API scope/interface specification and basic API * specification. * * this function should be nuked in the future, when we get rid of * embedded scopeid thing. * * XXX actually, it is over-specification to return ifp against sin6_scope_id. * there can be multiple interfaces that belong to a particular scope zone * (in specification, we have 1:N mapping between a scope zone and interfaces). * we may want to change the function to return something other than ifp. */ int in6_embedscope(in6, sin6, in6p, ifpp) struct in6_addr *in6; const struct sockaddr_in6 *sin6; #ifdef HAVE_NRL_INPCB struct inpcb *in6p; #define in6p_outputopts inp_outputopts6 #define in6p_moptions inp_moptions6 #else struct in6pcb *in6p; #endif struct ifnet **ifpp; { struct ifnet *ifp = NULL; u_int32_t scopeid; *in6 = sin6->sin6_addr; scopeid = sin6->sin6_scope_id; if (ifpp) *ifpp = NULL; /* * don't try to read sin6->sin6_addr beyond here, since the caller may * ask us to overwrite existing sockaddr_in6 */ #ifdef ENABLE_DEFAULT_SCOPE if (scopeid == 0) scopeid = scope6_addr2default(in6); #endif if (IN6_IS_SCOPE_LINKLOCAL(in6)) { struct in6_pktinfo *pi; /* * KAME assumption: link id == interface id */ if (in6p && in6p->in6p_outputopts && (pi = in6p->in6p_outputopts->ip6po_pktinfo) && pi->ipi6_ifindex) { ifp = ifnet_byindex(pi->ipi6_ifindex); in6->s6_addr16[1] = htons(pi->ipi6_ifindex); } else if (in6p && IN6_IS_ADDR_MULTICAST(in6) && in6p->in6p_moptions && in6p->in6p_moptions->im6o_multicast_ifp) { ifp = in6p->in6p_moptions->im6o_multicast_ifp; in6->s6_addr16[1] = htons(ifp->if_index); } else if (scopeid) { /* boundary check */ if (scopeid < 0 || if_index < scopeid) return ENXIO; /* XXX EINVAL? */ ifp = ifnet_byindex(scopeid); /*XXX assignment to 16bit from 32bit variable */ in6->s6_addr16[1] = htons(scopeid & 0xffff); } if (ifpp) *ifpp = ifp; } return 0; } #ifdef HAVE_NRL_INPCB #undef in6p_outputopts #undef in6p_moptions #endif /* * generate standard sockaddr_in6 from embedded form. * touches sin6_addr and sin6_scope_id only. * * this function should be nuked in the future, when we get rid of * embedded scopeid thing. */ int in6_recoverscope(sin6, in6, ifp) struct sockaddr_in6 *sin6; const struct in6_addr *in6; struct ifnet *ifp; { u_int32_t scopeid; sin6->sin6_addr = *in6; /* * don't try to read *in6 beyond here, since the caller may * ask us to overwrite existing sockaddr_in6 */ sin6->sin6_scope_id = 0; if (IN6_IS_SCOPE_LINKLOCAL(in6)) { /* * KAME assumption: link id == interface id */ scopeid = ntohs(sin6->sin6_addr.s6_addr16[1]); if (scopeid) { /* sanity check */ if (scopeid < 0 || if_index < scopeid) return ENXIO; if (ifp && ifp->if_index != scopeid) return ENXIO; sin6->sin6_addr.s6_addr16[1] = 0; sin6->sin6_scope_id = scopeid; } } return 0; } /* * just clear the embedded scope identifer. * XXX: currently used for bsdi4 only as a supplement function. */ void in6_clearscope(addr) struct in6_addr *addr; { if (IN6_IS_SCOPE_LINKLOCAL(addr)) addr->s6_addr16[1] = 0; } Index: head/sys/netinet6/ip6_output.c =================================================================== --- head/sys/netinet6/ip6_output.c (revision 120726) +++ head/sys/netinet6/ip6_output.c (revision 120727) @@ -1,2604 +1,2605 @@ /* $FreeBSD$ */ /* $KAME: ip6_output.c,v 1.279 2002/01/26 06:12:30 jinmei Exp $ */ /* * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the project nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ /* * Copyright (c) 1982, 1986, 1988, 1990, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)ip_output.c 8.3 (Berkeley) 1/21/94 */ #include "opt_ip6fw.h" #include "opt_inet.h" #include "opt_inet6.h" #include "opt_ipsec.h" #include "opt_pfil_hooks.h" #include "opt_random_ip_id.h" #include #include #include #include #include #include #include #include #include #include #include #include #ifdef PFIL_HOOKS #include #endif #include #include #include #include #include #include #include #include #ifdef IPSEC #include #ifdef INET6 #include #endif #include #endif /* IPSEC */ #ifdef FAST_IPSEC #include #include #include #endif /* FAST_IPSEC */ #include #include #include static MALLOC_DEFINE(M_IPMOPTS, "ip6_moptions", "internet multicast options"); struct ip6_exthdrs { struct mbuf *ip6e_ip6; struct mbuf *ip6e_hbh; struct mbuf *ip6e_dest1; struct mbuf *ip6e_rthdr; struct mbuf *ip6e_dest2; }; static int ip6_pcbopts __P((struct ip6_pktopts **, struct mbuf *, struct socket *, struct sockopt *sopt)); static int ip6_setmoptions __P((int, struct ip6_moptions **, struct mbuf *)); static int ip6_getmoptions __P((int, struct ip6_moptions *, struct mbuf **)); static int ip6_copyexthdr __P((struct mbuf **, caddr_t, int)); static int ip6_insertfraghdr __P((struct mbuf *, struct mbuf *, int, struct ip6_frag **)); static int ip6_insert_jumboopt __P((struct ip6_exthdrs *, u_int32_t)); static int ip6_splithdr __P((struct mbuf *, struct ip6_exthdrs *)); /* * IP6 output. The packet in mbuf chain m contains a skeletal IP6 * header (with pri, len, nxt, hlim, src, dst). * This function may modify ver and hlim only. * The mbuf chain containing the packet will be freed. * The mbuf opt, if present, will not be freed. * * type of "mtu": rt_rmx.rmx_mtu is u_long, ifnet.ifr_mtu is int, and * nd_ifinfo.linkmtu is u_int32_t. so we use u_long to hold largest one, * which is rt_rmx.rmx_mtu. */ int ip6_output(m0, opt, ro, flags, im6o, ifpp, inp) struct mbuf *m0; struct ip6_pktopts *opt; struct route_in6 *ro; int flags; struct ip6_moptions *im6o; struct ifnet **ifpp; /* XXX: just for statistics */ struct inpcb *inp; { struct ip6_hdr *ip6, *mhip6; struct ifnet *ifp, *origifp; struct mbuf *m = m0; int hlen, tlen, len, off; struct route_in6 ip6route; struct sockaddr_in6 *dst; int error = 0; struct in6_ifaddr *ia = NULL; u_long mtu; u_int32_t optlen = 0, plen = 0, unfragpartlen = 0; struct ip6_exthdrs exthdrs; struct in6_addr finaldst; struct route_in6 *ro_pmtu = NULL; int hdrsplit = 0; int needipsec = 0; #ifdef IPSEC int needipsectun = 0; struct secpolicy *sp = NULL; ip6 = mtod(m, struct ip6_hdr *); #endif /* IPSEC */ #ifdef FAST_IPSEC int needipsectun = 0; struct secpolicy *sp = NULL; ip6 = mtod(m, struct ip6_hdr *); #endif /* FAST_IPSEC */ #define MAKE_EXTHDR(hp, mp) \ do { \ if (hp) { \ struct ip6_ext *eh = (struct ip6_ext *)(hp); \ error = ip6_copyexthdr((mp), (caddr_t)(hp), \ ((eh)->ip6e_len + 1) << 3); \ if (error) \ goto freehdrs; \ } \ } while (0) bzero(&exthdrs, sizeof(exthdrs)); if (opt) { /* Hop-by-Hop options header */ MAKE_EXTHDR(opt->ip6po_hbh, &exthdrs.ip6e_hbh); /* Destination options header(1st part) */ MAKE_EXTHDR(opt->ip6po_dest1, &exthdrs.ip6e_dest1); /* Routing header */ MAKE_EXTHDR(opt->ip6po_rthdr, &exthdrs.ip6e_rthdr); /* Destination options header(2nd part) */ MAKE_EXTHDR(opt->ip6po_dest2, &exthdrs.ip6e_dest2); } #ifdef IPSEC /* get a security policy for this packet */ if (inp == NULL) sp = ipsec6_getpolicybyaddr(m, IPSEC_DIR_OUTBOUND, 0, &error); else sp = ipsec6_getpolicybypcb(m, IPSEC_DIR_OUTBOUND, inp, &error); if (sp == NULL) { ipsec6stat.out_inval++; goto freehdrs; } error = 0; /* check policy */ switch (sp->policy) { case IPSEC_POLICY_DISCARD: /* * This packet is just discarded. */ ipsec6stat.out_polvio++; goto freehdrs; case IPSEC_POLICY_BYPASS: case IPSEC_POLICY_NONE: /* no need to do IPsec. */ needipsec = 0; break; case IPSEC_POLICY_IPSEC: if (sp->req == NULL) { /* acquire a policy */ error = key_spdacquire(sp); goto freehdrs; } needipsec = 1; break; case IPSEC_POLICY_ENTRUST: default: printf("ip6_output: Invalid policy found. %d\n", sp->policy); } #endif /* IPSEC */ #ifdef FAST_IPSEC /* get a security policy for this packet */ if (inp == NULL) sp = ipsec_getpolicybyaddr(m, IPSEC_DIR_OUTBOUND, 0, &error); else sp = ipsec_getpolicybysock(m, IPSEC_DIR_OUTBOUND, inp, &error); if (sp == NULL) { newipsecstat.ips_out_inval++; goto freehdrs; } error = 0; /* check policy */ switch (sp->policy) { case IPSEC_POLICY_DISCARD: /* * This packet is just discarded. */ newipsecstat.ips_out_polvio++; goto freehdrs; case IPSEC_POLICY_BYPASS: case IPSEC_POLICY_NONE: /* no need to do IPsec. */ needipsec = 0; break; case IPSEC_POLICY_IPSEC: if (sp->req == NULL) { /* acquire a policy */ error = key_spdacquire(sp); goto freehdrs; } needipsec = 1; break; case IPSEC_POLICY_ENTRUST: default: printf("ip6_output: Invalid policy found. %d\n", sp->policy); } #endif /* FAST_IPSEC */ /* * Calculate the total length of the extension header chain. * Keep the length of the unfragmentable part for fragmentation. */ optlen = 0; if (exthdrs.ip6e_hbh) optlen += exthdrs.ip6e_hbh->m_len; if (exthdrs.ip6e_dest1) optlen += exthdrs.ip6e_dest1->m_len; if (exthdrs.ip6e_rthdr) optlen += exthdrs.ip6e_rthdr->m_len; unfragpartlen = optlen + sizeof(struct ip6_hdr); /* NOTE: we don't add AH/ESP length here. do that later. */ if (exthdrs.ip6e_dest2) optlen += exthdrs.ip6e_dest2->m_len; /* * If we need IPsec, or there is at least one extension header, * separate IP6 header from the payload. */ if ((needipsec || optlen) && !hdrsplit) { if ((error = ip6_splithdr(m, &exthdrs)) != 0) { m = NULL; goto freehdrs; } m = exthdrs.ip6e_ip6; hdrsplit++; } /* adjust pointer */ ip6 = mtod(m, struct ip6_hdr *); /* adjust mbuf packet header length */ m->m_pkthdr.len += optlen; plen = m->m_pkthdr.len - sizeof(*ip6); /* If this is a jumbo payload, insert a jumbo payload option. */ if (plen > IPV6_MAXPACKET) { if (!hdrsplit) { if ((error = ip6_splithdr(m, &exthdrs)) != 0) { m = NULL; goto freehdrs; } m = exthdrs.ip6e_ip6; hdrsplit++; } /* adjust pointer */ ip6 = mtod(m, struct ip6_hdr *); if ((error = ip6_insert_jumboopt(&exthdrs, plen)) != 0) goto freehdrs; ip6->ip6_plen = 0; } else ip6->ip6_plen = htons(plen); /* * Concatenate headers and fill in next header fields. * Here we have, on "m" * IPv6 payload * and we insert headers accordingly. Finally, we should be getting: * IPv6 hbh dest1 rthdr ah* [esp* dest2 payload] * * during the header composing process, "m" points to IPv6 header. * "mprev" points to an extension header prior to esp. */ { u_char *nexthdrp = &ip6->ip6_nxt; struct mbuf *mprev = m; /* * we treat dest2 specially. this makes IPsec processing * much easier. the goal here is to make mprev point the * mbuf prior to dest2. * * result: IPv6 dest2 payload * m and mprev will point to IPv6 header. */ if (exthdrs.ip6e_dest2) { if (!hdrsplit) panic("assumption failed: hdr not split"); exthdrs.ip6e_dest2->m_next = m->m_next; m->m_next = exthdrs.ip6e_dest2; *mtod(exthdrs.ip6e_dest2, u_char *) = ip6->ip6_nxt; ip6->ip6_nxt = IPPROTO_DSTOPTS; } #define MAKE_CHAIN(m, mp, p, i)\ do {\ if (m) {\ if (!hdrsplit) \ panic("assumption failed: hdr not split"); \ *mtod((m), u_char *) = *(p);\ *(p) = (i);\ p = mtod((m), u_char *);\ (m)->m_next = (mp)->m_next;\ (mp)->m_next = (m);\ (mp) = (m);\ }\ } while (0) /* * result: IPv6 hbh dest1 rthdr dest2 payload * m will point to IPv6 header. mprev will point to the * extension header prior to dest2 (rthdr in the above case). */ MAKE_CHAIN(exthdrs.ip6e_hbh, mprev, nexthdrp, IPPROTO_HOPOPTS); MAKE_CHAIN(exthdrs.ip6e_dest1, mprev, nexthdrp, IPPROTO_DSTOPTS); MAKE_CHAIN(exthdrs.ip6e_rthdr, mprev, nexthdrp, IPPROTO_ROUTING); #if defined(IPSEC) || defined(FAST_IPSEC) if (!needipsec) goto skip_ipsec2; /* * pointers after IPsec headers are not valid any more. * other pointers need a great care too. * (IPsec routines should not mangle mbufs prior to AH/ESP) */ exthdrs.ip6e_dest2 = NULL; { struct ip6_rthdr *rh = NULL; int segleft_org = 0; struct ipsec_output_state state; if (exthdrs.ip6e_rthdr) { rh = mtod(exthdrs.ip6e_rthdr, struct ip6_rthdr *); segleft_org = rh->ip6r_segleft; rh->ip6r_segleft = 0; } bzero(&state, sizeof(state)); state.m = m; error = ipsec6_output_trans(&state, nexthdrp, mprev, sp, flags, &needipsectun); m = state.m; if (error) { /* mbuf is already reclaimed in ipsec6_output_trans. */ m = NULL; switch (error) { case EHOSTUNREACH: case ENETUNREACH: case EMSGSIZE: case ENOBUFS: case ENOMEM: break; default: printf("ip6_output (ipsec): error code %d\n", error); /* fall through */ case ENOENT: /* don't show these error codes to the user */ error = 0; break; } goto bad; } if (exthdrs.ip6e_rthdr) { /* ah6_output doesn't modify mbuf chain */ rh->ip6r_segleft = segleft_org; } } skip_ipsec2:; #endif } /* * If there is a routing header, replace destination address field * with the first hop of the routing header. */ if (exthdrs.ip6e_rthdr) { struct ip6_rthdr *rh = (struct ip6_rthdr *)(mtod(exthdrs.ip6e_rthdr, struct ip6_rthdr *)); struct ip6_rthdr0 *rh0; finaldst = ip6->ip6_dst; switch (rh->ip6r_type) { case IPV6_RTHDR_TYPE_0: rh0 = (struct ip6_rthdr0 *)rh; ip6->ip6_dst = rh0->ip6r0_addr[0]; bcopy((caddr_t)&rh0->ip6r0_addr[1], (caddr_t)&rh0->ip6r0_addr[0], sizeof(struct in6_addr)*(rh0->ip6r0_segleft - 1) ); rh0->ip6r0_addr[rh0->ip6r0_segleft - 1] = finaldst; break; default: /* is it possible? */ error = EINVAL; goto bad; } } /* Source address validation */ if (IN6_IS_ADDR_UNSPECIFIED(&ip6->ip6_src) && (flags & IPV6_DADOUTPUT) == 0) { error = EOPNOTSUPP; ip6stat.ip6s_badscope++; goto bad; } if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_src)) { error = EOPNOTSUPP; ip6stat.ip6s_badscope++; goto bad; } ip6stat.ip6s_localout++; /* * Route packet. */ if (ro == 0) { ro = &ip6route; bzero((caddr_t)ro, sizeof(*ro)); } ro_pmtu = ro; if (opt && opt->ip6po_rthdr) ro = &opt->ip6po_route; dst = (struct sockaddr_in6 *)&ro->ro_dst; /* * If there is a cached route, * check that it is to the same destination * and is still up. If not, free it and try again. */ if (ro->ro_rt && ((ro->ro_rt->rt_flags & RTF_UP) == 0 || dst->sin6_family != AF_INET6 || !IN6_ARE_ADDR_EQUAL(&dst->sin6_addr, &ip6->ip6_dst))) { RTFREE(ro->ro_rt); ro->ro_rt = (struct rtentry *)0; } if (ro->ro_rt == 0) { bzero(dst, sizeof(*dst)); dst->sin6_family = AF_INET6; dst->sin6_len = sizeof(struct sockaddr_in6); dst->sin6_addr = ip6->ip6_dst; #ifdef SCOPEDROUTING /* XXX: sin6_scope_id should already be fixed at this point */ if (IN6_IS_SCOPE_LINKLOCAL(&dst->sin6_addr)) dst->sin6_scope_id = ntohs(dst->sin6_addr.s6_addr16[1]); #endif } #if defined(IPSEC) || defined(FAST_IPSEC) if (needipsec && needipsectun) { struct ipsec_output_state state; /* * All the extension headers will become inaccessible * (since they can be encrypted). * Don't panic, we need no more updates to extension headers * on inner IPv6 packet (since they are now encapsulated). * * IPv6 [ESP|AH] IPv6 [extension headers] payload */ bzero(&exthdrs, sizeof(exthdrs)); exthdrs.ip6e_ip6 = m; bzero(&state, sizeof(state)); state.m = m; state.ro = (struct route *)ro; state.dst = (struct sockaddr *)dst; error = ipsec6_output_tunnel(&state, sp, flags); m = state.m; ro = (struct route_in6 *)state.ro; dst = (struct sockaddr_in6 *)state.dst; if (error) { /* mbuf is already reclaimed in ipsec6_output_tunnel. */ m0 = m = NULL; m = NULL; switch (error) { case EHOSTUNREACH: case ENETUNREACH: case EMSGSIZE: case ENOBUFS: case ENOMEM: break; default: printf("ip6_output (ipsec): error code %d\n", error); /* fall through */ case ENOENT: /* don't show these error codes to the user */ error = 0; break; } goto bad; } exthdrs.ip6e_ip6 = m; } #endif /* IPSEC */ if (!IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst)) { /* Unicast */ #define ifatoia6(ifa) ((struct in6_ifaddr *)(ifa)) #define sin6tosa(sin6) ((struct sockaddr *)(sin6)) /* xxx * interface selection comes here * if an interface is specified from an upper layer, * ifp must point it. */ if (ro->ro_rt == 0) { /* * non-bsdi always clone routes, if parent is * PRF_CLONING. */ rtalloc((struct route *)ro); } if (ro->ro_rt == 0) { ip6stat.ip6s_noroute++; error = EHOSTUNREACH; /* XXX in6_ifstat_inc(ifp, ifs6_out_discard); */ goto bad; } ia = ifatoia6(ro->ro_rt->rt_ifa); ifp = ro->ro_rt->rt_ifp; ro->ro_rt->rt_use++; if (ro->ro_rt->rt_flags & RTF_GATEWAY) dst = (struct sockaddr_in6 *)ro->ro_rt->rt_gateway; m->m_flags &= ~(M_BCAST | M_MCAST); /* just in case */ in6_ifstat_inc(ifp, ifs6_out_request); /* * Check if the outgoing interface conflicts with * the interface specified by ifi6_ifindex (if specified). * Note that loopback interface is always okay. * (this may happen when we are sending a packet to one of * our own addresses.) */ if (opt && opt->ip6po_pktinfo && opt->ip6po_pktinfo->ipi6_ifindex) { if (!(ifp->if_flags & IFF_LOOPBACK) && ifp->if_index != opt->ip6po_pktinfo->ipi6_ifindex) { ip6stat.ip6s_noroute++; in6_ifstat_inc(ifp, ifs6_out_discard); error = EHOSTUNREACH; goto bad; } } if (opt && opt->ip6po_hlim != -1) ip6->ip6_hlim = opt->ip6po_hlim & 0xff; } else { /* Multicast */ struct in6_multi *in6m; m->m_flags = (m->m_flags & ~M_BCAST) | M_MCAST; /* * See if the caller provided any multicast options */ ifp = NULL; if (im6o != NULL) { ip6->ip6_hlim = im6o->im6o_multicast_hlim; if (im6o->im6o_multicast_ifp != NULL) ifp = im6o->im6o_multicast_ifp; } else ip6->ip6_hlim = ip6_defmcasthlim; /* * See if the caller provided the outgoing interface * as an ancillary data. * Boundary check for ifindex is assumed to be already done. */ if (opt && opt->ip6po_pktinfo && opt->ip6po_pktinfo->ipi6_ifindex) ifp = ifnet_byindex(opt->ip6po_pktinfo->ipi6_ifindex); /* * If the destination is a node-local scope multicast, * the packet should be loop-backed only. */ if (IN6_IS_ADDR_MC_NODELOCAL(&ip6->ip6_dst)) { /* * If the outgoing interface is already specified, * it should be a loopback interface. */ if (ifp && (ifp->if_flags & IFF_LOOPBACK) == 0) { ip6stat.ip6s_badscope++; error = ENETUNREACH; /* XXX: better error? */ /* XXX correct ifp? */ in6_ifstat_inc(ifp, ifs6_out_discard); goto bad; } else { ifp = &loif[0]; } } if (opt && opt->ip6po_hlim != -1) ip6->ip6_hlim = opt->ip6po_hlim & 0xff; /* * If caller did not provide an interface lookup a * default in the routing table. This is either a * default for the speicfied group (i.e. a host * route), or a multicast default (a route for the * ``net'' ff00::/8). */ if (ifp == NULL) { if (ro->ro_rt == 0) { ro->ro_rt = rtalloc1((struct sockaddr *) &ro->ro_dst, 0, 0UL); } if (ro->ro_rt == 0) { ip6stat.ip6s_noroute++; error = EHOSTUNREACH; /* XXX in6_ifstat_inc(ifp, ifs6_out_discard) */ goto bad; } ia = ifatoia6(ro->ro_rt->rt_ifa); ifp = ro->ro_rt->rt_ifp; ro->ro_rt->rt_use++; + RT_UNLOCK(ro->ro_rt); } if ((flags & IPV6_FORWARDING) == 0) in6_ifstat_inc(ifp, ifs6_out_request); in6_ifstat_inc(ifp, ifs6_out_mcast); /* * Confirm that the outgoing interface supports multicast. */ if ((ifp->if_flags & IFF_MULTICAST) == 0) { ip6stat.ip6s_noroute++; in6_ifstat_inc(ifp, ifs6_out_discard); error = ENETUNREACH; goto bad; } IN6_LOOKUP_MULTI(ip6->ip6_dst, ifp, in6m); if (in6m != NULL && (im6o == NULL || im6o->im6o_multicast_loop)) { /* * If we belong to the destination multicast group * on the outgoing interface, and the caller did not * forbid loopback, loop back a copy. */ ip6_mloopback(ifp, m, dst); } else { /* * If we are acting as a multicast router, perform * multicast forwarding as if the packet had just * arrived on the interface to which we are about * to send. The multicast forwarding function * recursively calls this function, using the * IPV6_FORWARDING flag to prevent infinite recursion. * * Multicasts that are looped back by ip6_mloopback(), * above, will be forwarded by the ip6_input() routine, * if necessary. */ if (ip6_mrouter && (flags & IPV6_FORWARDING) == 0) { if (ip6_mforward(ip6, ifp, m) != 0) { m_freem(m); goto done; } } } /* * Multicasts with a hoplimit of zero may be looped back, * above, but must not be transmitted on a network. * Also, multicasts addressed to the loopback interface * are not sent -- the above call to ip6_mloopback() will * loop back a copy if this host actually belongs to the * destination group on the loopback interface. */ if (ip6->ip6_hlim == 0 || (ifp->if_flags & IFF_LOOPBACK)) { m_freem(m); goto done; } } /* * Fill the outgoing inteface to tell the upper layer * to increment per-interface statistics. */ if (ifpp) *ifpp = ifp; /* * Determine path MTU. */ if (ro_pmtu != ro) { /* The first hop and the final destination may differ. */ struct sockaddr_in6 *sin6_fin = (struct sockaddr_in6 *)&ro_pmtu->ro_dst; if (ro_pmtu->ro_rt && ((ro->ro_rt->rt_flags & RTF_UP) == 0 || !IN6_ARE_ADDR_EQUAL(&sin6_fin->sin6_addr, &finaldst))) { RTFREE(ro_pmtu->ro_rt); ro_pmtu->ro_rt = (struct rtentry *)0; } if (ro_pmtu->ro_rt == 0) { bzero(sin6_fin, sizeof(*sin6_fin)); sin6_fin->sin6_family = AF_INET6; sin6_fin->sin6_len = sizeof(struct sockaddr_in6); sin6_fin->sin6_addr = finaldst; rtalloc((struct route *)ro_pmtu); } } if (ro_pmtu->ro_rt != NULL) { u_int32_t ifmtu = nd_ifinfo[ifp->if_index].linkmtu; mtu = ro_pmtu->ro_rt->rt_rmx.rmx_mtu; if (mtu > ifmtu || mtu == 0) { /* * The MTU on the route is larger than the MTU on * the interface! This shouldn't happen, unless the * MTU of the interface has been changed after the * interface was brought up. Change the MTU in the * route to match the interface MTU (as long as the * field isn't locked). * * if MTU on the route is 0, we need to fix the MTU. * this case happens with path MTU discovery timeouts. */ mtu = ifmtu; if ((ro_pmtu->ro_rt->rt_rmx.rmx_locks & RTV_MTU) == 0) ro_pmtu->ro_rt->rt_rmx.rmx_mtu = mtu; /* XXX */ } } else { mtu = nd_ifinfo[ifp->if_index].linkmtu; } /* * advanced API (IPV6_USE_MIN_MTU) overrides mtu setting */ if ((flags & IPV6_MINMTU) != 0 && mtu > IPV6_MMTU) mtu = IPV6_MMTU; /* Fake scoped addresses */ if ((ifp->if_flags & IFF_LOOPBACK) != 0) { /* * If source or destination address is a scoped address, and * the packet is going to be sent to a loopback interface, * we should keep the original interface. */ /* * XXX: this is a very experimental and temporary solution. * We eventually have sockaddr_in6 and use the sin6_scope_id * field of the structure here. * We rely on the consistency between two scope zone ids * of source and destination, which should already be assured. * Larger scopes than link will be supported in the future. */ origifp = NULL; if (IN6_IS_SCOPE_LINKLOCAL(&ip6->ip6_src)) origifp = ifnet_byindex(ntohs(ip6->ip6_src.s6_addr16[1])); else if (IN6_IS_SCOPE_LINKLOCAL(&ip6->ip6_dst)) origifp = ifnet_byindex(ntohs(ip6->ip6_dst.s6_addr16[1])); /* * XXX: origifp can be NULL even in those two cases above. * For example, if we remove the (only) link-local address * from the loopback interface, and try to send a link-local * address without link-id information. Then the source * address is ::1, and the destination address is the * link-local address with its s6_addr16[1] being zero. * What is worse, if the packet goes to the loopback interface * by a default rejected route, the null pointer would be * passed to looutput, and the kernel would hang. * The following last resort would prevent such disaster. */ if (origifp == NULL) origifp = ifp; } else origifp = ifp; #ifndef SCOPEDROUTING /* * clear embedded scope identifiers if necessary. * in6_clearscope will touch the addresses only when necessary. */ in6_clearscope(&ip6->ip6_src); in6_clearscope(&ip6->ip6_dst); #endif /* * Check with the firewall... */ if (ip6_fw_enable && ip6_fw_chk_ptr) { u_short port = 0; m->m_pkthdr.rcvif = NULL; /* XXX */ /* If ipfw says divert, we have to just drop packet */ if ((*ip6_fw_chk_ptr)(&ip6, ifp, &port, &m)) { m_freem(m); goto done; } if (!m) { error = EACCES; goto done; } } /* * If the outgoing packet contains a hop-by-hop options header, * it must be examined and processed even by the source node. * (RFC 2460, section 4.) */ if (exthdrs.ip6e_hbh) { struct ip6_hbh *hbh = mtod(exthdrs.ip6e_hbh, struct ip6_hbh *); u_int32_t dummy1; /* XXX unused */ u_int32_t dummy2; /* XXX unused */ #ifdef DIAGNOSTIC if ((hbh->ip6h_len + 1) << 3 > exthdrs.ip6e_hbh->m_len) panic("ip6e_hbh is not continuous"); #endif /* * XXX: if we have to send an ICMPv6 error to the sender, * we need the M_LOOP flag since icmp6_error() expects * the IPv6 and the hop-by-hop options header are * continuous unless the flag is set. */ m->m_flags |= M_LOOP; m->m_pkthdr.rcvif = ifp; if (ip6_process_hopopts(m, (u_int8_t *)(hbh + 1), ((hbh->ip6h_len + 1) << 3) - sizeof(struct ip6_hbh), &dummy1, &dummy2) < 0) { /* m was already freed at this point */ error = EINVAL;/* better error? */ goto done; } m->m_flags &= ~M_LOOP; /* XXX */ m->m_pkthdr.rcvif = NULL; } #ifdef PFIL_HOOKS /* * Run through list of hooks for output packets. */ error = pfil_run_hooks(&inet6_pfil_hook, &m, ifp, PFIL_OUT); if (error != 0 || m == NULL) goto done; ip6 = mtod(m, struct ip6_hdr *); #endif /* PFIL_HOOKS */ /* * Send the packet to the outgoing interface. * If necessary, do IPv6 fragmentation before sending. */ tlen = m->m_pkthdr.len; if (tlen <= mtu #ifdef notyet /* * On any link that cannot convey a 1280-octet packet in one piece, * link-specific fragmentation and reassembly must be provided at * a layer below IPv6. [RFC 2460, sec.5] * Thus if the interface has ability of link-level fragmentation, * we can just send the packet even if the packet size is * larger than the link's MTU. * XXX: IFF_FRAGMENTABLE (or such) flag has not been defined yet... */ || ifp->if_flags & IFF_FRAGMENTABLE #endif ) { /* Record statistics for this interface address. */ if (ia && !(flags & IPV6_FORWARDING)) { ia->ia_ifa.if_opackets++; ia->ia_ifa.if_obytes += m->m_pkthdr.len; } #ifdef IPSEC /* clean ipsec history once it goes out of the node */ ipsec_delaux(m); #endif error = nd6_output(ifp, origifp, m, dst, ro->ro_rt); goto done; } else if (mtu < IPV6_MMTU) { /* * note that path MTU is never less than IPV6_MMTU * (see icmp6_input). */ error = EMSGSIZE; in6_ifstat_inc(ifp, ifs6_out_fragfail); goto bad; } else if (ip6->ip6_plen == 0) { /* jumbo payload cannot be fragmented */ error = EMSGSIZE; in6_ifstat_inc(ifp, ifs6_out_fragfail); goto bad; } else { struct mbuf **mnext, *m_frgpart; struct ip6_frag *ip6f; #ifdef RANDOM_IP_ID u_int32_t id = htonl(ip6_randomid()); #else u_int32_t id = htonl(ip6_id++); #endif u_char nextproto; /* * Too large for the destination or interface; * fragment if possible. * Must be able to put at least 8 bytes per fragment. */ hlen = unfragpartlen; if (mtu > IPV6_MAXPACKET) mtu = IPV6_MAXPACKET; len = (mtu - hlen - sizeof(struct ip6_frag)) & ~7; if (len < 8) { error = EMSGSIZE; in6_ifstat_inc(ifp, ifs6_out_fragfail); goto bad; } mnext = &m->m_nextpkt; /* * Change the next header field of the last header in the * unfragmentable part. */ if (exthdrs.ip6e_rthdr) { nextproto = *mtod(exthdrs.ip6e_rthdr, u_char *); *mtod(exthdrs.ip6e_rthdr, u_char *) = IPPROTO_FRAGMENT; } else if (exthdrs.ip6e_dest1) { nextproto = *mtod(exthdrs.ip6e_dest1, u_char *); *mtod(exthdrs.ip6e_dest1, u_char *) = IPPROTO_FRAGMENT; } else if (exthdrs.ip6e_hbh) { nextproto = *mtod(exthdrs.ip6e_hbh, u_char *); *mtod(exthdrs.ip6e_hbh, u_char *) = IPPROTO_FRAGMENT; } else { nextproto = ip6->ip6_nxt; ip6->ip6_nxt = IPPROTO_FRAGMENT; } /* * Loop through length of segment after first fragment, * make new header and copy data of each part and link onto * chain. */ m0 = m; for (off = hlen; off < tlen; off += len) { MGETHDR(m, M_DONTWAIT, MT_HEADER); if (!m) { error = ENOBUFS; ip6stat.ip6s_odropped++; goto sendorfree; } m->m_pkthdr.rcvif = NULL; m->m_flags = m0->m_flags & M_COPYFLAGS; *mnext = m; mnext = &m->m_nextpkt; m->m_data += max_linkhdr; mhip6 = mtod(m, struct ip6_hdr *); *mhip6 = *ip6; m->m_len = sizeof(*mhip6); error = ip6_insertfraghdr(m0, m, hlen, &ip6f); if (error) { ip6stat.ip6s_odropped++; goto sendorfree; } ip6f->ip6f_offlg = htons((u_short)((off - hlen) & ~7)); if (off + len >= tlen) len = tlen - off; else ip6f->ip6f_offlg |= IP6F_MORE_FRAG; mhip6->ip6_plen = htons((u_short)(len + hlen + sizeof(*ip6f) - sizeof(struct ip6_hdr))); if ((m_frgpart = m_copy(m0, off, len)) == 0) { error = ENOBUFS; ip6stat.ip6s_odropped++; goto sendorfree; } m_cat(m, m_frgpart); m->m_pkthdr.len = len + hlen + sizeof(*ip6f); m->m_pkthdr.rcvif = (struct ifnet *)0; ip6f->ip6f_reserved = 0; ip6f->ip6f_ident = id; ip6f->ip6f_nxt = nextproto; ip6stat.ip6s_ofragments++; in6_ifstat_inc(ifp, ifs6_out_fragcreat); } in6_ifstat_inc(ifp, ifs6_out_fragok); } /* * Remove leading garbages. */ sendorfree: m = m0->m_nextpkt; m0->m_nextpkt = 0; m_freem(m0); for (m0 = m; m; m = m0) { m0 = m->m_nextpkt; m->m_nextpkt = 0; if (error == 0) { /* Record statistics for this interface address. */ if (ia) { ia->ia_ifa.if_opackets++; ia->ia_ifa.if_obytes += m->m_pkthdr.len; } #ifdef IPSEC /* clean ipsec history once it goes out of the node */ ipsec_delaux(m); #endif error = nd6_output(ifp, origifp, m, dst, ro->ro_rt); } else m_freem(m); } if (error == 0) ip6stat.ip6s_fragmented++; done: if (ro == &ip6route && ro->ro_rt) { /* brace necessary for RTFREE */ RTFREE(ro->ro_rt); } else if (ro_pmtu == &ip6route && ro_pmtu->ro_rt) { RTFREE(ro_pmtu->ro_rt); } #ifdef IPSEC if (sp != NULL) key_freesp(sp); #endif /* IPSEC */ #ifdef FAST_IPSEC if (sp != NULL) KEY_FREESP(&sp); #endif /* FAST_IPSEC */ return(error); freehdrs: m_freem(exthdrs.ip6e_hbh); /* m_freem will check if mbuf is 0 */ m_freem(exthdrs.ip6e_dest1); m_freem(exthdrs.ip6e_rthdr); m_freem(exthdrs.ip6e_dest2); /* fall through */ bad: m_freem(m); goto done; } static int ip6_copyexthdr(mp, hdr, hlen) struct mbuf **mp; caddr_t hdr; int hlen; { struct mbuf *m; if (hlen > MCLBYTES) return(ENOBUFS); /* XXX */ MGET(m, M_DONTWAIT, MT_DATA); if (!m) return(ENOBUFS); if (hlen > MLEN) { MCLGET(m, M_DONTWAIT); if ((m->m_flags & M_EXT) == 0) { m_free(m); return(ENOBUFS); } } m->m_len = hlen; if (hdr) bcopy(hdr, mtod(m, caddr_t), hlen); *mp = m; return(0); } /* * Insert jumbo payload option. */ static int ip6_insert_jumboopt(exthdrs, plen) struct ip6_exthdrs *exthdrs; u_int32_t plen; { struct mbuf *mopt; u_char *optbuf; u_int32_t v; #define JUMBOOPTLEN 8 /* length of jumbo payload option and padding */ /* * If there is no hop-by-hop options header, allocate new one. * If there is one but it doesn't have enough space to store the * jumbo payload option, allocate a cluster to store the whole options. * Otherwise, use it to store the options. */ if (exthdrs->ip6e_hbh == 0) { MGET(mopt, M_DONTWAIT, MT_DATA); if (mopt == 0) return(ENOBUFS); mopt->m_len = JUMBOOPTLEN; optbuf = mtod(mopt, u_char *); optbuf[1] = 0; /* = ((JUMBOOPTLEN) >> 3) - 1 */ exthdrs->ip6e_hbh = mopt; } else { struct ip6_hbh *hbh; mopt = exthdrs->ip6e_hbh; if (M_TRAILINGSPACE(mopt) < JUMBOOPTLEN) { /* * XXX assumption: * - exthdrs->ip6e_hbh is not referenced from places * other than exthdrs. * - exthdrs->ip6e_hbh is not an mbuf chain. */ int oldoptlen = mopt->m_len; struct mbuf *n; /* * XXX: give up if the whole (new) hbh header does * not fit even in an mbuf cluster. */ if (oldoptlen + JUMBOOPTLEN > MCLBYTES) return(ENOBUFS); /* * As a consequence, we must always prepare a cluster * at this point. */ MGET(n, M_DONTWAIT, MT_DATA); if (n) { MCLGET(n, M_DONTWAIT); if ((n->m_flags & M_EXT) == 0) { m_freem(n); n = NULL; } } if (!n) return(ENOBUFS); n->m_len = oldoptlen + JUMBOOPTLEN; bcopy(mtod(mopt, caddr_t), mtod(n, caddr_t), oldoptlen); optbuf = mtod(n, caddr_t) + oldoptlen; m_freem(mopt); mopt = exthdrs->ip6e_hbh = n; } else { optbuf = mtod(mopt, u_char *) + mopt->m_len; mopt->m_len += JUMBOOPTLEN; } optbuf[0] = IP6OPT_PADN; optbuf[1] = 1; /* * Adjust the header length according to the pad and * the jumbo payload option. */ hbh = mtod(mopt, struct ip6_hbh *); hbh->ip6h_len += (JUMBOOPTLEN >> 3); } /* fill in the option. */ optbuf[2] = IP6OPT_JUMBO; optbuf[3] = 4; v = (u_int32_t)htonl(plen + JUMBOOPTLEN); bcopy(&v, &optbuf[4], sizeof(u_int32_t)); /* finally, adjust the packet header length */ exthdrs->ip6e_ip6->m_pkthdr.len += JUMBOOPTLEN; return(0); #undef JUMBOOPTLEN } /* * Insert fragment header and copy unfragmentable header portions. */ static int ip6_insertfraghdr(m0, m, hlen, frghdrp) struct mbuf *m0, *m; int hlen; struct ip6_frag **frghdrp; { struct mbuf *n, *mlast; if (hlen > sizeof(struct ip6_hdr)) { n = m_copym(m0, sizeof(struct ip6_hdr), hlen - sizeof(struct ip6_hdr), M_DONTWAIT); if (n == 0) return(ENOBUFS); m->m_next = n; } else n = m; /* Search for the last mbuf of unfragmentable part. */ for (mlast = n; mlast->m_next; mlast = mlast->m_next) ; if ((mlast->m_flags & M_EXT) == 0 && M_TRAILINGSPACE(mlast) >= sizeof(struct ip6_frag)) { /* use the trailing space of the last mbuf for the fragment hdr */ *frghdrp = (struct ip6_frag *)(mtod(mlast, caddr_t) + mlast->m_len); mlast->m_len += sizeof(struct ip6_frag); m->m_pkthdr.len += sizeof(struct ip6_frag); } else { /* allocate a new mbuf for the fragment header */ struct mbuf *mfrg; MGET(mfrg, M_DONTWAIT, MT_DATA); if (mfrg == 0) return(ENOBUFS); mfrg->m_len = sizeof(struct ip6_frag); *frghdrp = mtod(mfrg, struct ip6_frag *); mlast->m_next = mfrg; } return(0); } /* * IP6 socket option processing. */ int ip6_ctloutput(so, sopt) struct socket *so; struct sockopt *sopt; { int privileged; struct inpcb *in6p = sotoinpcb(so); int error, optval; int level, op, optname; int optlen; struct thread *td; if (sopt) { level = sopt->sopt_level; op = sopt->sopt_dir; optname = sopt->sopt_name; optlen = sopt->sopt_valsize; td = sopt->sopt_td; } else { panic("ip6_ctloutput: arg soopt is NULL"); } error = optval = 0; privileged = (td == 0 || suser(td)) ? 0 : 1; if (level == IPPROTO_IPV6) { switch (op) { case SOPT_SET: switch (optname) { case IPV6_PKTOPTIONS: { struct mbuf *m; error = soopt_getm(sopt, &m); /* XXX */ if (error != 0) break; error = soopt_mcopyin(sopt, m); /* XXX */ if (error != 0) break; error = ip6_pcbopts(&in6p->in6p_outputopts, m, so, sopt); m_freem(m); /* XXX */ break; } /* * Use of some Hop-by-Hop options or some * Destination options, might require special * privilege. That is, normal applications * (without special privilege) might be forbidden * from setting certain options in outgoing packets, * and might never see certain options in received * packets. [RFC 2292 Section 6] * KAME specific note: * KAME prevents non-privileged users from sending or * receiving ANY hbh/dst options in order to avoid * overhead of parsing options in the kernel. */ case IPV6_UNICAST_HOPS: case IPV6_CHECKSUM: case IPV6_FAITH: case IPV6_V6ONLY: if (optlen != sizeof(int)) { error = EINVAL; break; } error = sooptcopyin(sopt, &optval, sizeof optval, sizeof optval); if (error) break; switch (optname) { case IPV6_UNICAST_HOPS: if (optval < -1 || optval >= 256) error = EINVAL; else { /* -1 = kernel default */ in6p->in6p_hops = optval; if ((in6p->in6p_vflag & INP_IPV4) != 0) in6p->inp_ip_ttl = optval; } break; #define OPTSET(bit) \ do { \ if (optval) \ in6p->in6p_flags |= (bit); \ else \ in6p->in6p_flags &= ~(bit); \ } while (0) #define OPTBIT(bit) (in6p->in6p_flags & (bit) ? 1 : 0) case IPV6_CHECKSUM: in6p->in6p_cksum = optval; break; case IPV6_FAITH: OPTSET(IN6P_FAITH); break; case IPV6_V6ONLY: /* * make setsockopt(IPV6_V6ONLY) * available only prior to bind(2). * see ipng mailing list, Jun 22 2001. */ if (in6p->in6p_lport || !IN6_IS_ADDR_UNSPECIFIED(&in6p->in6p_laddr)) { error = EINVAL; break; } OPTSET(IN6P_IPV6_V6ONLY); if (optval) in6p->in6p_vflag &= ~INP_IPV4; else in6p->in6p_vflag |= INP_IPV4; break; } break; case IPV6_PKTINFO: case IPV6_HOPLIMIT: case IPV6_HOPOPTS: case IPV6_DSTOPTS: case IPV6_RTHDR: /* RFC 2292 */ if (optlen != sizeof(int)) { error = EINVAL; break; } error = sooptcopyin(sopt, &optval, sizeof optval, sizeof optval); if (error) break; switch (optname) { case IPV6_PKTINFO: OPTSET(IN6P_PKTINFO); break; case IPV6_HOPLIMIT: OPTSET(IN6P_HOPLIMIT); break; case IPV6_HOPOPTS: /* * Check super-user privilege. * See comments for IPV6_RECVHOPOPTS. */ if (!privileged) return(EPERM); OPTSET(IN6P_HOPOPTS); break; case IPV6_DSTOPTS: if (!privileged) return(EPERM); OPTSET(IN6P_DSTOPTS|IN6P_RTHDRDSTOPTS); /* XXX */ break; case IPV6_RTHDR: OPTSET(IN6P_RTHDR); break; } break; #undef OPTSET case IPV6_MULTICAST_IF: case IPV6_MULTICAST_HOPS: case IPV6_MULTICAST_LOOP: case IPV6_JOIN_GROUP: case IPV6_LEAVE_GROUP: { struct mbuf *m; if (sopt->sopt_valsize > MLEN) { error = EMSGSIZE; break; } /* XXX */ MGET(m, sopt->sopt_td ? M_TRYWAIT : M_DONTWAIT, MT_HEADER); if (m == 0) { error = ENOBUFS; break; } m->m_len = sopt->sopt_valsize; error = sooptcopyin(sopt, mtod(m, char *), m->m_len, m->m_len); error = ip6_setmoptions(sopt->sopt_name, &in6p->in6p_moptions, m); (void)m_free(m); } break; case IPV6_PORTRANGE: error = sooptcopyin(sopt, &optval, sizeof optval, sizeof optval); if (error) break; switch (optval) { case IPV6_PORTRANGE_DEFAULT: in6p->in6p_flags &= ~(IN6P_LOWPORT); in6p->in6p_flags &= ~(IN6P_HIGHPORT); break; case IPV6_PORTRANGE_HIGH: in6p->in6p_flags &= ~(IN6P_LOWPORT); in6p->in6p_flags |= IN6P_HIGHPORT; break; case IPV6_PORTRANGE_LOW: in6p->in6p_flags &= ~(IN6P_HIGHPORT); in6p->in6p_flags |= IN6P_LOWPORT; break; default: error = EINVAL; break; } break; #if defined(IPSEC) || defined(FAST_IPSEC) case IPV6_IPSEC_POLICY: { caddr_t req = NULL; size_t len = 0; struct mbuf *m; if ((error = soopt_getm(sopt, &m)) != 0) /* XXX */ break; if ((error = soopt_mcopyin(sopt, m)) != 0) /* XXX */ break; if (m) { req = mtod(m, caddr_t); len = m->m_len; } error = ipsec6_set_policy(in6p, optname, req, len, privileged); m_freem(m); } break; #endif /* KAME IPSEC */ case IPV6_FW_ADD: case IPV6_FW_DEL: case IPV6_FW_FLUSH: case IPV6_FW_ZERO: { struct mbuf *m; struct mbuf **mp = &m; if (ip6_fw_ctl_ptr == NULL) return EINVAL; /* XXX */ if ((error = soopt_getm(sopt, &m)) != 0) break; /* XXX */ if ((error = soopt_mcopyin(sopt, m)) != 0) break; error = (*ip6_fw_ctl_ptr)(optname, mp); m = *mp; } break; default: error = ENOPROTOOPT; break; } break; case SOPT_GET: switch (optname) { case IPV6_PKTOPTIONS: if (in6p->in6p_options) { struct mbuf *m; m = m_copym(in6p->in6p_options, 0, M_COPYALL, M_TRYWAIT); error = soopt_mcopyout(sopt, m); if (error == 0) m_freem(m); } else sopt->sopt_valsize = 0; break; case IPV6_UNICAST_HOPS: case IPV6_CHECKSUM: case IPV6_FAITH: case IPV6_V6ONLY: case IPV6_PORTRANGE: switch (optname) { case IPV6_UNICAST_HOPS: optval = in6p->in6p_hops; break; case IPV6_CHECKSUM: optval = in6p->in6p_cksum; break; case IPV6_FAITH: optval = OPTBIT(IN6P_FAITH); break; case IPV6_V6ONLY: optval = OPTBIT(IN6P_IPV6_V6ONLY); break; case IPV6_PORTRANGE: { int flags; flags = in6p->in6p_flags; if (flags & IN6P_HIGHPORT) optval = IPV6_PORTRANGE_HIGH; else if (flags & IN6P_LOWPORT) optval = IPV6_PORTRANGE_LOW; else optval = 0; break; } } error = sooptcopyout(sopt, &optval, sizeof optval); break; case IPV6_PKTINFO: case IPV6_HOPLIMIT: case IPV6_HOPOPTS: case IPV6_RTHDR: case IPV6_DSTOPTS: if (optname == IPV6_HOPOPTS || optname == IPV6_DSTOPTS || !privileged) return(EPERM); switch (optname) { case IPV6_PKTINFO: optval = OPTBIT(IN6P_PKTINFO); break; case IPV6_HOPLIMIT: optval = OPTBIT(IN6P_HOPLIMIT); break; case IPV6_HOPOPTS: if (!privileged) return(EPERM); optval = OPTBIT(IN6P_HOPOPTS); break; case IPV6_RTHDR: optval = OPTBIT(IN6P_RTHDR); break; case IPV6_DSTOPTS: if (!privileged) return(EPERM); optval = OPTBIT(IN6P_DSTOPTS|IN6P_RTHDRDSTOPTS); break; } error = sooptcopyout(sopt, &optval, sizeof optval); break; case IPV6_MULTICAST_IF: case IPV6_MULTICAST_HOPS: case IPV6_MULTICAST_LOOP: case IPV6_JOIN_GROUP: case IPV6_LEAVE_GROUP: { struct mbuf *m; error = ip6_getmoptions(sopt->sopt_name, in6p->in6p_moptions, &m); if (error == 0) error = sooptcopyout(sopt, mtod(m, char *), m->m_len); m_freem(m); } break; #if defined(IPSEC) || defined(FAST_IPSEC) case IPV6_IPSEC_POLICY: { caddr_t req = NULL; size_t len = 0; struct mbuf *m = NULL; struct mbuf **mp = &m; error = soopt_getm(sopt, &m); /* XXX */ if (error != 0) break; error = soopt_mcopyin(sopt, m); /* XXX */ if (error != 0) break; if (m) { req = mtod(m, caddr_t); len = m->m_len; } error = ipsec6_get_policy(in6p, req, len, mp); if (error == 0) error = soopt_mcopyout(sopt, m); /*XXX*/ if (error == 0 && m) m_freem(m); break; } #endif /* KAME IPSEC */ case IPV6_FW_GET: { struct mbuf *m; struct mbuf **mp = &m; if (ip6_fw_ctl_ptr == NULL) { return EINVAL; } error = (*ip6_fw_ctl_ptr)(optname, mp); if (error == 0) error = soopt_mcopyout(sopt, m); /* XXX */ if (error == 0 && m) m_freem(m); } break; default: error = ENOPROTOOPT; break; } break; } } else { error = EINVAL; } return(error); } /* * Set up IP6 options in pcb for insertion in output packets or * specifying behavior of outgoing packets. */ static int ip6_pcbopts(pktopt, m, so, sopt) struct ip6_pktopts **pktopt; struct mbuf *m; struct socket *so; struct sockopt *sopt; { struct ip6_pktopts *opt = *pktopt; int error = 0; struct thread *td = sopt->sopt_td; int priv = 0; /* turn off any old options. */ if (opt) { #ifdef DIAGNOSTIC if (opt->ip6po_pktinfo || opt->ip6po_nexthop || opt->ip6po_hbh || opt->ip6po_dest1 || opt->ip6po_dest2 || opt->ip6po_rhinfo.ip6po_rhi_rthdr) printf("ip6_pcbopts: all specified options are cleared.\n"); #endif ip6_clearpktopts(opt, 1, -1); } else opt = malloc(sizeof(*opt), M_IP6OPT, M_WAITOK); *pktopt = NULL; if (!m || m->m_len == 0) { /* * Only turning off any previous options, regardless of * whether the opt is just created or given. */ free(opt, M_IP6OPT); return(0); } /* set options specified by user. */ if (td && !suser(td)) priv = 1; if ((error = ip6_setpktoptions(m, opt, priv, 1)) != 0) { ip6_clearpktopts(opt, 1, -1); /* XXX: discard all options */ free(opt, M_IP6OPT); return(error); } *pktopt = opt; return(0); } /* * initialize ip6_pktopts. beware that there are non-zero default values in * the struct. */ void init_ip6pktopts(opt) struct ip6_pktopts *opt; { bzero(opt, sizeof(*opt)); opt->ip6po_hlim = -1; /* -1 means default hop limit */ } void ip6_clearpktopts(pktopt, needfree, optname) struct ip6_pktopts *pktopt; int needfree, optname; { if (pktopt == NULL) return; if (optname == -1) { if (needfree && pktopt->ip6po_pktinfo) free(pktopt->ip6po_pktinfo, M_IP6OPT); pktopt->ip6po_pktinfo = NULL; } if (optname == -1) pktopt->ip6po_hlim = -1; if (optname == -1) { if (needfree && pktopt->ip6po_nexthop) free(pktopt->ip6po_nexthop, M_IP6OPT); pktopt->ip6po_nexthop = NULL; } if (optname == -1) { if (needfree && pktopt->ip6po_hbh) free(pktopt->ip6po_hbh, M_IP6OPT); pktopt->ip6po_hbh = NULL; } if (optname == -1) { if (needfree && pktopt->ip6po_dest1) free(pktopt->ip6po_dest1, M_IP6OPT); pktopt->ip6po_dest1 = NULL; } if (optname == -1) { if (needfree && pktopt->ip6po_rhinfo.ip6po_rhi_rthdr) free(pktopt->ip6po_rhinfo.ip6po_rhi_rthdr, M_IP6OPT); pktopt->ip6po_rhinfo.ip6po_rhi_rthdr = NULL; if (pktopt->ip6po_route.ro_rt) { RTFREE(pktopt->ip6po_route.ro_rt); pktopt->ip6po_route.ro_rt = NULL; } } if (optname == -1) { if (needfree && pktopt->ip6po_dest2) free(pktopt->ip6po_dest2, M_IP6OPT); pktopt->ip6po_dest2 = NULL; } } #define PKTOPT_EXTHDRCPY(type) \ do {\ if (src->type) {\ int hlen =\ (((struct ip6_ext *)src->type)->ip6e_len + 1) << 3;\ dst->type = malloc(hlen, M_IP6OPT, canwait);\ if (dst->type == NULL && canwait == M_NOWAIT)\ goto bad;\ bcopy(src->type, dst->type, hlen);\ }\ } while (0) struct ip6_pktopts * ip6_copypktopts(src, canwait) struct ip6_pktopts *src; int canwait; { struct ip6_pktopts *dst; if (src == NULL) { printf("ip6_clearpktopts: invalid argument\n"); return(NULL); } dst = malloc(sizeof(*dst), M_IP6OPT, canwait); if (dst == NULL && canwait == M_NOWAIT) return (NULL); bzero(dst, sizeof(*dst)); dst->ip6po_hlim = src->ip6po_hlim; if (src->ip6po_pktinfo) { dst->ip6po_pktinfo = malloc(sizeof(*dst->ip6po_pktinfo), M_IP6OPT, canwait); if (dst->ip6po_pktinfo == NULL && canwait == M_NOWAIT) goto bad; *dst->ip6po_pktinfo = *src->ip6po_pktinfo; } if (src->ip6po_nexthop) { dst->ip6po_nexthop = malloc(src->ip6po_nexthop->sa_len, M_IP6OPT, canwait); if (dst->ip6po_nexthop == NULL && canwait == M_NOWAIT) goto bad; bcopy(src->ip6po_nexthop, dst->ip6po_nexthop, src->ip6po_nexthop->sa_len); } PKTOPT_EXTHDRCPY(ip6po_hbh); PKTOPT_EXTHDRCPY(ip6po_dest1); PKTOPT_EXTHDRCPY(ip6po_dest2); PKTOPT_EXTHDRCPY(ip6po_rthdr); /* not copy the cached route */ return(dst); bad: if (dst->ip6po_pktinfo) free(dst->ip6po_pktinfo, M_IP6OPT); if (dst->ip6po_nexthop) free(dst->ip6po_nexthop, M_IP6OPT); if (dst->ip6po_hbh) free(dst->ip6po_hbh, M_IP6OPT); if (dst->ip6po_dest1) free(dst->ip6po_dest1, M_IP6OPT); if (dst->ip6po_dest2) free(dst->ip6po_dest2, M_IP6OPT); if (dst->ip6po_rthdr) free(dst->ip6po_rthdr, M_IP6OPT); free(dst, M_IP6OPT); return(NULL); } #undef PKTOPT_EXTHDRCPY void ip6_freepcbopts(pktopt) struct ip6_pktopts *pktopt; { if (pktopt == NULL) return; ip6_clearpktopts(pktopt, 1, -1); free(pktopt, M_IP6OPT); } /* * Set the IP6 multicast options in response to user setsockopt(). */ static int ip6_setmoptions(optname, im6op, m) int optname; struct ip6_moptions **im6op; struct mbuf *m; { int error = 0; u_int loop, ifindex; struct ipv6_mreq *mreq; struct ifnet *ifp; struct ip6_moptions *im6o = *im6op; struct route_in6 ro; struct sockaddr_in6 *dst; struct in6_multi_mship *imm; struct thread *td = curthread; /* XXX */ if (im6o == NULL) { /* * No multicast option buffer attached to the pcb; * allocate one and initialize to default values. */ im6o = (struct ip6_moptions *) malloc(sizeof(*im6o), M_IPMOPTS, M_WAITOK); if (im6o == NULL) return(ENOBUFS); *im6op = im6o; im6o->im6o_multicast_ifp = NULL; im6o->im6o_multicast_hlim = ip6_defmcasthlim; im6o->im6o_multicast_loop = IPV6_DEFAULT_MULTICAST_LOOP; LIST_INIT(&im6o->im6o_memberships); } switch (optname) { case IPV6_MULTICAST_IF: /* * Select the interface for outgoing multicast packets. */ if (m == NULL || m->m_len != sizeof(u_int)) { error = EINVAL; break; } bcopy(mtod(m, u_int *), &ifindex, sizeof(ifindex)); if (ifindex < 0 || if_index < ifindex) { error = ENXIO; /* XXX EINVAL? */ break; } ifp = ifnet_byindex(ifindex); if (ifp == NULL || (ifp->if_flags & IFF_MULTICAST) == 0) { error = EADDRNOTAVAIL; break; } im6o->im6o_multicast_ifp = ifp; break; case IPV6_MULTICAST_HOPS: { /* * Set the IP6 hoplimit for outgoing multicast packets. */ int optval; if (m == NULL || m->m_len != sizeof(int)) { error = EINVAL; break; } bcopy(mtod(m, u_int *), &optval, sizeof(optval)); if (optval < -1 || optval >= 256) error = EINVAL; else if (optval == -1) im6o->im6o_multicast_hlim = ip6_defmcasthlim; else im6o->im6o_multicast_hlim = optval; break; } case IPV6_MULTICAST_LOOP: /* * Set the loopback flag for outgoing multicast packets. * Must be zero or one. */ if (m == NULL || m->m_len != sizeof(u_int)) { error = EINVAL; break; } bcopy(mtod(m, u_int *), &loop, sizeof(loop)); if (loop > 1) { error = EINVAL; break; } im6o->im6o_multicast_loop = loop; break; case IPV6_JOIN_GROUP: /* * Add a multicast group membership. * Group must be a valid IP6 multicast address. */ if (m == NULL || m->m_len != sizeof(struct ipv6_mreq)) { error = EINVAL; break; } mreq = mtod(m, struct ipv6_mreq *); if (IN6_IS_ADDR_UNSPECIFIED(&mreq->ipv6mr_multiaddr)) { /* * We use the unspecified address to specify to accept * all multicast addresses. Only super user is allowed * to do this. */ if (suser(td)) { error = EACCES; break; } } else if (!IN6_IS_ADDR_MULTICAST(&mreq->ipv6mr_multiaddr)) { error = EINVAL; break; } /* * If the interface is specified, validate it. */ if (mreq->ipv6mr_interface < 0 || if_index < mreq->ipv6mr_interface) { error = ENXIO; /* XXX EINVAL? */ break; } /* * If no interface was explicitly specified, choose an * appropriate one according to the given multicast address. */ if (mreq->ipv6mr_interface == 0) { /* * If the multicast address is in node-local scope, * the interface should be a loopback interface. * Otherwise, look up the routing table for the * address, and choose the outgoing interface. * XXX: is it a good approach? */ if (IN6_IS_ADDR_MC_NODELOCAL(&mreq->ipv6mr_multiaddr)) { ifp = &loif[0]; } else { ro.ro_rt = NULL; dst = (struct sockaddr_in6 *)&ro.ro_dst; bzero(dst, sizeof(*dst)); dst->sin6_len = sizeof(struct sockaddr_in6); dst->sin6_family = AF_INET6; dst->sin6_addr = mreq->ipv6mr_multiaddr; rtalloc((struct route *)&ro); if (ro.ro_rt == NULL) { error = EADDRNOTAVAIL; break; } ifp = ro.ro_rt->rt_ifp; - rtfree(ro.ro_rt); + RTFREE(ro.ro_rt); } } else ifp = ifnet_byindex(mreq->ipv6mr_interface); /* * See if we found an interface, and confirm that it * supports multicast */ if (ifp == NULL || (ifp->if_flags & IFF_MULTICAST) == 0) { error = EADDRNOTAVAIL; break; } /* * Put interface index into the multicast address, * if the address has link-local scope. */ if (IN6_IS_ADDR_MC_LINKLOCAL(&mreq->ipv6mr_multiaddr)) { mreq->ipv6mr_multiaddr.s6_addr16[1] = htons(mreq->ipv6mr_interface); } /* * See if the membership already exists. */ for (imm = im6o->im6o_memberships.lh_first; imm != NULL; imm = imm->i6mm_chain.le_next) if (imm->i6mm_maddr->in6m_ifp == ifp && IN6_ARE_ADDR_EQUAL(&imm->i6mm_maddr->in6m_addr, &mreq->ipv6mr_multiaddr)) break; if (imm != NULL) { error = EADDRINUSE; break; } /* * Everything looks good; add a new record to the multicast * address list for the given interface. */ imm = malloc(sizeof(*imm), M_IPMADDR, M_WAITOK); if (imm == NULL) { error = ENOBUFS; break; } if ((imm->i6mm_maddr = in6_addmulti(&mreq->ipv6mr_multiaddr, ifp, &error)) == NULL) { free(imm, M_IPMADDR); break; } LIST_INSERT_HEAD(&im6o->im6o_memberships, imm, i6mm_chain); break; case IPV6_LEAVE_GROUP: /* * Drop a multicast group membership. * Group must be a valid IP6 multicast address. */ if (m == NULL || m->m_len != sizeof(struct ipv6_mreq)) { error = EINVAL; break; } mreq = mtod(m, struct ipv6_mreq *); if (IN6_IS_ADDR_UNSPECIFIED(&mreq->ipv6mr_multiaddr)) { if (suser(td)) { error = EACCES; break; } } else if (!IN6_IS_ADDR_MULTICAST(&mreq->ipv6mr_multiaddr)) { error = EINVAL; break; } /* * If an interface address was specified, get a pointer * to its ifnet structure. */ if (mreq->ipv6mr_interface < 0 || if_index < mreq->ipv6mr_interface) { error = ENXIO; /* XXX EINVAL? */ break; } ifp = ifnet_byindex(mreq->ipv6mr_interface); /* * Put interface index into the multicast address, * if the address has link-local scope. */ if (IN6_IS_ADDR_MC_LINKLOCAL(&mreq->ipv6mr_multiaddr)) { mreq->ipv6mr_multiaddr.s6_addr16[1] = htons(mreq->ipv6mr_interface); } /* * Find the membership in the membership list. */ for (imm = im6o->im6o_memberships.lh_first; imm != NULL; imm = imm->i6mm_chain.le_next) { if ((ifp == NULL || imm->i6mm_maddr->in6m_ifp == ifp) && IN6_ARE_ADDR_EQUAL(&imm->i6mm_maddr->in6m_addr, &mreq->ipv6mr_multiaddr)) break; } if (imm == NULL) { /* Unable to resolve interface */ error = EADDRNOTAVAIL; break; } /* * Give up the multicast address record to which the * membership points. */ LIST_REMOVE(imm, i6mm_chain); in6_delmulti(imm->i6mm_maddr); free(imm, M_IPMADDR); break; default: error = EOPNOTSUPP; break; } /* * If all options have default values, no need to keep the mbuf. */ if (im6o->im6o_multicast_ifp == NULL && im6o->im6o_multicast_hlim == ip6_defmcasthlim && im6o->im6o_multicast_loop == IPV6_DEFAULT_MULTICAST_LOOP && im6o->im6o_memberships.lh_first == NULL) { free(*im6op, M_IPMOPTS); *im6op = NULL; } return(error); } /* * Return the IP6 multicast options in response to user getsockopt(). */ static int ip6_getmoptions(optname, im6o, mp) int optname; struct ip6_moptions *im6o; struct mbuf **mp; { u_int *hlim, *loop, *ifindex; *mp = m_get(M_TRYWAIT, MT_HEADER); /* XXX */ switch (optname) { case IPV6_MULTICAST_IF: ifindex = mtod(*mp, u_int *); (*mp)->m_len = sizeof(u_int); if (im6o == NULL || im6o->im6o_multicast_ifp == NULL) *ifindex = 0; else *ifindex = im6o->im6o_multicast_ifp->if_index; return(0); case IPV6_MULTICAST_HOPS: hlim = mtod(*mp, u_int *); (*mp)->m_len = sizeof(u_int); if (im6o == NULL) *hlim = ip6_defmcasthlim; else *hlim = im6o->im6o_multicast_hlim; return(0); case IPV6_MULTICAST_LOOP: loop = mtod(*mp, u_int *); (*mp)->m_len = sizeof(u_int); if (im6o == NULL) *loop = ip6_defmcasthlim; else *loop = im6o->im6o_multicast_loop; return(0); default: return(EOPNOTSUPP); } } /* * Discard the IP6 multicast options. */ void ip6_freemoptions(im6o) struct ip6_moptions *im6o; { struct in6_multi_mship *imm; if (im6o == NULL) return; while ((imm = im6o->im6o_memberships.lh_first) != NULL) { LIST_REMOVE(imm, i6mm_chain); if (imm->i6mm_maddr) in6_delmulti(imm->i6mm_maddr); free(imm, M_IPMADDR); } free(im6o, M_IPMOPTS); } /* * Set IPv6 outgoing packet options based on advanced API. */ int ip6_setpktoptions(control, opt, priv, needcopy) struct mbuf *control; struct ip6_pktopts *opt; int priv, needcopy; { struct cmsghdr *cm = 0; if (control == 0 || opt == 0) return(EINVAL); init_ip6pktopts(opt); /* * XXX: Currently, we assume all the optional information is stored * in a single mbuf. */ if (control->m_next) return(EINVAL); for (; control->m_len; control->m_data += CMSG_ALIGN(cm->cmsg_len), control->m_len -= CMSG_ALIGN(cm->cmsg_len)) { cm = mtod(control, struct cmsghdr *); if (cm->cmsg_len == 0 || cm->cmsg_len > control->m_len) return(EINVAL); if (cm->cmsg_level != IPPROTO_IPV6) continue; /* * XXX should check if RFC2292 API is mixed with 2292bis API */ switch (cm->cmsg_type) { case IPV6_PKTINFO: if (cm->cmsg_len != CMSG_LEN(sizeof(struct in6_pktinfo))) return(EINVAL); if (needcopy) { /* XXX: Is it really WAITOK? */ opt->ip6po_pktinfo = malloc(sizeof(struct in6_pktinfo), M_IP6OPT, M_WAITOK); bcopy(CMSG_DATA(cm), opt->ip6po_pktinfo, sizeof(struct in6_pktinfo)); } else opt->ip6po_pktinfo = (struct in6_pktinfo *)CMSG_DATA(cm); if (opt->ip6po_pktinfo->ipi6_ifindex && IN6_IS_ADDR_LINKLOCAL(&opt->ip6po_pktinfo->ipi6_addr)) opt->ip6po_pktinfo->ipi6_addr.s6_addr16[1] = htons(opt->ip6po_pktinfo->ipi6_ifindex); if (opt->ip6po_pktinfo->ipi6_ifindex > if_index || opt->ip6po_pktinfo->ipi6_ifindex < 0) { return(ENXIO); } /* * Check if the requested source address is indeed a * unicast address assigned to the node, and can be * used as the packet's source address. */ if (!IN6_IS_ADDR_UNSPECIFIED(&opt->ip6po_pktinfo->ipi6_addr)) { struct in6_ifaddr *ia6; struct sockaddr_in6 sin6; bzero(&sin6, sizeof(sin6)); sin6.sin6_len = sizeof(sin6); sin6.sin6_family = AF_INET6; sin6.sin6_addr = opt->ip6po_pktinfo->ipi6_addr; ia6 = (struct in6_ifaddr *)ifa_ifwithaddr(sin6tosa(&sin6)); if (ia6 == NULL || (ia6->ia6_flags & (IN6_IFF_ANYCAST | IN6_IFF_NOTREADY)) != 0) return(EADDRNOTAVAIL); } break; case IPV6_HOPLIMIT: if (cm->cmsg_len != CMSG_LEN(sizeof(int))) return(EINVAL); opt->ip6po_hlim = *(int *)CMSG_DATA(cm); if (opt->ip6po_hlim < -1 || opt->ip6po_hlim > 255) return(EINVAL); break; case IPV6_NEXTHOP: if (!priv) return(EPERM); if (cm->cmsg_len < sizeof(u_char) || /* check if cmsg_len is large enough for sa_len */ cm->cmsg_len < CMSG_LEN(*CMSG_DATA(cm))) return(EINVAL); if (needcopy) { opt->ip6po_nexthop = malloc(*CMSG_DATA(cm), M_IP6OPT, M_WAITOK); bcopy(CMSG_DATA(cm), opt->ip6po_nexthop, *CMSG_DATA(cm)); } else opt->ip6po_nexthop = (struct sockaddr *)CMSG_DATA(cm); break; case IPV6_HOPOPTS: { struct ip6_hbh *hbh; int hbhlen; if (cm->cmsg_len < CMSG_LEN(sizeof(struct ip6_hbh))) return(EINVAL); hbh = (struct ip6_hbh *)CMSG_DATA(cm); hbhlen = (hbh->ip6h_len + 1) << 3; if (cm->cmsg_len != CMSG_LEN(hbhlen)) return(EINVAL); if (needcopy) { opt->ip6po_hbh = malloc(hbhlen, M_IP6OPT, M_WAITOK); bcopy(hbh, opt->ip6po_hbh, hbhlen); } else opt->ip6po_hbh = hbh; break; } case IPV6_DSTOPTS: { struct ip6_dest *dest, **newdest; int destlen; if (cm->cmsg_len < CMSG_LEN(sizeof(struct ip6_dest))) return(EINVAL); dest = (struct ip6_dest *)CMSG_DATA(cm); destlen = (dest->ip6d_len + 1) << 3; if (cm->cmsg_len != CMSG_LEN(destlen)) return(EINVAL); /* * The old advacned API is ambiguous on this * point. Our approach is to determine the * position based according to the existence * of a routing header. Note, however, that * this depends on the order of the extension * headers in the ancillary data; the 1st part * of the destination options header must * appear before the routing header in the * ancillary data, too. * RFC2292bis solved the ambiguity by * introducing separate cmsg types. */ if (opt->ip6po_rthdr == NULL) newdest = &opt->ip6po_dest1; else newdest = &opt->ip6po_dest2; if (needcopy) { *newdest = malloc(destlen, M_IP6OPT, M_WAITOK); bcopy(dest, *newdest, destlen); } else *newdest = dest; break; } case IPV6_RTHDR: { struct ip6_rthdr *rth; int rthlen; if (cm->cmsg_len < CMSG_LEN(sizeof(struct ip6_rthdr))) return(EINVAL); rth = (struct ip6_rthdr *)CMSG_DATA(cm); rthlen = (rth->ip6r_len + 1) << 3; if (cm->cmsg_len != CMSG_LEN(rthlen)) return(EINVAL); switch (rth->ip6r_type) { case IPV6_RTHDR_TYPE_0: /* must contain one addr */ if (rth->ip6r_len == 0) return(EINVAL); /* length must be even */ if (rth->ip6r_len % 2) return(EINVAL); if (rth->ip6r_len / 2 != rth->ip6r_segleft) return(EINVAL); break; default: return(EINVAL); /* not supported */ } if (needcopy) { opt->ip6po_rthdr = malloc(rthlen, M_IP6OPT, M_WAITOK); bcopy(rth, opt->ip6po_rthdr, rthlen); } else opt->ip6po_rthdr = rth; break; } default: return(ENOPROTOOPT); } } return(0); } /* * Routine called from ip6_output() to loop back a copy of an IP6 multicast * packet to the input queue of a specified interface. Note that this * calls the output routine of the loopback "driver", but with an interface * pointer that might NOT be &loif -- easier than replicating that code here. */ void ip6_mloopback(ifp, m, dst) struct ifnet *ifp; struct mbuf *m; struct sockaddr_in6 *dst; { struct mbuf *copym; struct ip6_hdr *ip6; copym = m_copy(m, 0, M_COPYALL); if (copym == NULL) return; /* * Make sure to deep-copy IPv6 header portion in case the data * is in an mbuf cluster, so that we can safely override the IPv6 * header portion later. */ if ((copym->m_flags & M_EXT) != 0 || copym->m_len < sizeof(struct ip6_hdr)) { copym = m_pullup(copym, sizeof(struct ip6_hdr)); if (copym == NULL) return; } #ifdef DIAGNOSTIC if (copym->m_len < sizeof(*ip6)) { m_freem(copym); return; } #endif ip6 = mtod(copym, struct ip6_hdr *); #ifndef SCOPEDROUTING /* * clear embedded scope identifiers if necessary. * in6_clearscope will touch the addresses only when necessary. */ in6_clearscope(&ip6->ip6_src); in6_clearscope(&ip6->ip6_dst); #endif (void)if_simloop(ifp, copym, dst->sin6_family, 0); } /* * Chop IPv6 header off from the payload. */ static int ip6_splithdr(m, exthdrs) struct mbuf *m; struct ip6_exthdrs *exthdrs; { struct mbuf *mh; struct ip6_hdr *ip6; ip6 = mtod(m, struct ip6_hdr *); if (m->m_len > sizeof(*ip6)) { MGETHDR(mh, M_DONTWAIT, MT_HEADER); if (mh == 0) { m_freem(m); return ENOBUFS; } M_MOVE_PKTHDR(mh, m); MH_ALIGN(mh, sizeof(*ip6)); m->m_len -= sizeof(*ip6); m->m_data += sizeof(*ip6); mh->m_next = m; m = mh; m->m_len = sizeof(*ip6); bcopy((caddr_t)ip6, mtod(m, caddr_t), sizeof(*ip6)); } exthdrs->ip6e_ip6 = m; return 0; } /* * Compute IPv6 extension header length. */ int ip6_optlen(in6p) struct in6pcb *in6p; { int len; if (!in6p->in6p_outputopts) return 0; len = 0; #define elen(x) \ (((struct ip6_ext *)(x)) ? (((struct ip6_ext *)(x))->ip6e_len + 1) << 3 : 0) len += elen(in6p->in6p_outputopts->ip6po_hbh); if (in6p->in6p_outputopts->ip6po_rthdr) /* dest1 is valid with rthdr only */ len += elen(in6p->in6p_outputopts->ip6po_dest1); len += elen(in6p->in6p_outputopts->ip6po_rthdr); len += elen(in6p->in6p_outputopts->ip6po_dest2); return len; #undef elen } Index: head/sys/netinet6/nd6.c =================================================================== --- head/sys/netinet6/nd6.c (revision 120726) +++ head/sys/netinet6/nd6.c (revision 120727) @@ -1,2261 +1,2265 @@ /* $FreeBSD$ */ /* $KAME: nd6.c,v 1.144 2001/05/24 07:44:00 itojun Exp $ */ /* * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the project nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ /* * XXX * KAME 970409 note: * BSD/OS version heavily modifies this code, related to llinfo. * Since we don't have BSD/OS version of net/route.c in our hand, * I left the code mostly as it was in 970310. -- itojun */ #include "opt_inet.h" #include "opt_inet6.h" #include "opt_mac.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #define ND6_SLOWTIMER_INTERVAL (60 * 60) /* 1 hour */ #define ND6_RECALC_REACHTM_INTERVAL (60 * 120) /* 2 hours */ #define SIN6(s) ((struct sockaddr_in6 *)s) #define SDL(s) ((struct sockaddr_dl *)s) /* timer values */ int nd6_prune = 1; /* walk list every 1 seconds */ int nd6_delay = 5; /* delay first probe time 5 second */ int nd6_umaxtries = 3; /* maximum unicast query */ int nd6_mmaxtries = 3; /* maximum multicast query */ int nd6_useloopback = 1; /* use loopback interface for local traffic */ int nd6_gctimer = (60 * 60 * 24); /* 1 day: garbage collection timer */ /* preventing too many loops in ND option parsing */ int nd6_maxndopt = 10; /* max # of ND options allowed */ int nd6_maxnudhint = 0; /* max # of subsequent upper layer hints */ #ifdef ND6_DEBUG int nd6_debug = 1; #else int nd6_debug = 0; #endif /* for debugging? */ static int nd6_inuse, nd6_allocated; struct llinfo_nd6 llinfo_nd6 = {&llinfo_nd6, &llinfo_nd6}; static size_t nd_ifinfo_indexlim = 8; struct nd_ifinfo *nd_ifinfo = NULL; struct nd_drhead nd_defrouter; struct nd_prhead nd_prefix = { 0 }; int nd6_recalc_reachtm_interval = ND6_RECALC_REACHTM_INTERVAL; static struct sockaddr_in6 all1_sa; static void nd6_slowtimo __P((void *)); static int regen_tmpaddr __P((struct in6_ifaddr *)); struct callout nd6_slowtimo_ch; struct callout nd6_timer_ch; extern struct callout in6_tmpaddrtimer_ch; void nd6_init() { static int nd6_init_done = 0; int i; if (nd6_init_done) { log(LOG_NOTICE, "nd6_init called more than once(ignored)\n"); return; } all1_sa.sin6_family = AF_INET6; all1_sa.sin6_len = sizeof(struct sockaddr_in6); for (i = 0; i < sizeof(all1_sa.sin6_addr); i++) all1_sa.sin6_addr.s6_addr[i] = 0xff; /* initialization of the default router list */ TAILQ_INIT(&nd_defrouter); nd6_init_done = 1; /* start timer */ callout_reset(&nd6_slowtimo_ch, ND6_SLOWTIMER_INTERVAL * hz, nd6_slowtimo, NULL); } void nd6_ifattach(ifp) struct ifnet *ifp; { /* * We have some arrays that should be indexed by if_index. * since if_index will grow dynamically, they should grow too. */ if (nd_ifinfo == NULL || if_index >= nd_ifinfo_indexlim) { size_t n; caddr_t q; while (if_index >= nd_ifinfo_indexlim) nd_ifinfo_indexlim <<= 1; /* grow nd_ifinfo */ n = nd_ifinfo_indexlim * sizeof(struct nd_ifinfo); q = (caddr_t)malloc(n, M_IP6NDP, M_WAITOK); bzero(q, n); if (nd_ifinfo) { bcopy((caddr_t)nd_ifinfo, q, n/2); free((caddr_t)nd_ifinfo, M_IP6NDP); } nd_ifinfo = (struct nd_ifinfo *)q; } #define ND nd_ifinfo[ifp->if_index] /* * Don't initialize if called twice. * XXX: to detect this, we should choose a member that is never set * before initialization of the ND structure itself. We formaly used * the linkmtu member, which was not suitable because it could be * initialized via "ifconfig mtu". */ if (ND.basereachable) return; ND.linkmtu = ifnet_byindex(ifp->if_index)->if_mtu; ND.chlim = IPV6_DEFHLIM; ND.basereachable = REACHABLE_TIME; ND.reachable = ND_COMPUTE_RTIME(ND.basereachable); ND.retrans = RETRANS_TIMER; ND.receivedra = 0; /* * Note that the default value of ip6_accept_rtadv is 0, which means * we won't accept RAs by default even if we set ND6_IFF_ACCEPT_RTADV * here. */ ND.flags = (ND6_IFF_PERFORMNUD | ND6_IFF_ACCEPT_RTADV); nd6_setmtu(ifp); #undef ND } /* * Reset ND level link MTU. This function is called when the physical MTU * changes, which means we might have to adjust the ND level MTU. */ void nd6_setmtu(ifp) struct ifnet *ifp; { struct nd_ifinfo *ndi = &nd_ifinfo[ifp->if_index]; u_long oldmaxmtu = ndi->maxmtu; u_long oldlinkmtu = ndi->linkmtu; switch (ifp->if_type) { case IFT_ARCNET: /* XXX MTU handling needs more work */ ndi->maxmtu = MIN(60480, ifp->if_mtu); break; case IFT_ETHER: ndi->maxmtu = MIN(ETHERMTU, ifp->if_mtu); break; case IFT_FDDI: ndi->maxmtu = MIN(FDDIIPMTU, ifp->if_mtu); break; case IFT_ATM: ndi->maxmtu = MIN(ATMMTU, ifp->if_mtu); break; case IFT_IEEE1394: /* XXX should be IEEE1394MTU(1500) */ ndi->maxmtu = MIN(ETHERMTU, ifp->if_mtu); break; #ifdef IFT_IEEE80211 case IFT_IEEE80211: /* XXX should be IEEE80211MTU(1500) */ ndi->maxmtu = MIN(ETHERMTU, ifp->if_mtu); break; #endif case IFT_ISO88025: ndi->maxmtu = MIN(ISO88025_MAX_MTU, ifp->if_mtu); break; default: ndi->maxmtu = ifp->if_mtu; break; } if (oldmaxmtu != ndi->maxmtu) { /* * If the ND level MTU is not set yet, or if the maxmtu * is reset to a smaller value than the ND level MTU, * also reset the ND level MTU. */ if (ndi->linkmtu == 0 || ndi->maxmtu < ndi->linkmtu) { ndi->linkmtu = ndi->maxmtu; /* also adjust in6_maxmtu if necessary. */ if (oldlinkmtu == 0) { /* * XXX: the case analysis is grotty, but * it is not efficient to call in6_setmaxmtu() * here when we are during the initialization * procedure. */ if (in6_maxmtu < ndi->linkmtu) in6_maxmtu = ndi->linkmtu; } else in6_setmaxmtu(); } } #undef MIN } void nd6_option_init(opt, icmp6len, ndopts) void *opt; int icmp6len; union nd_opts *ndopts; { bzero(ndopts, sizeof(*ndopts)); ndopts->nd_opts_search = (struct nd_opt_hdr *)opt; ndopts->nd_opts_last = (struct nd_opt_hdr *)(((u_char *)opt) + icmp6len); if (icmp6len == 0) { ndopts->nd_opts_done = 1; ndopts->nd_opts_search = NULL; } } /* * Take one ND option. */ struct nd_opt_hdr * nd6_option(ndopts) union nd_opts *ndopts; { struct nd_opt_hdr *nd_opt; int olen; if (!ndopts) panic("ndopts == NULL in nd6_option"); if (!ndopts->nd_opts_last) panic("uninitialized ndopts in nd6_option"); if (!ndopts->nd_opts_search) return NULL; if (ndopts->nd_opts_done) return NULL; nd_opt = ndopts->nd_opts_search; /* make sure nd_opt_len is inside the buffer */ if ((caddr_t)&nd_opt->nd_opt_len >= (caddr_t)ndopts->nd_opts_last) { bzero(ndopts, sizeof(*ndopts)); return NULL; } olen = nd_opt->nd_opt_len << 3; if (olen == 0) { /* * Message validation requires that all included * options have a length that is greater than zero. */ bzero(ndopts, sizeof(*ndopts)); return NULL; } ndopts->nd_opts_search = (struct nd_opt_hdr *)((caddr_t)nd_opt + olen); if (ndopts->nd_opts_search > ndopts->nd_opts_last) { /* option overruns the end of buffer, invalid */ bzero(ndopts, sizeof(*ndopts)); return NULL; } else if (ndopts->nd_opts_search == ndopts->nd_opts_last) { /* reached the end of options chain */ ndopts->nd_opts_done = 1; ndopts->nd_opts_search = NULL; } return nd_opt; } /* * Parse multiple ND options. * This function is much easier to use, for ND routines that do not need * multiple options of the same type. */ int nd6_options(ndopts) union nd_opts *ndopts; { struct nd_opt_hdr *nd_opt; int i = 0; if (!ndopts) panic("ndopts == NULL in nd6_options"); if (!ndopts->nd_opts_last) panic("uninitialized ndopts in nd6_options"); if (!ndopts->nd_opts_search) return 0; while (1) { nd_opt = nd6_option(ndopts); if (!nd_opt && !ndopts->nd_opts_last) { /* * Message validation requires that all included * options have a length that is greater than zero. */ icmp6stat.icp6s_nd_badopt++; bzero(ndopts, sizeof(*ndopts)); return -1; } if (!nd_opt) goto skip1; switch (nd_opt->nd_opt_type) { case ND_OPT_SOURCE_LINKADDR: case ND_OPT_TARGET_LINKADDR: case ND_OPT_MTU: case ND_OPT_REDIRECTED_HEADER: if (ndopts->nd_opt_array[nd_opt->nd_opt_type]) { nd6log((LOG_INFO, "duplicated ND6 option found (type=%d)\n", nd_opt->nd_opt_type)); /* XXX bark? */ } else { ndopts->nd_opt_array[nd_opt->nd_opt_type] = nd_opt; } break; case ND_OPT_PREFIX_INFORMATION: if (ndopts->nd_opt_array[nd_opt->nd_opt_type] == 0) { ndopts->nd_opt_array[nd_opt->nd_opt_type] = nd_opt; } ndopts->nd_opts_pi_end = (struct nd_opt_prefix_info *)nd_opt; break; default: /* * Unknown options must be silently ignored, * to accomodate future extension to the protocol. */ nd6log((LOG_DEBUG, "nd6_options: unsupported option %d - " "option ignored\n", nd_opt->nd_opt_type)); } skip1: i++; if (i > nd6_maxndopt) { icmp6stat.icp6s_nd_toomanyopt++; nd6log((LOG_INFO, "too many loop in nd opt\n")); break; } if (ndopts->nd_opts_done) break; } return 0; } /* * ND6 timer routine to expire default route list and prefix list */ void nd6_timer(ignored_arg) void *ignored_arg; { int s; struct llinfo_nd6 *ln; struct nd_defrouter *dr; struct nd_prefix *pr; struct ifnet *ifp; struct in6_ifaddr *ia6, *nia6; struct in6_addrlifetime *lt6; s = splnet(); callout_reset(&nd6_timer_ch, nd6_prune * hz, nd6_timer, NULL); ln = llinfo_nd6.ln_next; while (ln && ln != &llinfo_nd6) { struct rtentry *rt; struct sockaddr_in6 *dst; struct llinfo_nd6 *next = ln->ln_next; /* XXX: used for the DELAY case only: */ struct nd_ifinfo *ndi = NULL; if ((rt = ln->ln_rt) == NULL) { ln = next; continue; } if ((ifp = rt->rt_ifp) == NULL) { ln = next; continue; } ndi = &nd_ifinfo[ifp->if_index]; dst = (struct sockaddr_in6 *)rt_key(rt); if (ln->ln_expire > time_second) { ln = next; continue; } /* sanity check */ if (!rt) panic("rt=0 in nd6_timer(ln=%p)", ln); if (rt->rt_llinfo && (struct llinfo_nd6 *)rt->rt_llinfo != ln) panic("rt_llinfo(%p) is not equal to ln(%p)", rt->rt_llinfo, ln); if (!dst) panic("dst=0 in nd6_timer(ln=%p)", ln); switch (ln->ln_state) { case ND6_LLINFO_INCOMPLETE: if (ln->ln_asked < nd6_mmaxtries) { ln->ln_asked++; ln->ln_expire = time_second + nd_ifinfo[ifp->if_index].retrans / 1000; nd6_ns_output(ifp, NULL, &dst->sin6_addr, ln, 0); } else { struct mbuf *m = ln->ln_hold; if (m) { if (rt->rt_ifp) { /* * Fake rcvif to make ICMP error * more helpful in diagnosing * for the receiver. * XXX: should we consider * older rcvif? */ m->m_pkthdr.rcvif = rt->rt_ifp; } icmp6_error(m, ICMP6_DST_UNREACH, ICMP6_DST_UNREACH_ADDR, 0); ln->ln_hold = NULL; } next = nd6_free(rt); } break; case ND6_LLINFO_REACHABLE: if (ln->ln_expire) { ln->ln_state = ND6_LLINFO_STALE; ln->ln_expire = time_second + nd6_gctimer; } break; case ND6_LLINFO_STALE: /* Garbage Collection(RFC 2461 5.3) */ if (ln->ln_expire) next = nd6_free(rt); break; case ND6_LLINFO_DELAY: if (ndi && (ndi->flags & ND6_IFF_PERFORMNUD) != 0) { /* We need NUD */ ln->ln_asked = 1; ln->ln_state = ND6_LLINFO_PROBE; ln->ln_expire = time_second + ndi->retrans / 1000; nd6_ns_output(ifp, &dst->sin6_addr, &dst->sin6_addr, ln, 0); } else { ln->ln_state = ND6_LLINFO_STALE; /* XXX */ ln->ln_expire = time_second + nd6_gctimer; } break; case ND6_LLINFO_PROBE: if (ln->ln_asked < nd6_umaxtries) { ln->ln_asked++; ln->ln_expire = time_second + nd_ifinfo[ifp->if_index].retrans / 1000; nd6_ns_output(ifp, &dst->sin6_addr, &dst->sin6_addr, ln, 0); } else { next = nd6_free(rt); } break; } ln = next; } /* expire default router list */ dr = TAILQ_FIRST(&nd_defrouter); while (dr) { if (dr->expire && dr->expire < time_second) { struct nd_defrouter *t; t = TAILQ_NEXT(dr, dr_entry); defrtrlist_del(dr); dr = t; } else { dr = TAILQ_NEXT(dr, dr_entry); } } /* * expire interface addresses. * in the past the loop was inside prefix expiry processing. * However, from a stricter speci-confrmance standpoint, we should * rather separate address lifetimes and prefix lifetimes. */ addrloop: for (ia6 = in6_ifaddr; ia6; ia6 = nia6) { nia6 = ia6->ia_next; /* check address lifetime */ lt6 = &ia6->ia6_lifetime; if (IFA6_IS_INVALID(ia6)) { int regen = 0; /* * If the expiring address is temporary, try * regenerating a new one. This would be useful when * we suspended a laptop PC, then turned it on after a * period that could invalidate all temporary * addresses. Although we may have to restart the * loop (see below), it must be after purging the * address. Otherwise, we'd see an infinite loop of * regeneration. */ if (ip6_use_tempaddr && (ia6->ia6_flags & IN6_IFF_TEMPORARY) != 0) { if (regen_tmpaddr(ia6) == 0) regen = 1; } in6_purgeaddr(&ia6->ia_ifa); if (regen) goto addrloop; /* XXX: see below */ } if (IFA6_IS_DEPRECATED(ia6)) { int oldflags = ia6->ia6_flags; ia6->ia6_flags |= IN6_IFF_DEPRECATED; /* * If a temporary address has just become deprecated, * regenerate a new one if possible. */ if (ip6_use_tempaddr && (ia6->ia6_flags & IN6_IFF_TEMPORARY) != 0 && (oldflags & IN6_IFF_DEPRECATED) == 0) { if (regen_tmpaddr(ia6) == 0) { /* * A new temporary address is * generated. * XXX: this means the address chain * has changed while we are still in * the loop. Although the change * would not cause disaster (because * it's not a deletion, but an * addition,) we'd rather restart the * loop just for safety. Or does this * significantly reduce performance?? */ goto addrloop; } } } else { /* * A new RA might have made a deprecated address * preferred. */ ia6->ia6_flags &= ~IN6_IFF_DEPRECATED; } } /* expire prefix list */ pr = nd_prefix.lh_first; while (pr) { /* * check prefix lifetime. * since pltime is just for autoconf, pltime processing for * prefix is not necessary. */ if (pr->ndpr_expire && pr->ndpr_expire < time_second) { struct nd_prefix *t; t = pr->ndpr_next; /* * address expiration and prefix expiration are * separate. NEVER perform in6_purgeaddr here. */ prelist_remove(pr); pr = t; } else pr = pr->ndpr_next; } splx(s); } static int regen_tmpaddr(ia6) struct in6_ifaddr *ia6; /* deprecated/invalidated temporary address */ { struct ifaddr *ifa; struct ifnet *ifp; struct in6_ifaddr *public_ifa6 = NULL; ifp = ia6->ia_ifa.ifa_ifp; for (ifa = ifp->if_addrlist.tqh_first; ifa; ifa = ifa->ifa_list.tqe_next) { struct in6_ifaddr *it6; if (ifa->ifa_addr->sa_family != AF_INET6) continue; it6 = (struct in6_ifaddr *)ifa; /* ignore no autoconf addresses. */ if ((it6->ia6_flags & IN6_IFF_AUTOCONF) == 0) continue; /* ignore autoconf addresses with different prefixes. */ if (it6->ia6_ndpr == NULL || it6->ia6_ndpr != ia6->ia6_ndpr) continue; /* * Now we are looking at an autoconf address with the same * prefix as ours. If the address is temporary and is still * preferred, do not create another one. It would be rare, but * could happen, for example, when we resume a laptop PC after * a long period. */ if ((it6->ia6_flags & IN6_IFF_TEMPORARY) != 0 && !IFA6_IS_DEPRECATED(it6)) { public_ifa6 = NULL; break; } /* * This is a public autoconf address that has the same prefix * as ours. If it is preferred, keep it. We can't break the * loop here, because there may be a still-preferred temporary * address with the prefix. */ if (!IFA6_IS_DEPRECATED(it6)) public_ifa6 = it6; } if (public_ifa6 != NULL) { int e; if ((e = in6_tmpifadd(public_ifa6, 0)) != 0) { log(LOG_NOTICE, "regen_tmpaddr: failed to create a new" " tmp addr,errno=%d\n", e); return(-1); } return(0); } return(-1); } /* * Nuke neighbor cache/prefix/default router management table, right before * ifp goes away. */ void nd6_purge(ifp) struct ifnet *ifp; { struct llinfo_nd6 *ln, *nln; struct nd_defrouter *dr, *ndr, drany; struct nd_prefix *pr, *npr; /* Nuke default router list entries toward ifp */ if ((dr = TAILQ_FIRST(&nd_defrouter)) != NULL) { /* * The first entry of the list may be stored in * the routing table, so we'll delete it later. */ for (dr = TAILQ_NEXT(dr, dr_entry); dr; dr = ndr) { ndr = TAILQ_NEXT(dr, dr_entry); if (dr->ifp == ifp) defrtrlist_del(dr); } dr = TAILQ_FIRST(&nd_defrouter); if (dr->ifp == ifp) defrtrlist_del(dr); } /* Nuke prefix list entries toward ifp */ for (pr = nd_prefix.lh_first; pr; pr = npr) { npr = pr->ndpr_next; if (pr->ndpr_ifp == ifp) { /* * Previously, pr->ndpr_addr is removed as well, * but I strongly believe we don't have to do it. * nd6_purge() is only called from in6_ifdetach(), * which removes all the associated interface addresses * by itself. * (jinmei@kame.net 20010129) */ prelist_remove(pr); } } /* cancel default outgoing interface setting */ if (nd6_defifindex == ifp->if_index) nd6_setdefaultiface(0); if (!ip6_forwarding && ip6_accept_rtadv) { /* XXX: too restrictive? */ /* refresh default router list */ bzero(&drany, sizeof(drany)); defrouter_delreq(&drany, 0); defrouter_select(); } /* * Nuke neighbor cache entries for the ifp. * Note that rt->rt_ifp may not be the same as ifp, * due to KAME goto ours hack. See RTM_RESOLVE case in * nd6_rtrequest(), and ip6_input(). */ ln = llinfo_nd6.ln_next; while (ln && ln != &llinfo_nd6) { struct rtentry *rt; struct sockaddr_dl *sdl; nln = ln->ln_next; rt = ln->ln_rt; if (rt && rt->rt_gateway && rt->rt_gateway->sa_family == AF_LINK) { sdl = (struct sockaddr_dl *)rt->rt_gateway; if (sdl->sdl_index == ifp->if_index) nln = nd6_free(rt); } ln = nln; } } struct rtentry * nd6_lookup(addr6, create, ifp) struct in6_addr *addr6; int create; struct ifnet *ifp; { struct rtentry *rt; struct sockaddr_in6 sin6; bzero(&sin6, sizeof(sin6)); sin6.sin6_len = sizeof(struct sockaddr_in6); sin6.sin6_family = AF_INET6; sin6.sin6_addr = *addr6; #ifdef SCOPEDROUTING sin6.sin6_scope_id = in6_addr2scopeid(ifp, addr6); #endif rt = rtalloc1((struct sockaddr *)&sin6, create, 0UL); - if (rt && (rt->rt_flags & RTF_LLINFO) == 0) { - /* - * This is the case for the default route. - * If we want to create a neighbor cache for the address, we - * should free the route for the destination and allocate an - * interface route. - */ - if (create) { - RTFREE(rt); + if (rt) { + if ((rt->rt_flags & RTF_LLINFO) == 0 && create) { + /* + * This is the case for the default route. + * If we want to create a neighbor cache for the + * address, we should free the route for the + * destination and allocate an interface route. + */ + RTFREE_LOCKED(rt); rt = 0; } + RT_UNLOCK(rt); } if (!rt) { if (create && ifp) { int e; /* * If no route is available and create is set, * we allocate a host route for the destination * and treat it like an interface route. * This hack is necessary for a neighbor which can't * be covered by our own prefix. */ struct ifaddr *ifa = ifaof_ifpforaddr((struct sockaddr *)&sin6, ifp); if (ifa == NULL) return(NULL); /* * Create a new route. RTF_LLINFO is necessary * to create a Neighbor Cache entry for the * destination in nd6_rtrequest which will be * called in rtrequest via ifa->ifa_rtrequest. */ if ((e = rtrequest(RTM_ADD, (struct sockaddr *)&sin6, ifa->ifa_addr, (struct sockaddr *)&all1_sa, (ifa->ifa_flags | RTF_HOST | RTF_LLINFO) & ~RTF_CLONING, &rt)) != 0) log(LOG_ERR, "nd6_lookup: failed to add route for a " "neighbor(%s), errno=%d\n", ip6_sprintf(addr6), e); if (rt == NULL) return(NULL); if (rt->rt_llinfo) { struct llinfo_nd6 *ln = (struct llinfo_nd6 *)rt->rt_llinfo; ln->ln_state = ND6_LLINFO_NOSTATE; } } else return(NULL); } rt->rt_refcnt--; /* * Validation for the entry. * Note that the check for rt_llinfo is necessary because a cloned * route from a parent route that has the L flag (e.g. the default * route to a p2p interface) may have the flag, too, while the * destination is not actually a neighbor. * XXX: we can't use rt->rt_ifp to check for the interface, since * it might be the loopback interface if the entry is for our * own address on a non-loopback interface. Instead, we should * use rt->rt_ifa->ifa_ifp, which would specify the REAL * interface. */ if ((rt->rt_flags & RTF_GATEWAY) || (rt->rt_flags & RTF_LLINFO) == 0 || rt->rt_gateway->sa_family != AF_LINK || rt->rt_llinfo == NULL || (ifp && rt->rt_ifa->ifa_ifp != ifp)) { if (create) { log(LOG_DEBUG, "nd6_lookup: failed to lookup %s (if = %s)\n", ip6_sprintf(addr6), ifp ? if_name(ifp) : "unspec"); /* xxx more logs... kazu */ } return(NULL); } return(rt); } /* * Detect if a given IPv6 address identifies a neighbor on a given link. * XXX: should take care of the destination of a p2p link? */ int nd6_is_addr_neighbor(addr, ifp) struct sockaddr_in6 *addr; struct ifnet *ifp; { struct ifaddr *ifa; int i; #define IFADDR6(a) ((((struct in6_ifaddr *)(a))->ia_addr).sin6_addr) #define IFMASK6(a) ((((struct in6_ifaddr *)(a))->ia_prefixmask).sin6_addr) /* * A link-local address is always a neighbor. * XXX: we should use the sin6_scope_id field rather than the embedded * interface index. */ if (IN6_IS_ADDR_LINKLOCAL(&addr->sin6_addr) && ntohs(*(u_int16_t *)&addr->sin6_addr.s6_addr[2]) == ifp->if_index) return(1); /* * If the address matches one of our addresses, * it should be a neighbor. */ for (ifa = ifp->if_addrlist.tqh_first; ifa; ifa = ifa->ifa_list.tqe_next) { if (ifa->ifa_addr->sa_family != AF_INET6) next: continue; for (i = 0; i < 4; i++) { if ((IFADDR6(ifa).s6_addr32[i] ^ addr->sin6_addr.s6_addr32[i]) & IFMASK6(ifa).s6_addr32[i]) goto next; } return(1); } /* * Even if the address matches none of our addresses, it might be * in the neighbor cache. */ if (nd6_lookup(&addr->sin6_addr, 0, ifp) != NULL) return(1); return(0); #undef IFADDR6 #undef IFMASK6 } /* * Free an nd6 llinfo entry. */ struct llinfo_nd6 * nd6_free(rt) struct rtentry *rt; { struct llinfo_nd6 *ln = (struct llinfo_nd6 *)rt->rt_llinfo, *next; struct in6_addr in6 = ((struct sockaddr_in6 *)rt_key(rt))->sin6_addr; struct nd_defrouter *dr; /* * we used to have pfctlinput(PRC_HOSTDEAD) here. * even though it is not harmful, it was not really necessary. */ if (!ip6_forwarding && ip6_accept_rtadv) { /* XXX: too restrictive? */ int s; s = splnet(); dr = defrouter_lookup(&((struct sockaddr_in6 *)rt_key(rt))->sin6_addr, rt->rt_ifp); if (ln->ln_router || dr) { /* * rt6_flush must be called whether or not the neighbor * is in the Default Router List. * See a corresponding comment in nd6_na_input(). */ rt6_flush(&in6, rt->rt_ifp); } if (dr) { /* * Unreachablity of a router might affect the default * router selection and on-link detection of advertised * prefixes. */ /* * Temporarily fake the state to choose a new default * router and to perform on-link determination of * prefixes correctly. * Below the state will be set correctly, * or the entry itself will be deleted. */ ln->ln_state = ND6_LLINFO_INCOMPLETE; /* * Since defrouter_select() does not affect the * on-link determination and MIP6 needs the check * before the default router selection, we perform * the check now. */ pfxlist_onlink_check(); if (dr == TAILQ_FIRST(&nd_defrouter)) { /* * It is used as the current default router, * so we have to move it to the end of the * list and choose a new one. * XXX: it is not very efficient if this is * the only router. */ TAILQ_REMOVE(&nd_defrouter, dr, dr_entry); TAILQ_INSERT_TAIL(&nd_defrouter, dr, dr_entry); defrouter_select(); } } splx(s); } /* * Before deleting the entry, remember the next entry as the * return value. We need this because pfxlist_onlink_check() above * might have freed other entries (particularly the old next entry) as * a side effect (XXX). */ next = ln->ln_next; /* * Detach the route from the routing tree and the list of neighbor * caches, and disable the route entry not to be used in already * cached routes. */ rtrequest(RTM_DELETE, rt_key(rt), (struct sockaddr *)0, rt_mask(rt), 0, (struct rtentry **)0); return(next); } /* * Upper-layer reachability hint for Neighbor Unreachability Detection. * * XXX cost-effective metods? */ void nd6_nud_hint(rt, dst6, force) struct rtentry *rt; struct in6_addr *dst6; int force; { struct llinfo_nd6 *ln; /* * If the caller specified "rt", use that. Otherwise, resolve the * routing table by supplied "dst6". */ if (!rt) { if (!dst6) return; if (!(rt = nd6_lookup(dst6, 0, NULL))) return; } if ((rt->rt_flags & RTF_GATEWAY) != 0 || (rt->rt_flags & RTF_LLINFO) == 0 || !rt->rt_llinfo || !rt->rt_gateway || rt->rt_gateway->sa_family != AF_LINK) { /* This is not a host route. */ return; } ln = (struct llinfo_nd6 *)rt->rt_llinfo; if (ln->ln_state < ND6_LLINFO_REACHABLE) return; /* * if we get upper-layer reachability confirmation many times, * it is possible we have false information. */ if (!force) { ln->ln_byhint++; if (ln->ln_byhint > nd6_maxnudhint) return; } ln->ln_state = ND6_LLINFO_REACHABLE; if (ln->ln_expire) ln->ln_expire = time_second + nd_ifinfo[rt->rt_ifp->if_index].reachable; } void nd6_rtrequest(req, rt, info) int req; struct rtentry *rt; struct rt_addrinfo *info; /* xxx unused */ { struct sockaddr *gate = rt->rt_gateway; struct llinfo_nd6 *ln = (struct llinfo_nd6 *)rt->rt_llinfo; static struct sockaddr_dl null_sdl = {sizeof(null_sdl), AF_LINK}; struct ifnet *ifp = rt->rt_ifp; struct ifaddr *ifa; + RT_LOCK_ASSERT(rt); + if ((rt->rt_flags & RTF_GATEWAY)) return; if (nd6_need_cache(ifp) == 0 && (rt->rt_flags & RTF_HOST) == 0) { /* * This is probably an interface direct route for a link * which does not need neighbor caches (e.g. fe80::%lo0/64). * We do not need special treatment below for such a route. * Moreover, the RTF_LLINFO flag which would be set below * would annoy the ndp(8) command. */ return; } if (req == RTM_RESOLVE && (nd6_need_cache(ifp) == 0 || /* stf case */ !nd6_is_addr_neighbor((struct sockaddr_in6 *)rt_key(rt), ifp))) { /* * FreeBSD and BSD/OS often make a cloned host route based * on a less-specific route (e.g. the default route). * If the less specific route does not have a "gateway" * (this is the case when the route just goes to a p2p or an * stf interface), we'll mistakenly make a neighbor cache for * the host route, and will see strange neighbor solicitation * for the corresponding destination. In order to avoid the * confusion, we check if the destination of the route is * a neighbor in terms of neighbor discovery, and stop the * process if not. Additionally, we remove the LLINFO flag * so that ndp(8) will not try to get the neighbor information * of the destination. */ rt->rt_flags &= ~RTF_LLINFO; return; } switch (req) { case RTM_ADD: /* * There is no backward compatibility :) * * if ((rt->rt_flags & RTF_HOST) == 0 && * SIN(rt_mask(rt))->sin_addr.s_addr != 0xffffffff) * rt->rt_flags |= RTF_CLONING; */ if (rt->rt_flags & (RTF_CLONING | RTF_LLINFO)) { /* * Case 1: This route should come from * a route to interface. RTF_LLINFO flag is set * for a host route whose destination should be * treated as on-link. */ rt_setgate(rt, rt_key(rt), (struct sockaddr *)&null_sdl); gate = rt->rt_gateway; SDL(gate)->sdl_type = ifp->if_type; SDL(gate)->sdl_index = ifp->if_index; if (ln) ln->ln_expire = time_second; #if 1 if (ln && ln->ln_expire == 0) { /* kludge for desktops */ #if 0 printf("nd6_rtequest: time.tv_sec is zero; " "treat it as 1\n"); #endif ln->ln_expire = 1; } #endif if ((rt->rt_flags & RTF_CLONING)) break; } /* * In IPv4 code, we try to annonuce new RTF_ANNOUNCE entry here. * We don't do that here since llinfo is not ready yet. * * There are also couple of other things to be discussed: * - unsolicited NA code needs improvement beforehand * - RFC2461 says we MAY send multicast unsolicited NA * (7.2.6 paragraph 4), however, it also says that we * SHOULD provide a mechanism to prevent multicast NA storm. * we don't have anything like it right now. * note that the mechanism needs a mutual agreement * between proxies, which means that we need to implement * a new protocol, or a new kludge. * - from RFC2461 6.2.4, host MUST NOT send an unsolicited NA. * we need to check ip6forwarding before sending it. * (or should we allow proxy ND configuration only for * routers? there's no mention about proxy ND from hosts) */ #if 0 /* XXX it does not work */ if (rt->rt_flags & RTF_ANNOUNCE) nd6_na_output(ifp, &SIN6(rt_key(rt))->sin6_addr, &SIN6(rt_key(rt))->sin6_addr, ip6_forwarding ? ND_NA_FLAG_ROUTER : 0, 1, NULL); #endif /* FALLTHROUGH */ case RTM_RESOLVE: if ((ifp->if_flags & (IFF_POINTOPOINT | IFF_LOOPBACK)) == 0) { /* * Address resolution isn't necessary for a point to * point link, so we can skip this test for a p2p link. */ if (gate->sa_family != AF_LINK || gate->sa_len < sizeof(null_sdl)) { log(LOG_DEBUG, "nd6_rtrequest: bad gateway value: %s\n", if_name(ifp)); break; } SDL(gate)->sdl_type = ifp->if_type; SDL(gate)->sdl_index = ifp->if_index; } if (ln != NULL) break; /* This happens on a route change */ /* * Case 2: This route may come from cloning, or a manual route * add with a LL address. */ R_Malloc(ln, struct llinfo_nd6 *, sizeof(*ln)); rt->rt_llinfo = (caddr_t)ln; if (!ln) { log(LOG_DEBUG, "nd6_rtrequest: malloc failed\n"); break; } nd6_inuse++; nd6_allocated++; Bzero(ln, sizeof(*ln)); ln->ln_rt = rt; /* this is required for "ndp" command. - shin */ if (req == RTM_ADD) { /* * gate should have some valid AF_LINK entry, * and ln->ln_expire should have some lifetime * which is specified by ndp command. */ ln->ln_state = ND6_LLINFO_REACHABLE; ln->ln_byhint = 0; } else { /* * When req == RTM_RESOLVE, rt is created and * initialized in rtrequest(), so rt_expire is 0. */ ln->ln_state = ND6_LLINFO_NOSTATE; ln->ln_expire = time_second; } rt->rt_flags |= RTF_LLINFO; ln->ln_next = llinfo_nd6.ln_next; llinfo_nd6.ln_next = ln; ln->ln_prev = &llinfo_nd6; ln->ln_next->ln_prev = ln; /* * check if rt_key(rt) is one of my address assigned * to the interface. */ ifa = (struct ifaddr *)in6ifa_ifpwithaddr(rt->rt_ifp, &SIN6(rt_key(rt))->sin6_addr); if (ifa) { caddr_t macp = nd6_ifptomac(ifp); ln->ln_expire = 0; ln->ln_state = ND6_LLINFO_REACHABLE; ln->ln_byhint = 0; if (macp) { Bcopy(macp, LLADDR(SDL(gate)), ifp->if_addrlen); SDL(gate)->sdl_alen = ifp->if_addrlen; } if (nd6_useloopback) { rt->rt_ifp = &loif[0]; /* XXX */ /* * Make sure rt_ifa be equal to the ifaddr * corresponding to the address. * We need this because when we refer * rt_ifa->ia6_flags in ip6_input, we assume * that the rt_ifa points to the address instead * of the loopback address. */ if (ifa != rt->rt_ifa) { IFAFREE(rt->rt_ifa); IFAREF(ifa); rt->rt_ifa = ifa; } } } else if (rt->rt_flags & RTF_ANNOUNCE) { ln->ln_expire = 0; ln->ln_state = ND6_LLINFO_REACHABLE; ln->ln_byhint = 0; /* join solicited node multicast for proxy ND */ if (ifp->if_flags & IFF_MULTICAST) { struct in6_addr llsol; int error; llsol = SIN6(rt_key(rt))->sin6_addr; llsol.s6_addr16[0] = htons(0xff02); llsol.s6_addr16[1] = htons(ifp->if_index); llsol.s6_addr32[1] = 0; llsol.s6_addr32[2] = htonl(1); llsol.s6_addr8[12] = 0xff; if (!in6_addmulti(&llsol, ifp, &error)) { nd6log((LOG_ERR, "%s: failed to join " "%s (errno=%d)\n", if_name(ifp), ip6_sprintf(&llsol), error)); } } } break; case RTM_DELETE: if (!ln) break; /* leave from solicited node multicast for proxy ND */ if ((rt->rt_flags & RTF_ANNOUNCE) != 0 && (ifp->if_flags & IFF_MULTICAST) != 0) { struct in6_addr llsol; struct in6_multi *in6m; llsol = SIN6(rt_key(rt))->sin6_addr; llsol.s6_addr16[0] = htons(0xff02); llsol.s6_addr16[1] = htons(ifp->if_index); llsol.s6_addr32[1] = 0; llsol.s6_addr32[2] = htonl(1); llsol.s6_addr8[12] = 0xff; IN6_LOOKUP_MULTI(llsol, ifp, in6m); if (in6m) in6_delmulti(in6m); } nd6_inuse--; ln->ln_next->ln_prev = ln->ln_prev; ln->ln_prev->ln_next = ln->ln_next; ln->ln_prev = NULL; rt->rt_llinfo = 0; rt->rt_flags &= ~RTF_LLINFO; if (ln->ln_hold) m_freem(ln->ln_hold); Free((caddr_t)ln); } } int nd6_ioctl(cmd, data, ifp) u_long cmd; caddr_t data; struct ifnet *ifp; { struct in6_drlist *drl = (struct in6_drlist *)data; struct in6_prlist *prl = (struct in6_prlist *)data; struct in6_ndireq *ndi = (struct in6_ndireq *)data; struct in6_nbrinfo *nbi = (struct in6_nbrinfo *)data; struct in6_ndifreq *ndif = (struct in6_ndifreq *)data; struct nd_defrouter *dr, any; struct nd_prefix *pr; struct rtentry *rt; int i = 0, error = 0; int s; switch (cmd) { case SIOCGDRLST_IN6: /* * obsolete API, use sysctl under net.inet6.icmp6 */ bzero(drl, sizeof(*drl)); s = splnet(); dr = TAILQ_FIRST(&nd_defrouter); while (dr && i < DRLSTSIZ) { drl->defrouter[i].rtaddr = dr->rtaddr; if (IN6_IS_ADDR_LINKLOCAL(&drl->defrouter[i].rtaddr)) { /* XXX: need to this hack for KAME stack */ drl->defrouter[i].rtaddr.s6_addr16[1] = 0; } else log(LOG_ERR, "default router list contains a " "non-linklocal address(%s)\n", ip6_sprintf(&drl->defrouter[i].rtaddr)); drl->defrouter[i].flags = dr->flags; drl->defrouter[i].rtlifetime = dr->rtlifetime; drl->defrouter[i].expire = dr->expire; drl->defrouter[i].if_index = dr->ifp->if_index; i++; dr = TAILQ_NEXT(dr, dr_entry); } splx(s); break; case SIOCGPRLST_IN6: /* * obsolete API, use sysctl under net.inet6.icmp6 */ /* * XXX meaning of fields, especialy "raflags", is very * differnet between RA prefix list and RR/static prefix list. * how about separating ioctls into two? */ bzero(prl, sizeof(*prl)); s = splnet(); pr = nd_prefix.lh_first; while (pr && i < PRLSTSIZ) { struct nd_pfxrouter *pfr; int j; (void)in6_embedscope(&prl->prefix[i].prefix, &pr->ndpr_prefix, NULL, NULL); prl->prefix[i].raflags = pr->ndpr_raf; prl->prefix[i].prefixlen = pr->ndpr_plen; prl->prefix[i].vltime = pr->ndpr_vltime; prl->prefix[i].pltime = pr->ndpr_pltime; prl->prefix[i].if_index = pr->ndpr_ifp->if_index; prl->prefix[i].expire = pr->ndpr_expire; pfr = pr->ndpr_advrtrs.lh_first; j = 0; while (pfr) { if (j < DRLSTSIZ) { #define RTRADDR prl->prefix[i].advrtr[j] RTRADDR = pfr->router->rtaddr; if (IN6_IS_ADDR_LINKLOCAL(&RTRADDR)) { /* XXX: hack for KAME */ RTRADDR.s6_addr16[1] = 0; } else log(LOG_ERR, "a router(%s) advertises " "a prefix with " "non-link local address\n", ip6_sprintf(&RTRADDR)); #undef RTRADDR } j++; pfr = pfr->pfr_next; } prl->prefix[i].advrtrs = j; prl->prefix[i].origin = PR_ORIG_RA; i++; pr = pr->ndpr_next; } { struct rr_prefix *rpp; for (rpp = LIST_FIRST(&rr_prefix); rpp; rpp = LIST_NEXT(rpp, rp_entry)) { if (i >= PRLSTSIZ) break; (void)in6_embedscope(&prl->prefix[i].prefix, &pr->ndpr_prefix, NULL, NULL); prl->prefix[i].raflags = rpp->rp_raf; prl->prefix[i].prefixlen = rpp->rp_plen; prl->prefix[i].vltime = rpp->rp_vltime; prl->prefix[i].pltime = rpp->rp_pltime; prl->prefix[i].if_index = rpp->rp_ifp->if_index; prl->prefix[i].expire = rpp->rp_expire; prl->prefix[i].advrtrs = 0; prl->prefix[i].origin = rpp->rp_origin; i++; } } splx(s); break; case OSIOCGIFINFO_IN6: if (!nd_ifinfo || i >= nd_ifinfo_indexlim) { error = EINVAL; break; } ndi->ndi.linkmtu = nd_ifinfo[ifp->if_index].linkmtu; ndi->ndi.maxmtu = nd_ifinfo[ifp->if_index].maxmtu; ndi->ndi.basereachable = nd_ifinfo[ifp->if_index].basereachable; ndi->ndi.reachable = nd_ifinfo[ifp->if_index].reachable; ndi->ndi.retrans = nd_ifinfo[ifp->if_index].retrans; ndi->ndi.flags = nd_ifinfo[ifp->if_index].flags; ndi->ndi.recalctm = nd_ifinfo[ifp->if_index].recalctm; ndi->ndi.chlim = nd_ifinfo[ifp->if_index].chlim; ndi->ndi.receivedra = nd_ifinfo[ifp->if_index].receivedra; break; case SIOCGIFINFO_IN6: if (!nd_ifinfo || i >= nd_ifinfo_indexlim) { error = EINVAL; break; } ndi->ndi = nd_ifinfo[ifp->if_index]; break; case SIOCSIFINFO_FLAGS: /* XXX: almost all other fields of ndi->ndi is unused */ if (!nd_ifinfo || i >= nd_ifinfo_indexlim) { error = EINVAL; break; } nd_ifinfo[ifp->if_index].flags = ndi->ndi.flags; break; case SIOCSNDFLUSH_IN6: /* XXX: the ioctl name is confusing... */ /* flush default router list */ /* * xxx sumikawa: should not delete route if default * route equals to the top of default router list */ bzero(&any, sizeof(any)); defrouter_delreq(&any, 0); defrouter_select(); /* xxx sumikawa: flush prefix list */ break; case SIOCSPFXFLUSH_IN6: { /* flush all the prefix advertised by routers */ struct nd_prefix *pr, *next; s = splnet(); for (pr = nd_prefix.lh_first; pr; pr = next) { struct in6_ifaddr *ia, *ia_next; next = pr->ndpr_next; if (IN6_IS_ADDR_LINKLOCAL(&pr->ndpr_prefix.sin6_addr)) continue; /* XXX */ /* do we really have to remove addresses as well? */ for (ia = in6_ifaddr; ia; ia = ia_next) { /* ia might be removed. keep the next ptr. */ ia_next = ia->ia_next; if ((ia->ia6_flags & IN6_IFF_AUTOCONF) == 0) continue; if (ia->ia6_ndpr == pr) in6_purgeaddr(&ia->ia_ifa); } prelist_remove(pr); } splx(s); break; } case SIOCSRTRFLUSH_IN6: { /* flush all the default routers */ struct nd_defrouter *dr, *next; s = splnet(); if ((dr = TAILQ_FIRST(&nd_defrouter)) != NULL) { /* * The first entry of the list may be stored in * the routing table, so we'll delete it later. */ for (dr = TAILQ_NEXT(dr, dr_entry); dr; dr = next) { next = TAILQ_NEXT(dr, dr_entry); defrtrlist_del(dr); } defrtrlist_del(TAILQ_FIRST(&nd_defrouter)); } splx(s); break; } case SIOCGNBRINFO_IN6: { struct llinfo_nd6 *ln; struct in6_addr nb_addr = nbi->addr; /* make local for safety */ /* * XXX: KAME specific hack for scoped addresses * XXXX: for other scopes than link-local? */ if (IN6_IS_ADDR_LINKLOCAL(&nbi->addr) || IN6_IS_ADDR_MC_LINKLOCAL(&nbi->addr)) { u_int16_t *idp = (u_int16_t *)&nb_addr.s6_addr[2]; if (*idp == 0) *idp = htons(ifp->if_index); } s = splnet(); if ((rt = nd6_lookup(&nb_addr, 0, ifp)) == NULL) { error = EINVAL; splx(s); break; } ln = (struct llinfo_nd6 *)rt->rt_llinfo; nbi->state = ln->ln_state; nbi->asked = ln->ln_asked; nbi->isrouter = ln->ln_router; nbi->expire = ln->ln_expire; splx(s); break; } case SIOCGDEFIFACE_IN6: /* XXX: should be implemented as a sysctl? */ ndif->ifindex = nd6_defifindex; break; case SIOCSDEFIFACE_IN6: /* XXX: should be implemented as a sysctl? */ return(nd6_setdefaultiface(ndif->ifindex)); break; } return(error); } /* * Create neighbor cache entry and cache link-layer address, * on reception of inbound ND6 packets. (RS/RA/NS/redirect) */ struct rtentry * nd6_cache_lladdr(ifp, from, lladdr, lladdrlen, type, code) struct ifnet *ifp; struct in6_addr *from; char *lladdr; int lladdrlen; int type; /* ICMP6 type */ int code; /* type dependent information */ { struct rtentry *rt = NULL; struct llinfo_nd6 *ln = NULL; int is_newentry; struct sockaddr_dl *sdl = NULL; int do_update; int olladdr; int llchange; int newstate = 0; if (!ifp) panic("ifp == NULL in nd6_cache_lladdr"); if (!from) panic("from == NULL in nd6_cache_lladdr"); /* nothing must be updated for unspecified address */ if (IN6_IS_ADDR_UNSPECIFIED(from)) return NULL; /* * Validation about ifp->if_addrlen and lladdrlen must be done in * the caller. * * XXX If the link does not have link-layer adderss, what should * we do? (ifp->if_addrlen == 0) * Spec says nothing in sections for RA, RS and NA. There's small * description on it in NS section (RFC 2461 7.2.3). */ rt = nd6_lookup(from, 0, ifp); if (!rt) { #if 0 /* nothing must be done if there's no lladdr */ if (!lladdr || !lladdrlen) return NULL; #endif rt = nd6_lookup(from, 1, ifp); is_newentry = 1; } else { /* do nothing if static ndp is set */ if (rt->rt_flags & RTF_STATIC) return NULL; is_newentry = 0; } if (!rt) return NULL; if ((rt->rt_flags & (RTF_GATEWAY | RTF_LLINFO)) != RTF_LLINFO) { fail: (void)nd6_free(rt); return NULL; } ln = (struct llinfo_nd6 *)rt->rt_llinfo; if (!ln) goto fail; if (!rt->rt_gateway) goto fail; if (rt->rt_gateway->sa_family != AF_LINK) goto fail; sdl = SDL(rt->rt_gateway); olladdr = (sdl->sdl_alen) ? 1 : 0; if (olladdr && lladdr) { if (bcmp(lladdr, LLADDR(sdl), ifp->if_addrlen)) llchange = 1; else llchange = 0; } else llchange = 0; /* * newentry olladdr lladdr llchange (*=record) * 0 n n -- (1) * 0 y n -- (2) * 0 n y -- (3) * STALE * 0 y y n (4) * * 0 y y y (5) * STALE * 1 -- n -- (6) NOSTATE(= PASSIVE) * 1 -- y -- (7) * STALE */ if (lladdr) { /* (3-5) and (7) */ /* * Record source link-layer address * XXX is it dependent to ifp->if_type? */ sdl->sdl_alen = ifp->if_addrlen; bcopy(lladdr, LLADDR(sdl), ifp->if_addrlen); } if (!is_newentry) { if ((!olladdr && lladdr) /* (3) */ || (olladdr && lladdr && llchange)) { /* (5) */ do_update = 1; newstate = ND6_LLINFO_STALE; } else /* (1-2,4) */ do_update = 0; } else { do_update = 1; if (!lladdr) /* (6) */ newstate = ND6_LLINFO_NOSTATE; else /* (7) */ newstate = ND6_LLINFO_STALE; } if (do_update) { /* * Update the state of the neighbor cache. */ ln->ln_state = newstate; if (ln->ln_state == ND6_LLINFO_STALE) { /* * XXX: since nd6_output() below will cause * state tansition to DELAY and reset the timer, * we must set the timer now, although it is actually * meaningless. */ ln->ln_expire = time_second + nd6_gctimer; if (ln->ln_hold) { /* * we assume ifp is not a p2p here, so just * set the 2nd argument as the 1st one. */ nd6_output(ifp, ifp, ln->ln_hold, (struct sockaddr_in6 *)rt_key(rt), rt); ln->ln_hold = NULL; } } else if (ln->ln_state == ND6_LLINFO_INCOMPLETE) { /* probe right away */ ln->ln_expire = time_second; } } /* * ICMP6 type dependent behavior. * * NS: clear IsRouter if new entry * RS: clear IsRouter * RA: set IsRouter if there's lladdr * redir: clear IsRouter if new entry * * RA case, (1): * The spec says that we must set IsRouter in the following cases: * - If lladdr exist, set IsRouter. This means (1-5). * - If it is old entry (!newentry), set IsRouter. This means (7). * So, based on the spec, in (1-5) and (7) cases we must set IsRouter. * A quetion arises for (1) case. (1) case has no lladdr in the * neighbor cache, this is similar to (6). * This case is rare but we figured that we MUST NOT set IsRouter. * * newentry olladdr lladdr llchange NS RS RA redir * D R * 0 n n -- (1) c ? s * 0 y n -- (2) c s s * 0 n y -- (3) c s s * 0 y y n (4) c s s * 0 y y y (5) c s s * 1 -- n -- (6) c c c s * 1 -- y -- (7) c c s c s * * (c=clear s=set) */ switch (type & 0xff) { case ND_NEIGHBOR_SOLICIT: /* * New entry must have is_router flag cleared. */ if (is_newentry) /* (6-7) */ ln->ln_router = 0; break; case ND_REDIRECT: /* * If the icmp is a redirect to a better router, always set the * is_router flag. Otherwise, if the entry is newly created, * clear the flag. [RFC 2461, sec 8.3] */ if (code == ND_REDIRECT_ROUTER) ln->ln_router = 1; else if (is_newentry) /* (6-7) */ ln->ln_router = 0; break; case ND_ROUTER_SOLICIT: /* * is_router flag must always be cleared. */ ln->ln_router = 0; break; case ND_ROUTER_ADVERT: /* * Mark an entry with lladdr as a router. */ if ((!is_newentry && (olladdr || lladdr)) /* (2-5) */ || (is_newentry && lladdr)) { /* (7) */ ln->ln_router = 1; } break; } /* * When the link-layer address of a router changes, select the * best router again. In particular, when the neighbor entry is newly * created, it might affect the selection policy. * Question: can we restrict the first condition to the "is_newentry" * case? * XXX: when we hear an RA from a new router with the link-layer * address option, defrouter_select() is called twice, since * defrtrlist_update called the function as well. However, I believe * we can compromise the overhead, since it only happens the first * time. * XXX: although defrouter_select() should not have a bad effect * for those are not autoconfigured hosts, we explicitly avoid such * cases for safety. */ if (do_update && ln->ln_router && !ip6_forwarding && ip6_accept_rtadv) defrouter_select(); return rt; } static void nd6_slowtimo(ignored_arg) void *ignored_arg; { int s = splnet(); int i; struct nd_ifinfo *nd6if; callout_reset(&nd6_slowtimo_ch, ND6_SLOWTIMER_INTERVAL * hz, nd6_slowtimo, NULL); for (i = 1; i < if_index + 1; i++) { if (!nd_ifinfo || i >= nd_ifinfo_indexlim) continue; nd6if = &nd_ifinfo[i]; if (nd6if->basereachable && /* already initialized */ (nd6if->recalctm -= ND6_SLOWTIMER_INTERVAL) <= 0) { /* * Since reachable time rarely changes by router * advertisements, we SHOULD insure that a new random * value gets recomputed at least once every few hours. * (RFC 2461, 6.3.4) */ nd6if->recalctm = nd6_recalc_reachtm_interval; nd6if->reachable = ND_COMPUTE_RTIME(nd6if->basereachable); } } splx(s); } #define senderr(e) { error = (e); goto bad;} int nd6_output(ifp, origifp, m0, dst, rt0) struct ifnet *ifp; struct ifnet *origifp; struct mbuf *m0; struct sockaddr_in6 *dst; struct rtentry *rt0; { struct mbuf *m = m0; struct rtentry *rt = rt0; struct sockaddr_in6 *gw6 = NULL; struct llinfo_nd6 *ln = NULL; int error = 0; if (IN6_IS_ADDR_MULTICAST(&dst->sin6_addr)) goto sendpkt; if (nd6_need_cache(ifp) == 0) goto sendpkt; /* * next hop determination. This routine is derived from ether_outpout. */ if (rt) { if ((rt->rt_flags & RTF_UP) == 0) { - if ((rt0 = rt = rtalloc1((struct sockaddr *)dst, 1, 0UL)) != - NULL) - { + rt0 = rt = rtalloc1((struct sockaddr *)dst, 1, 0UL); + if (rt != NULL) { rt->rt_refcnt--; + RT_UNLOCK(rt); if (rt->rt_ifp != ifp) { /* XXX: loop care? */ return nd6_output(ifp, origifp, m0, dst, rt); } } else senderr(EHOSTUNREACH); } if (rt->rt_flags & RTF_GATEWAY) { gw6 = (struct sockaddr_in6 *)rt->rt_gateway; /* * We skip link-layer address resolution and NUD * if the gateway is not a neighbor from ND point * of view, regardless of the value of nd_ifinfo.flags. * The second condition is a bit tricky; we skip * if the gateway is our own address, which is * sometimes used to install a route to a p2p link. */ if (!nd6_is_addr_neighbor(gw6, ifp) || in6ifa_ifpwithaddr(ifp, &gw6->sin6_addr)) { /* * We allow this kind of tricky route only * when the outgoing interface is p2p. * XXX: we may need a more generic rule here. */ if ((ifp->if_flags & IFF_POINTOPOINT) == 0) senderr(EHOSTUNREACH); goto sendpkt; } if (rt->rt_gwroute == 0) goto lookup; if (((rt = rt->rt_gwroute)->rt_flags & RTF_UP) == 0) { rtfree(rt); rt = rt0; lookup: rt->rt_gwroute = rtalloc1(rt->rt_gateway, 1, 0UL); if ((rt = rt->rt_gwroute) == 0) senderr(EHOSTUNREACH); + RT_UNLOCK(rt); } } } /* * Address resolution or Neighbor Unreachability Detection * for the next hop. * At this point, the destination of the packet must be a unicast * or an anycast address(i.e. not a multicast). */ /* Look up the neighbor cache for the nexthop */ if (rt && (rt->rt_flags & RTF_LLINFO) != 0) ln = (struct llinfo_nd6 *)rt->rt_llinfo; else { /* * Since nd6_is_addr_neighbor() internally calls nd6_lookup(), * the condition below is not very efficient. But we believe * it is tolerable, because this should be a rare case. */ if (nd6_is_addr_neighbor(dst, ifp) && (rt = nd6_lookup(&dst->sin6_addr, 1, ifp)) != NULL) ln = (struct llinfo_nd6 *)rt->rt_llinfo; } if (!ln || !rt) { if ((ifp->if_flags & IFF_POINTOPOINT) == 0 && !(nd_ifinfo[ifp->if_index].flags & ND6_IFF_PERFORMNUD)) { log(LOG_DEBUG, "nd6_output: can't allocate llinfo for %s " "(ln=%p, rt=%p)\n", ip6_sprintf(&dst->sin6_addr), ln, rt); senderr(EIO); /* XXX: good error? */ } goto sendpkt; /* send anyway */ } /* We don't have to do link-layer address resolution on a p2p link. */ if ((ifp->if_flags & IFF_POINTOPOINT) != 0 && ln->ln_state < ND6_LLINFO_REACHABLE) { ln->ln_state = ND6_LLINFO_STALE; ln->ln_expire = time_second + nd6_gctimer; } /* * The first time we send a packet to a neighbor whose entry is * STALE, we have to change the state to DELAY and a sets a timer to * expire in DELAY_FIRST_PROBE_TIME seconds to ensure do * neighbor unreachability detection on expiration. * (RFC 2461 7.3.3) */ if (ln->ln_state == ND6_LLINFO_STALE) { ln->ln_asked = 0; ln->ln_state = ND6_LLINFO_DELAY; ln->ln_expire = time_second + nd6_delay; } /* * If the neighbor cache entry has a state other than INCOMPLETE * (i.e. its link-layer address is already resolved), just * send the packet. */ if (ln->ln_state > ND6_LLINFO_INCOMPLETE) goto sendpkt; /* * There is a neighbor cache entry, but no ethernet address * response yet. Replace the held mbuf (if any) with this * latest one. * * This code conforms to the rate-limiting rule described in Section * 7.2.2 of RFC 2461, because the timer is set correctly after sending * an NS below. */ if (ln->ln_state == ND6_LLINFO_NOSTATE) ln->ln_state = ND6_LLINFO_INCOMPLETE; if (ln->ln_hold) m_freem(ln->ln_hold); ln->ln_hold = m; if (ln->ln_expire) { if (ln->ln_asked < nd6_mmaxtries && ln->ln_expire < time_second) { ln->ln_asked++; ln->ln_expire = time_second + nd_ifinfo[ifp->if_index].retrans / 1000; nd6_ns_output(ifp, NULL, &dst->sin6_addr, ln, 0); } } return(0); sendpkt: #ifdef MAC mac_create_mbuf_linklayer(ifp, m); #endif if ((ifp->if_flags & IFF_LOOPBACK) != 0) { return((*ifp->if_output)(origifp, m, (struct sockaddr *)dst, rt)); } return((*ifp->if_output)(ifp, m, (struct sockaddr *)dst, rt)); bad: if (m) m_freem(m); return (error); } #undef senderr int nd6_need_cache(ifp) struct ifnet *ifp; { /* * XXX: we currently do not make neighbor cache on any interface * other than ARCnet, Ethernet, FDDI and GIF. * * RFC2893 says: * - unidirectional tunnels needs no ND */ switch (ifp->if_type) { case IFT_ARCNET: case IFT_ETHER: case IFT_FDDI: case IFT_IEEE1394: #ifdef IFT_L2VLAN case IFT_L2VLAN: #endif #ifdef IFT_IEEE80211 case IFT_IEEE80211: #endif case IFT_GIF: /* XXX need more cases? */ return(1); default: return(0); } } int nd6_storelladdr(ifp, rt, m, dst, desten) struct ifnet *ifp; struct rtentry *rt; struct mbuf *m; struct sockaddr *dst; u_char *desten; { int i; struct sockaddr_dl *sdl; if (m->m_flags & M_MCAST) { switch (ifp->if_type) { case IFT_ETHER: case IFT_FDDI: #ifdef IFT_L2VLAN case IFT_L2VLAN: #endif #ifdef IFT_IEEE80211 case IFT_IEEE80211: #endif case IFT_ISO88025: ETHER_MAP_IPV6_MULTICAST(&SIN6(dst)->sin6_addr, desten); return(1); case IFT_IEEE1394: /* * netbsd can use if_broadcastaddr, but we don't do so * to reduce # of ifdef. */ for (i = 0; i < ifp->if_addrlen; i++) desten[i] = ~0; return(1); case IFT_ARCNET: *desten = 0; return(1); default: m_freem(m); return(0); } } if (rt == NULL) { /* this could happen, if we could not allocate memory */ m_freem(m); return(0); } if (rt->rt_gateway->sa_family != AF_LINK) { printf("nd6_storelladdr: something odd happens\n"); m_freem(m); return(0); } sdl = SDL(rt->rt_gateway); if (sdl->sdl_alen == 0) { /* this should be impossible, but we bark here for debugging */ printf("nd6_storelladdr: sdl_alen == 0\n"); m_freem(m); return(0); } bcopy(LLADDR(sdl), desten, sdl->sdl_alen); return(1); } static int nd6_sysctl_drlist(SYSCTL_HANDLER_ARGS); static int nd6_sysctl_prlist(SYSCTL_HANDLER_ARGS); #ifdef SYSCTL_DECL SYSCTL_DECL(_net_inet6_icmp6); #endif SYSCTL_NODE(_net_inet6_icmp6, ICMPV6CTL_ND6_DRLIST, nd6_drlist, CTLFLAG_RD, nd6_sysctl_drlist, ""); SYSCTL_NODE(_net_inet6_icmp6, ICMPV6CTL_ND6_PRLIST, nd6_prlist, CTLFLAG_RD, nd6_sysctl_prlist, ""); static int nd6_sysctl_drlist(SYSCTL_HANDLER_ARGS) { int error; char buf[1024]; struct in6_defrouter *d, *de; struct nd_defrouter *dr; if (req->newptr) return EPERM; error = 0; for (dr = TAILQ_FIRST(&nd_defrouter); dr; dr = TAILQ_NEXT(dr, dr_entry)) { d = (struct in6_defrouter *)buf; de = (struct in6_defrouter *)(buf + sizeof(buf)); if (d + 1 <= de) { bzero(d, sizeof(*d)); d->rtaddr.sin6_family = AF_INET6; d->rtaddr.sin6_len = sizeof(d->rtaddr); if (in6_recoverscope(&d->rtaddr, &dr->rtaddr, dr->ifp) != 0) log(LOG_ERR, "scope error in " "default router list (%s)\n", ip6_sprintf(&dr->rtaddr)); d->flags = dr->flags; d->rtlifetime = dr->rtlifetime; d->expire = dr->expire; d->if_index = dr->ifp->if_index; } else panic("buffer too short"); error = SYSCTL_OUT(req, buf, sizeof(*d)); if (error) break; } return error; } static int nd6_sysctl_prlist(SYSCTL_HANDLER_ARGS) { int error; char buf[1024]; struct in6_prefix *p, *pe; struct nd_prefix *pr; if (req->newptr) return EPERM; error = 0; for (pr = nd_prefix.lh_first; pr; pr = pr->ndpr_next) { u_short advrtrs; size_t advance; struct sockaddr_in6 *sin6, *s6; struct nd_pfxrouter *pfr; p = (struct in6_prefix *)buf; pe = (struct in6_prefix *)(buf + sizeof(buf)); if (p + 1 <= pe) { bzero(p, sizeof(*p)); sin6 = (struct sockaddr_in6 *)(p + 1); p->prefix = pr->ndpr_prefix; if (in6_recoverscope(&p->prefix, &p->prefix.sin6_addr, pr->ndpr_ifp) != 0) log(LOG_ERR, "scope error in prefix list (%s)\n", ip6_sprintf(&p->prefix.sin6_addr)); p->raflags = pr->ndpr_raf; p->prefixlen = pr->ndpr_plen; p->vltime = pr->ndpr_vltime; p->pltime = pr->ndpr_pltime; p->if_index = pr->ndpr_ifp->if_index; p->expire = pr->ndpr_expire; p->refcnt = pr->ndpr_refcnt; p->flags = pr->ndpr_stateflags; p->origin = PR_ORIG_RA; advrtrs = 0; for (pfr = pr->ndpr_advrtrs.lh_first; pfr; pfr = pfr->pfr_next) { if ((void *)&sin6[advrtrs + 1] > (void *)pe) { advrtrs++; continue; } s6 = &sin6[advrtrs]; bzero(s6, sizeof(*s6)); s6->sin6_family = AF_INET6; s6->sin6_len = sizeof(*sin6); if (in6_recoverscope(s6, &pfr->router->rtaddr, pfr->router->ifp) != 0) log(LOG_ERR, "scope error in " "prefix list (%s)\n", ip6_sprintf(&pfr->router->rtaddr)); advrtrs++; } p->advrtrs = advrtrs; } else panic("buffer too short"); advance = sizeof(*p) + sizeof(*sin6) * advrtrs; error = SYSCTL_OUT(req, buf, advance); if (error) break; } return error; } Index: head/sys/netinet6/nd6_rtr.c =================================================================== --- head/sys/netinet6/nd6_rtr.c (revision 120726) +++ head/sys/netinet6/nd6_rtr.c (revision 120727) @@ -1,1989 +1,1990 @@ /* $FreeBSD$ */ /* $KAME: nd6_rtr.c,v 1.111 2001/04/27 01:37:15 jinmei Exp $ */ /* * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the project nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include "opt_inet.h" #include "opt_inet6.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #define SDL(s) ((struct sockaddr_dl *)s) static struct nd_defrouter *defrtrlist_update __P((struct nd_defrouter *)); static struct in6_ifaddr *in6_ifadd __P((struct nd_prefix *, struct in6_addr *)); static struct nd_pfxrouter *pfxrtr_lookup __P((struct nd_prefix *, struct nd_defrouter *)); static void pfxrtr_add __P((struct nd_prefix *, struct nd_defrouter *)); static void pfxrtr_del __P((struct nd_pfxrouter *)); static struct nd_pfxrouter *find_pfxlist_reachable_router __P((struct nd_prefix *)); static void defrouter_addifreq __P((struct ifnet *)); static void nd6_rtmsg __P((int, struct rtentry *)); static void in6_init_address_ltimes __P((struct nd_prefix *ndpr, struct in6_addrlifetime *lt6)); static int rt6_deleteroute __P((struct radix_node *, void *)); extern int nd6_recalc_reachtm_interval; static struct ifnet *nd6_defifp; int nd6_defifindex; int ip6_use_tempaddr = 0; int ip6_desync_factor; u_int32_t ip6_temp_preferred_lifetime = DEF_TEMP_PREFERRED_LIFETIME; u_int32_t ip6_temp_valid_lifetime = DEF_TEMP_VALID_LIFETIME; /* * shorter lifetimes for debugging purposes. int ip6_temp_preferred_lifetime = 800; static int ip6_temp_valid_lifetime = 1800; */ int ip6_temp_regen_advance = TEMPADDR_REGEN_ADVANCE; /* * Receive Router Solicitation Message - just for routers. * Router solicitation/advertisement is mostly managed by userland program * (rtadvd) so here we have no function like nd6_ra_output(). * * Based on RFC 2461 */ void nd6_rs_input(m, off, icmp6len) struct mbuf *m; int off, icmp6len; { struct ifnet *ifp = m->m_pkthdr.rcvif; struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *); struct nd_router_solicit *nd_rs; struct in6_addr saddr6 = ip6->ip6_src; #if 0 struct in6_addr daddr6 = ip6->ip6_dst; #endif char *lladdr = NULL; int lladdrlen = 0; #if 0 struct sockaddr_dl *sdl = (struct sockaddr_dl *)NULL; struct llinfo_nd6 *ln = (struct llinfo_nd6 *)NULL; struct rtentry *rt = NULL; int is_newentry; #endif union nd_opts ndopts; /* If I'm not a router, ignore it. */ if (ip6_accept_rtadv != 0 || ip6_forwarding != 1) goto freeit; /* Sanity checks */ if (ip6->ip6_hlim != 255) { nd6log((LOG_ERR, "nd6_rs_input: invalid hlim (%d) from %s to %s on %s\n", ip6->ip6_hlim, ip6_sprintf(&ip6->ip6_src), ip6_sprintf(&ip6->ip6_dst), if_name(ifp))); goto bad; } /* * Don't update the neighbor cache, if src = ::. * This indicates that the src has no IP address assigned yet. */ if (IN6_IS_ADDR_UNSPECIFIED(&saddr6)) goto freeit; #ifndef PULLDOWN_TEST IP6_EXTHDR_CHECK(m, off, icmp6len,); nd_rs = (struct nd_router_solicit *)((caddr_t)ip6 + off); #else IP6_EXTHDR_GET(nd_rs, struct nd_router_solicit *, m, off, icmp6len); if (nd_rs == NULL) { icmp6stat.icp6s_tooshort++; return; } #endif icmp6len -= sizeof(*nd_rs); nd6_option_init(nd_rs + 1, icmp6len, &ndopts); if (nd6_options(&ndopts) < 0) { nd6log((LOG_INFO, "nd6_rs_input: invalid ND option, ignored\n")); /* nd6_options have incremented stats */ goto freeit; } if (ndopts.nd_opts_src_lladdr) { lladdr = (char *)(ndopts.nd_opts_src_lladdr + 1); lladdrlen = ndopts.nd_opts_src_lladdr->nd_opt_len << 3; } if (lladdr && ((ifp->if_addrlen + 2 + 7) & ~7) != lladdrlen) { nd6log((LOG_INFO, "nd6_rs_input: lladdrlen mismatch for %s " "(if %d, RS packet %d)\n", ip6_sprintf(&saddr6), ifp->if_addrlen, lladdrlen - 2)); goto bad; } nd6_cache_lladdr(ifp, &saddr6, lladdr, lladdrlen, ND_ROUTER_SOLICIT, 0); freeit: m_freem(m); return; bad: icmp6stat.icp6s_badrs++; m_freem(m); } /* * Receive Router Advertisement Message. * * Based on RFC 2461 * TODO: on-link bit on prefix information * TODO: ND_RA_FLAG_{OTHER,MANAGED} processing */ void nd6_ra_input(m, off, icmp6len) struct mbuf *m; int off, icmp6len; { struct ifnet *ifp = m->m_pkthdr.rcvif; struct nd_ifinfo *ndi = &nd_ifinfo[ifp->if_index]; struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *); struct nd_router_advert *nd_ra; struct in6_addr saddr6 = ip6->ip6_src; #if 0 struct in6_addr daddr6 = ip6->ip6_dst; int flags; /* = nd_ra->nd_ra_flags_reserved; */ int is_managed = ((flags & ND_RA_FLAG_MANAGED) != 0); int is_other = ((flags & ND_RA_FLAG_OTHER) != 0); #endif union nd_opts ndopts; struct nd_defrouter *dr; /* * We only accept RAs only when * the system-wide variable allows the acceptance, and * per-interface variable allows RAs on the receiving interface. */ if (ip6_accept_rtadv == 0) goto freeit; if (!(ndi->flags & ND6_IFF_ACCEPT_RTADV)) goto freeit; if (ip6->ip6_hlim != 255) { nd6log((LOG_ERR, "nd6_ra_input: invalid hlim (%d) from %s to %s on %s\n", ip6->ip6_hlim, ip6_sprintf(&ip6->ip6_src), ip6_sprintf(&ip6->ip6_dst), if_name(ifp))); goto bad; } if (!IN6_IS_ADDR_LINKLOCAL(&saddr6)) { nd6log((LOG_ERR, "nd6_ra_input: src %s is not link-local\n", ip6_sprintf(&saddr6))); goto bad; } #ifndef PULLDOWN_TEST IP6_EXTHDR_CHECK(m, off, icmp6len,); nd_ra = (struct nd_router_advert *)((caddr_t)ip6 + off); #else IP6_EXTHDR_GET(nd_ra, struct nd_router_advert *, m, off, icmp6len); if (nd_ra == NULL) { icmp6stat.icp6s_tooshort++; return; } #endif icmp6len -= sizeof(*nd_ra); nd6_option_init(nd_ra + 1, icmp6len, &ndopts); if (nd6_options(&ndopts) < 0) { nd6log((LOG_INFO, "nd6_ra_input: invalid ND option, ignored\n")); /* nd6_options have incremented stats */ goto freeit; } { struct nd_defrouter dr0; u_int32_t advreachable = nd_ra->nd_ra_reachable; dr0.rtaddr = saddr6; dr0.flags = nd_ra->nd_ra_flags_reserved; dr0.rtlifetime = ntohs(nd_ra->nd_ra_router_lifetime); dr0.expire = time_second + dr0.rtlifetime; dr0.ifp = ifp; dr0.advint = 0; /* Mobile IPv6 */ dr0.advint_expire = 0; /* Mobile IPv6 */ dr0.advints_lost = 0; /* Mobile IPv6 */ /* unspecified or not? (RFC 2461 6.3.4) */ if (advreachable) { advreachable = ntohl(advreachable); if (advreachable <= MAX_REACHABLE_TIME && ndi->basereachable != advreachable) { ndi->basereachable = advreachable; ndi->reachable = ND_COMPUTE_RTIME(ndi->basereachable); ndi->recalctm = nd6_recalc_reachtm_interval; /* reset */ } } if (nd_ra->nd_ra_retransmit) ndi->retrans = ntohl(nd_ra->nd_ra_retransmit); if (nd_ra->nd_ra_curhoplimit) ndi->chlim = nd_ra->nd_ra_curhoplimit; dr = defrtrlist_update(&dr0); } /* * prefix */ if (ndopts.nd_opts_pi) { struct nd_opt_hdr *pt; struct nd_opt_prefix_info *pi = NULL; struct nd_prefix pr; for (pt = (struct nd_opt_hdr *)ndopts.nd_opts_pi; pt <= (struct nd_opt_hdr *)ndopts.nd_opts_pi_end; pt = (struct nd_opt_hdr *)((caddr_t)pt + (pt->nd_opt_len << 3))) { if (pt->nd_opt_type != ND_OPT_PREFIX_INFORMATION) continue; pi = (struct nd_opt_prefix_info *)pt; if (pi->nd_opt_pi_len != 4) { nd6log((LOG_INFO, "nd6_ra_input: invalid option " "len %d for prefix information option, " "ignored\n", pi->nd_opt_pi_len)); continue; } if (128 < pi->nd_opt_pi_prefix_len) { nd6log((LOG_INFO, "nd6_ra_input: invalid prefix " "len %d for prefix information option, " "ignored\n", pi->nd_opt_pi_prefix_len)); continue; } if (IN6_IS_ADDR_MULTICAST(&pi->nd_opt_pi_prefix) || IN6_IS_ADDR_LINKLOCAL(&pi->nd_opt_pi_prefix)) { nd6log((LOG_INFO, "nd6_ra_input: invalid prefix " "%s, ignored\n", ip6_sprintf(&pi->nd_opt_pi_prefix))); continue; } /* aggregatable unicast address, rfc2374 */ if ((pi->nd_opt_pi_prefix.s6_addr8[0] & 0xe0) == 0x20 && pi->nd_opt_pi_prefix_len != 64) { nd6log((LOG_INFO, "nd6_ra_input: invalid prefixlen " "%d for rfc2374 prefix %s, ignored\n", pi->nd_opt_pi_prefix_len, ip6_sprintf(&pi->nd_opt_pi_prefix))); continue; } bzero(&pr, sizeof(pr)); pr.ndpr_prefix.sin6_family = AF_INET6; pr.ndpr_prefix.sin6_len = sizeof(pr.ndpr_prefix); pr.ndpr_prefix.sin6_addr = pi->nd_opt_pi_prefix; pr.ndpr_ifp = (struct ifnet *)m->m_pkthdr.rcvif; pr.ndpr_raf_onlink = (pi->nd_opt_pi_flags_reserved & ND_OPT_PI_FLAG_ONLINK) ? 1 : 0; pr.ndpr_raf_auto = (pi->nd_opt_pi_flags_reserved & ND_OPT_PI_FLAG_AUTO) ? 1 : 0; pr.ndpr_plen = pi->nd_opt_pi_prefix_len; pr.ndpr_vltime = ntohl(pi->nd_opt_pi_valid_time); pr.ndpr_pltime = ntohl(pi->nd_opt_pi_preferred_time); if (in6_init_prefix_ltimes(&pr)) continue; /* prefix lifetime init failed */ (void)prelist_update(&pr, dr, m); } } /* * MTU */ if (ndopts.nd_opts_mtu && ndopts.nd_opts_mtu->nd_opt_mtu_len == 1) { u_int32_t mtu = ntohl(ndopts.nd_opts_mtu->nd_opt_mtu_mtu); /* lower bound */ if (mtu < IPV6_MMTU) { nd6log((LOG_INFO, "nd6_ra_input: bogus mtu option " "mtu=%d sent from %s, ignoring\n", mtu, ip6_sprintf(&ip6->ip6_src))); goto skip; } /* upper bound */ if (ndi->maxmtu) { if (mtu <= ndi->maxmtu) { int change = (ndi->linkmtu != mtu); ndi->linkmtu = mtu; if (change) /* in6_maxmtu may change */ in6_setmaxmtu(); } else { nd6log((LOG_INFO, "nd6_ra_input: bogus mtu " "mtu=%d sent from %s; " "exceeds maxmtu %d, ignoring\n", mtu, ip6_sprintf(&ip6->ip6_src), ndi->maxmtu)); } } else { nd6log((LOG_INFO, "nd6_ra_input: mtu option " "mtu=%d sent from %s; maxmtu unknown, " "ignoring\n", mtu, ip6_sprintf(&ip6->ip6_src))); } } skip: /* * Source link layer address */ { char *lladdr = NULL; int lladdrlen = 0; if (ndopts.nd_opts_src_lladdr) { lladdr = (char *)(ndopts.nd_opts_src_lladdr + 1); lladdrlen = ndopts.nd_opts_src_lladdr->nd_opt_len << 3; } if (lladdr && ((ifp->if_addrlen + 2 + 7) & ~7) != lladdrlen) { nd6log((LOG_INFO, "nd6_ra_input: lladdrlen mismatch for %s " "(if %d, RA packet %d)\n", ip6_sprintf(&saddr6), ifp->if_addrlen, lladdrlen - 2)); goto bad; } nd6_cache_lladdr(ifp, &saddr6, lladdr, lladdrlen, ND_ROUTER_ADVERT, 0); /* * Installing a link-layer address might change the state of the * router's neighbor cache, which might also affect our on-link * detection of adveritsed prefixes. */ pfxlist_onlink_check(); } freeit: m_freem(m); return; bad: icmp6stat.icp6s_badra++; m_freem(m); } /* * default router list proccessing sub routines */ /* tell the change to user processes watching the routing socket. */ static void nd6_rtmsg(cmd, rt) int cmd; struct rtentry *rt; { struct rt_addrinfo info; bzero((caddr_t)&info, sizeof(info)); info.rti_info[RTAX_DST] = rt_key(rt); info.rti_info[RTAX_GATEWAY] = rt->rt_gateway; info.rti_info[RTAX_NETMASK] = rt_mask(rt); info.rti_info[RTAX_IFP] = (struct sockaddr *)TAILQ_FIRST(&rt->rt_ifp->if_addrlist); info.rti_info[RTAX_IFA] = rt->rt_ifa->ifa_addr; rt_missmsg(cmd, &info, rt->rt_flags, 0); } void defrouter_addreq(new) struct nd_defrouter *new; { struct sockaddr_in6 def, mask, gate; struct rtentry *newrt = NULL; - int s; Bzero(&def, sizeof(def)); Bzero(&mask, sizeof(mask)); Bzero(&gate, sizeof(gate)); def.sin6_len = mask.sin6_len = gate.sin6_len = sizeof(struct sockaddr_in6); def.sin6_family = mask.sin6_family = gate.sin6_family = AF_INET6; gate.sin6_addr = new->rtaddr; - s = splnet(); (void)rtrequest(RTM_ADD, (struct sockaddr *)&def, (struct sockaddr *)&gate, (struct sockaddr *)&mask, RTF_GATEWAY, &newrt); if (newrt) { + RT_LOCK(newrt); nd6_rtmsg(RTM_ADD, newrt); /* tell user process */ newrt->rt_refcnt--; + RT_UNLOCK(newrt); } - splx(s); return; } /* Add a route to a given interface as default */ void defrouter_addifreq(ifp) struct ifnet *ifp; { struct sockaddr_in6 def, mask; struct ifaddr *ifa; struct rtentry *newrt = NULL; int error, flags; bzero(&def, sizeof(def)); bzero(&mask, sizeof(mask)); def.sin6_len = mask.sin6_len = sizeof(struct sockaddr_in6); def.sin6_family = mask.sin6_family = AF_INET6; /* * Search for an ifaddr beloging to the specified interface. * XXX: An IPv6 address are required to be assigned on the interface. */ if ((ifa = ifaof_ifpforaddr((struct sockaddr *)&def, ifp)) == NULL) { nd6log((LOG_ERR, /* better error? */ "defrouter_addifreq: failed to find an ifaddr " "to install a route to interface %s\n", if_name(ifp))); return; } flags = ifa->ifa_flags; error = rtrequest(RTM_ADD, (struct sockaddr *)&def, ifa->ifa_addr, (struct sockaddr *)&mask, flags, &newrt); if (error != 0) { nd6log((LOG_ERR, "defrouter_addifreq: failed to install a route to " "interface %s (errno = %d)\n", if_name(ifp), error)); - - if (newrt) /* maybe unnecessary, but do it for safety */ - newrt->rt_refcnt--; } else { if (newrt) { + RT_LOCK(newrt); nd6_rtmsg(RTM_ADD, newrt); newrt->rt_refcnt--; + RT_UNLOCK(newrt); } } } struct nd_defrouter * defrouter_lookup(addr, ifp) struct in6_addr *addr; struct ifnet *ifp; { struct nd_defrouter *dr; for (dr = TAILQ_FIRST(&nd_defrouter); dr; dr = TAILQ_NEXT(dr, dr_entry)) { if (dr->ifp == ifp && IN6_ARE_ADDR_EQUAL(addr, &dr->rtaddr)) return(dr); } return(NULL); /* search failed */ } void defrouter_delreq(dr, dofree) struct nd_defrouter *dr; int dofree; { struct sockaddr_in6 def, mask, gate; struct rtentry *oldrt = NULL; Bzero(&def, sizeof(def)); Bzero(&mask, sizeof(mask)); Bzero(&gate, sizeof(gate)); def.sin6_len = mask.sin6_len = gate.sin6_len = sizeof(struct sockaddr_in6); def.sin6_family = mask.sin6_family = gate.sin6_family = AF_INET6; gate.sin6_addr = dr->rtaddr; rtrequest(RTM_DELETE, (struct sockaddr *)&def, (struct sockaddr *)&gate, (struct sockaddr *)&mask, RTF_GATEWAY, &oldrt); if (oldrt) { nd6_rtmsg(RTM_DELETE, oldrt); RTFREE(oldrt); } if (dofree) /* XXX: necessary? */ free(dr, M_IP6NDP); } void defrtrlist_del(dr) struct nd_defrouter *dr; { struct nd_defrouter *deldr = NULL; struct nd_prefix *pr; /* * Flush all the routing table entries that use the router * as a next hop. */ if (!ip6_forwarding && ip6_accept_rtadv) { /* above is a good condition? */ rt6_flush(&dr->rtaddr, dr->ifp); } if (dr == TAILQ_FIRST(&nd_defrouter)) deldr = dr; /* The router is primary. */ TAILQ_REMOVE(&nd_defrouter, dr, dr_entry); /* * Also delete all the pointers to the router in each prefix lists. */ for (pr = nd_prefix.lh_first; pr; pr = pr->ndpr_next) { struct nd_pfxrouter *pfxrtr; if ((pfxrtr = pfxrtr_lookup(pr, dr)) != NULL) pfxrtr_del(pfxrtr); } pfxlist_onlink_check(); /* * If the router is the primary one, choose a new one. * Note that defrouter_select() will remove the current gateway * from the routing table. */ if (deldr) defrouter_select(); free(dr, M_IP6NDP); } /* * Default Router Selection according to Section 6.3.6 of RFC 2461: * 1) Routers that are reachable or probably reachable should be * preferred. * 2) When no routers on the list are known to be reachable or * probably reachable, routers SHOULD be selected in a round-robin * fashion. * 3) If the Default Router List is empty, assume that all * destinations are on-link. */ void defrouter_select() { int s = splnet(); struct nd_defrouter *dr, anydr; struct rtentry *rt = NULL; struct llinfo_nd6 *ln = NULL; /* * Search for a (probably) reachable router from the list. */ for (dr = TAILQ_FIRST(&nd_defrouter); dr; dr = TAILQ_NEXT(dr, dr_entry)) { if ((rt = nd6_lookup(&dr->rtaddr, 0, dr->ifp)) && (ln = (struct llinfo_nd6 *)rt->rt_llinfo) && ND6_IS_LLINFO_PROBREACH(ln)) { /* Got it, and move it to the head */ TAILQ_REMOVE(&nd_defrouter, dr, dr_entry); TAILQ_INSERT_HEAD(&nd_defrouter, dr, dr_entry); break; } } if ((dr = TAILQ_FIRST(&nd_defrouter))) { /* * De-install the previous default gateway and install * a new one. * Note that if there is no reachable router in the list, * the head entry will be used anyway. * XXX: do we have to check the current routing table entry? */ bzero(&anydr, sizeof(anydr)); defrouter_delreq(&anydr, 0); defrouter_addreq(dr); } else { /* * The Default Router List is empty, so install the default * route to an inteface. * XXX: The specification does not say this mechanism should * be restricted to hosts, but this would be not useful * (even harmful) for routers. */ if (!ip6_forwarding) { /* * De-install the current default route * in advance. */ bzero(&anydr, sizeof(anydr)); defrouter_delreq(&anydr, 0); if (nd6_defifp) { /* * Install a route to the default interface * as default route. * XXX: we enable this for host only, because * this may override a default route installed * a user process (e.g. routing daemon) in a * router case. */ defrouter_addifreq(nd6_defifp); } else { nd6log((LOG_INFO, "defrouter_select: " "there's no default router and no default" " interface\n")); } } } splx(s); return; } static struct nd_defrouter * defrtrlist_update(new) struct nd_defrouter *new; { struct nd_defrouter *dr, *n; int s = splnet(); if ((dr = defrouter_lookup(&new->rtaddr, new->ifp)) != NULL) { /* entry exists */ if (new->rtlifetime == 0) { defrtrlist_del(dr); dr = NULL; } else { /* override */ dr->flags = new->flags; /* xxx flag check */ dr->rtlifetime = new->rtlifetime; dr->expire = new->expire; } splx(s); return(dr); } /* entry does not exist */ if (new->rtlifetime == 0) { splx(s); return(NULL); } n = (struct nd_defrouter *)malloc(sizeof(*n), M_IP6NDP, M_NOWAIT); if (n == NULL) { splx(s); return(NULL); } bzero(n, sizeof(*n)); *n = *new; /* * Insert the new router at the end of the Default Router List. * If there is no other router, install it anyway. Otherwise, * just continue to use the current default router. */ TAILQ_INSERT_TAIL(&nd_defrouter, n, dr_entry); if (TAILQ_FIRST(&nd_defrouter) == n) defrouter_select(); splx(s); return(n); } static struct nd_pfxrouter * pfxrtr_lookup(pr, dr) struct nd_prefix *pr; struct nd_defrouter *dr; { struct nd_pfxrouter *search; for (search = pr->ndpr_advrtrs.lh_first; search; search = search->pfr_next) { if (search->router == dr) break; } return(search); } static void pfxrtr_add(pr, dr) struct nd_prefix *pr; struct nd_defrouter *dr; { struct nd_pfxrouter *new; new = (struct nd_pfxrouter *)malloc(sizeof(*new), M_IP6NDP, M_NOWAIT); if (new == NULL) return; bzero(new, sizeof(*new)); new->router = dr; LIST_INSERT_HEAD(&pr->ndpr_advrtrs, new, pfr_entry); pfxlist_onlink_check(); } static void pfxrtr_del(pfr) struct nd_pfxrouter *pfr; { LIST_REMOVE(pfr, pfr_entry); free(pfr, M_IP6NDP); } struct nd_prefix * nd6_prefix_lookup(pr) struct nd_prefix *pr; { struct nd_prefix *search; for (search = nd_prefix.lh_first; search; search = search->ndpr_next) { if (pr->ndpr_ifp == search->ndpr_ifp && pr->ndpr_plen == search->ndpr_plen && in6_are_prefix_equal(&pr->ndpr_prefix.sin6_addr, &search->ndpr_prefix.sin6_addr, pr->ndpr_plen) ) { break; } } return(search); } int nd6_prelist_add(pr, dr, newp) struct nd_prefix *pr, **newp; struct nd_defrouter *dr; { struct nd_prefix *new = NULL; int i, s; new = (struct nd_prefix *)malloc(sizeof(*new), M_IP6NDP, M_NOWAIT); if (new == NULL) return ENOMEM; bzero(new, sizeof(*new)); *new = *pr; if (newp != NULL) *newp = new; /* initilization */ LIST_INIT(&new->ndpr_advrtrs); in6_prefixlen2mask(&new->ndpr_mask, new->ndpr_plen); /* make prefix in the canonical form */ for (i = 0; i < 4; i++) new->ndpr_prefix.sin6_addr.s6_addr32[i] &= new->ndpr_mask.s6_addr32[i]; s = splnet(); /* link ndpr_entry to nd_prefix list */ LIST_INSERT_HEAD(&nd_prefix, new, ndpr_entry); splx(s); /* ND_OPT_PI_FLAG_ONLINK processing */ if (new->ndpr_raf_onlink) { int e; if ((e = nd6_prefix_onlink(new)) != 0) { nd6log((LOG_ERR, "nd6_prelist_add: failed to make " "the prefix %s/%d on-link on %s (errno=%d)\n", ip6_sprintf(&pr->ndpr_prefix.sin6_addr), pr->ndpr_plen, if_name(pr->ndpr_ifp), e)); /* proceed anyway. XXX: is it correct? */ } } if (dr) { pfxrtr_add(new, dr); } return 0; } void prelist_remove(pr) struct nd_prefix *pr; { struct nd_pfxrouter *pfr, *next; int e, s; /* make sure to invalidate the prefix until it is really freed. */ pr->ndpr_vltime = 0; pr->ndpr_pltime = 0; #if 0 /* * Though these flags are now meaningless, we'd rather keep the value * not to confuse users when executing "ndp -p". */ pr->ndpr_raf_onlink = 0; pr->ndpr_raf_auto = 0; #endif if ((pr->ndpr_stateflags & NDPRF_ONLINK) != 0 && (e = nd6_prefix_offlink(pr)) != 0) { nd6log((LOG_ERR, "prelist_remove: failed to make %s/%d offlink " "on %s, errno=%d\n", ip6_sprintf(&pr->ndpr_prefix.sin6_addr), pr->ndpr_plen, if_name(pr->ndpr_ifp), e)); /* what should we do? */ } if (pr->ndpr_refcnt > 0) return; /* notice here? */ s = splnet(); /* unlink ndpr_entry from nd_prefix list */ LIST_REMOVE(pr, ndpr_entry); /* free list of routers that adversed the prefix */ for (pfr = pr->ndpr_advrtrs.lh_first; pfr; pfr = next) { next = pfr->pfr_next; free(pfr, M_IP6NDP); } splx(s); free(pr, M_IP6NDP); pfxlist_onlink_check(); } int prelist_update(new, dr, m) struct nd_prefix *new; struct nd_defrouter *dr; /* may be NULL */ struct mbuf *m; { struct in6_ifaddr *ia6 = NULL, *ia6_match = NULL; struct ifaddr *ifa; struct ifnet *ifp = new->ndpr_ifp; struct nd_prefix *pr; int s = splnet(); int error = 0; int newprefix = 0; int auth; struct in6_addrlifetime lt6_tmp; auth = 0; if (m) { /* * Authenticity for NA consists authentication for * both IP header and IP datagrams, doesn't it ? */ #if defined(M_AUTHIPHDR) && defined(M_AUTHIPDGM) auth = (m->m_flags & M_AUTHIPHDR && m->m_flags & M_AUTHIPDGM) ? 1 : 0; #endif } if ((pr = nd6_prefix_lookup(new)) != NULL) { /* * nd6_prefix_lookup() ensures that pr and new have the same * prefix on a same interface. */ /* * Update prefix information. Note that the on-link (L) bit * and the autonomous (A) bit should NOT be changed from 1 * to 0. */ if (new->ndpr_raf_onlink == 1) pr->ndpr_raf_onlink = 1; if (new->ndpr_raf_auto == 1) pr->ndpr_raf_auto = 1; if (new->ndpr_raf_onlink) { pr->ndpr_vltime = new->ndpr_vltime; pr->ndpr_pltime = new->ndpr_pltime; pr->ndpr_preferred = new->ndpr_preferred; pr->ndpr_expire = new->ndpr_expire; } if (new->ndpr_raf_onlink && (pr->ndpr_stateflags & NDPRF_ONLINK) == 0) { int e; if ((e = nd6_prefix_onlink(pr)) != 0) { nd6log((LOG_ERR, "prelist_update: failed to make " "the prefix %s/%d on-link on %s " "(errno=%d)\n", ip6_sprintf(&pr->ndpr_prefix.sin6_addr), pr->ndpr_plen, if_name(pr->ndpr_ifp), e)); /* proceed anyway. XXX: is it correct? */ } } if (dr && pfxrtr_lookup(pr, dr) == NULL) pfxrtr_add(pr, dr); } else { struct nd_prefix *newpr = NULL; newprefix = 1; if (new->ndpr_vltime == 0) goto end; if (new->ndpr_raf_onlink == 0 && new->ndpr_raf_auto == 0) goto end; bzero(&new->ndpr_addr, sizeof(struct in6_addr)); error = nd6_prelist_add(new, dr, &newpr); if (error != 0 || newpr == NULL) { nd6log((LOG_NOTICE, "prelist_update: " "nd6_prelist_add failed for %s/%d on %s " "errno=%d, returnpr=%p\n", ip6_sprintf(&new->ndpr_prefix.sin6_addr), new->ndpr_plen, if_name(new->ndpr_ifp), error, newpr)); goto end; /* we should just give up in this case. */ } /* * XXX: from the ND point of view, we can ignore a prefix * with the on-link bit being zero. However, we need a * prefix structure for references from autoconfigured * addresses. Thus, we explicitly make suret that the prefix * itself expires now. */ if (newpr->ndpr_raf_onlink == 0) { newpr->ndpr_vltime = 0; newpr->ndpr_pltime = 0; in6_init_prefix_ltimes(newpr); } pr = newpr; } /* * Address autoconfiguration based on Section 5.5.3 of RFC 2462. * Note that pr must be non NULL at this point. */ /* 5.5.3 (a). Ignore the prefix without the A bit set. */ if (!new->ndpr_raf_auto) goto afteraddrconf; /* * 5.5.3 (b). the link-local prefix should have been ignored in * nd6_ra_input. */ /* * 5.5.3 (c). Consistency check on lifetimes: pltime <= vltime. * This should have been done in nd6_ra_input. */ /* * 5.5.3 (d). If the prefix advertised does not match the prefix of an * address already in the list, and the Valid Lifetime is not 0, * form an address. Note that even a manually configured address * should reject autoconfiguration of a new address. */ TAILQ_FOREACH(ifa, &ifp->if_addrlist, ifa_list) { struct in6_ifaddr *ifa6; int ifa_plen; u_int32_t storedlifetime; if (ifa->ifa_addr->sa_family != AF_INET6) continue; ifa6 = (struct in6_ifaddr *)ifa; /* * Spec is not clear here, but I believe we should concentrate * on unicast (i.e. not anycast) addresses. * XXX: other ia6_flags? detached or duplicated? */ if ((ifa6->ia6_flags & IN6_IFF_ANYCAST) != 0) continue; ifa_plen = in6_mask2len(&ifa6->ia_prefixmask.sin6_addr, NULL); if (ifa_plen != new->ndpr_plen || !in6_are_prefix_equal(&ifa6->ia_addr.sin6_addr, &new->ndpr_prefix.sin6_addr, ifa_plen)) continue; if (ia6_match == NULL) /* remember the first one */ ia6_match = ifa6; if ((ifa6->ia6_flags & IN6_IFF_AUTOCONF) == 0) continue; /* * An already autoconfigured address matched. Now that we * are sure there is at least one matched address, we can * proceed to 5.5.3. (e): update the lifetimes according to the * "two hours" rule and the privacy extension. */ #define TWOHOUR (120*60) lt6_tmp = ifa6->ia6_lifetime; if (lt6_tmp.ia6t_vltime == ND6_INFINITE_LIFETIME) storedlifetime = ND6_INFINITE_LIFETIME; else if (IFA6_IS_INVALID(ifa6)) storedlifetime = 0; else storedlifetime = lt6_tmp.ia6t_expire - time_second; /* when not updating, keep the current stored lifetime. */ lt6_tmp.ia6t_vltime = storedlifetime; if (TWOHOUR < new->ndpr_vltime || storedlifetime < new->ndpr_vltime) { lt6_tmp.ia6t_vltime = new->ndpr_vltime; } else if (storedlifetime <= TWOHOUR #if 0 /* * This condition is logically redundant, so we just * omit it. * See IPng 6712, 6717, and 6721. */ && new->ndpr_vltime <= storedlifetime #endif ) { if (auth) { lt6_tmp.ia6t_vltime = new->ndpr_vltime; } } else { /* * new->ndpr_vltime <= TWOHOUR && * TWOHOUR < storedlifetime */ lt6_tmp.ia6t_vltime = TWOHOUR; } /* The 2 hour rule is not imposed for preferred lifetime. */ lt6_tmp.ia6t_pltime = new->ndpr_pltime; in6_init_address_ltimes(pr, <6_tmp); /* * When adjusting the lifetimes of an existing temporary * address, only lower the lifetimes. * RFC 3041 3.3. (1). * XXX: how should we modify ia6t_[pv]ltime? */ if ((ifa6->ia6_flags & IN6_IFF_TEMPORARY) != 0) { if (lt6_tmp.ia6t_expire == 0 || /* no expire */ lt6_tmp.ia6t_expire > ifa6->ia6_lifetime.ia6t_expire) { lt6_tmp.ia6t_expire = ifa6->ia6_lifetime.ia6t_expire; } if (lt6_tmp.ia6t_preferred == 0 || /* no expire */ lt6_tmp.ia6t_preferred > ifa6->ia6_lifetime.ia6t_preferred) { lt6_tmp.ia6t_preferred = ifa6->ia6_lifetime.ia6t_preferred; } } ifa6->ia6_lifetime = lt6_tmp; } if (ia6_match == NULL && new->ndpr_vltime) { /* * No address matched and the valid lifetime is non-zero. * Create a new address. */ if ((ia6 = in6_ifadd(new, NULL)) != NULL) { /* * note that we should use pr (not new) for reference. */ pr->ndpr_refcnt++; ia6->ia6_ndpr = pr; #if 0 /* XXXYYY Don't do this, according to Jinmei. */ pr->ndpr_addr = new->ndpr_addr; #endif /* * RFC 3041 3.3 (2). * When a new public address is created as described * in RFC2462, also create a new temporary address. * * RFC 3041 3.5. * When an interface connects to a new link, a new * randomized interface identifier should be generated * immediately together with a new set of temporary * addresses. Thus, we specifiy 1 as the 2nd arg of * in6_tmpifadd(). */ if (ip6_use_tempaddr) { int e; if ((e = in6_tmpifadd(ia6, 1)) != 0) { nd6log((LOG_NOTICE, "prelist_update: " "failed to create a temporary " "address, errno=%d\n", e)); } } /* * A newly added address might affect the status * of other addresses, so we check and update it. * XXX: what if address duplication happens? */ pfxlist_onlink_check(); } else { /* just set an error. do not bark here. */ error = EADDRNOTAVAIL; /* XXX: might be unused. */ } } afteraddrconf: end: splx(s); return error; } /* * A supplement function used in the on-link detection below; * detect if a given prefix has a (probably) reachable advertising router. * XXX: lengthy function name... */ static struct nd_pfxrouter * find_pfxlist_reachable_router(pr) struct nd_prefix *pr; { struct nd_pfxrouter *pfxrtr; struct rtentry *rt; struct llinfo_nd6 *ln; for (pfxrtr = LIST_FIRST(&pr->ndpr_advrtrs); pfxrtr; pfxrtr = LIST_NEXT(pfxrtr, pfr_entry)) { if ((rt = nd6_lookup(&pfxrtr->router->rtaddr, 0, pfxrtr->router->ifp)) && (ln = (struct llinfo_nd6 *)rt->rt_llinfo) && ND6_IS_LLINFO_PROBREACH(ln)) break; /* found */ } return(pfxrtr); } /* * Check if each prefix in the prefix list has at least one available router * that advertised the prefix (a router is "available" if its neighbor cache * entry is reachable or probably reachable). * If the check fails, the prefix may be off-link, because, for example, * we have moved from the network but the lifetime of the prefix has not * expired yet. So we should not use the prefix if there is another prefix * that has an available router. * But, if there is no prefix that has an available router, we still regards * all the prefixes as on-link. This is because we can't tell if all the * routers are simply dead or if we really moved from the network and there * is no router around us. */ void pfxlist_onlink_check() { struct nd_prefix *pr; struct in6_ifaddr *ifa; /* * Check if there is a prefix that has a reachable advertising * router. */ for (pr = nd_prefix.lh_first; pr; pr = pr->ndpr_next) { if (pr->ndpr_raf_onlink && find_pfxlist_reachable_router(pr)) break; } if (pr) { /* * There is at least one prefix that has a reachable router. * Detach prefixes which have no reachable advertising * router, and attach other prefixes. */ for (pr = nd_prefix.lh_first; pr; pr = pr->ndpr_next) { /* XXX: a link-local prefix should never be detached */ if (IN6_IS_ADDR_LINKLOCAL(&pr->ndpr_prefix.sin6_addr)) continue; /* * we aren't interested in prefixes without the L bit * set. */ if (pr->ndpr_raf_onlink == 0) continue; if ((pr->ndpr_stateflags & NDPRF_DETACHED) == 0 && find_pfxlist_reachable_router(pr) == NULL) pr->ndpr_stateflags |= NDPRF_DETACHED; if ((pr->ndpr_stateflags & NDPRF_DETACHED) != 0 && find_pfxlist_reachable_router(pr) != 0) pr->ndpr_stateflags &= ~NDPRF_DETACHED; } } else { /* there is no prefix that has a reachable router */ for (pr = nd_prefix.lh_first; pr; pr = pr->ndpr_next) { if (IN6_IS_ADDR_LINKLOCAL(&pr->ndpr_prefix.sin6_addr)) continue; if (pr->ndpr_raf_onlink == 0) continue; if ((pr->ndpr_stateflags & NDPRF_DETACHED) != 0) pr->ndpr_stateflags &= ~NDPRF_DETACHED; } } /* * Remove each interface route associated with a (just) detached * prefix, and reinstall the interface route for a (just) attached * prefix. Note that all attempt of reinstallation does not * necessarily success, when a same prefix is shared among multiple * interfaces. Such cases will be handled in nd6_prefix_onlink, * so we don't have to care about them. */ for (pr = nd_prefix.lh_first; pr; pr = pr->ndpr_next) { int e; if (IN6_IS_ADDR_LINKLOCAL(&pr->ndpr_prefix.sin6_addr)) continue; if (pr->ndpr_raf_onlink == 0) continue; if ((pr->ndpr_stateflags & NDPRF_DETACHED) != 0 && (pr->ndpr_stateflags & NDPRF_ONLINK) != 0) { if ((e = nd6_prefix_offlink(pr)) != 0) { nd6log((LOG_ERR, "pfxlist_onlink_check: failed to " "make %s/%d offlink, errno=%d\n", ip6_sprintf(&pr->ndpr_prefix.sin6_addr), pr->ndpr_plen, e)); } } if ((pr->ndpr_stateflags & NDPRF_DETACHED) == 0 && (pr->ndpr_stateflags & NDPRF_ONLINK) == 0 && pr->ndpr_raf_onlink) { if ((e = nd6_prefix_onlink(pr)) != 0) { nd6log((LOG_ERR, "pfxlist_onlink_check: failed to " "make %s/%d offlink, errno=%d\n", ip6_sprintf(&pr->ndpr_prefix.sin6_addr), pr->ndpr_plen, e)); } } } /* * Changes on the prefix status might affect address status as well. * Make sure that all addresses derived from an attached prefix are * attached, and that all addresses derived from a detached prefix are * detached. Note, however, that a manually configured address should * always be attached. * The precise detection logic is same as the one for prefixes. */ for (ifa = in6_ifaddr; ifa; ifa = ifa->ia_next) { if ((ifa->ia6_flags & IN6_IFF_AUTOCONF) == 0) continue; if (ifa->ia6_ndpr == NULL) { /* * This can happen when we first configure the address * (i.e. the address exists, but the prefix does not). * XXX: complicated relationships... */ continue; } if (find_pfxlist_reachable_router(ifa->ia6_ndpr)) break; } if (ifa) { for (ifa = in6_ifaddr; ifa; ifa = ifa->ia_next) { if ((ifa->ia6_flags & IN6_IFF_AUTOCONF) == 0) continue; if (ifa->ia6_ndpr == NULL) /* XXX: see above. */ continue; if (find_pfxlist_reachable_router(ifa->ia6_ndpr)) ifa->ia6_flags &= ~IN6_IFF_DETACHED; else ifa->ia6_flags |= IN6_IFF_DETACHED; } } else { for (ifa = in6_ifaddr; ifa; ifa = ifa->ia_next) { if ((ifa->ia6_flags & IN6_IFF_AUTOCONF) == 0) continue; ifa->ia6_flags &= ~IN6_IFF_DETACHED; } } } int nd6_prefix_onlink(pr) struct nd_prefix *pr; { struct ifaddr *ifa; struct ifnet *ifp = pr->ndpr_ifp; struct sockaddr_in6 mask6; struct nd_prefix *opr; u_long rtflags; int error = 0; struct rtentry *rt = NULL; /* sanity check */ if ((pr->ndpr_stateflags & NDPRF_ONLINK) != 0) { nd6log((LOG_ERR, "nd6_prefix_onlink: %s/%d is already on-link\n", ip6_sprintf(&pr->ndpr_prefix.sin6_addr), pr->ndpr_plen); return(EEXIST)); } /* * Add the interface route associated with the prefix. Before * installing the route, check if there's the same prefix on another * interface, and the prefix has already installed the interface route. * Although such a configuration is expected to be rare, we explicitly * allow it. */ for (opr = nd_prefix.lh_first; opr; opr = opr->ndpr_next) { if (opr == pr) continue; if ((opr->ndpr_stateflags & NDPRF_ONLINK) == 0) continue; if (opr->ndpr_plen == pr->ndpr_plen && in6_are_prefix_equal(&pr->ndpr_prefix.sin6_addr, &opr->ndpr_prefix.sin6_addr, pr->ndpr_plen)) return(0); } /* * We prefer link-local addresses as the associated interface address. */ /* search for a link-local addr */ ifa = (struct ifaddr *)in6ifa_ifpforlinklocal(ifp, IN6_IFF_NOTREADY| IN6_IFF_ANYCAST); if (ifa == NULL) { /* XXX: freebsd does not have ifa_ifwithaf */ TAILQ_FOREACH(ifa, &ifp->if_addrlist, ifa_list) { if (ifa->ifa_addr->sa_family == AF_INET6) break; } /* should we care about ia6_flags? */ } if (ifa == NULL) { /* * This can still happen, when, for example, we receive an RA * containing a prefix with the L bit set and the A bit clear, * after removing all IPv6 addresses on the receiving * interface. This should, of course, be rare though. */ nd6log((LOG_NOTICE, "nd6_prefix_onlink: failed to find any ifaddr" " to add route for a prefix(%s/%d) on %s\n", ip6_sprintf(&pr->ndpr_prefix.sin6_addr), pr->ndpr_plen, if_name(ifp))); return(0); } /* * in6_ifinit() sets nd6_rtrequest to ifa_rtrequest for all ifaddrs. * ifa->ifa_rtrequest = nd6_rtrequest; */ bzero(&mask6, sizeof(mask6)); mask6.sin6_len = sizeof(mask6); mask6.sin6_addr = pr->ndpr_mask; rtflags = ifa->ifa_flags | RTF_CLONING | RTF_UP; if (nd6_need_cache(ifp)) { /* explicitly set in case ifa_flags does not set the flag. */ rtflags |= RTF_CLONING; } else { /* * explicitly clear the cloning bit in case ifa_flags sets it. */ rtflags &= ~RTF_CLONING; } error = rtrequest(RTM_ADD, (struct sockaddr *)&pr->ndpr_prefix, ifa->ifa_addr, (struct sockaddr *)&mask6, rtflags, &rt); if (error == 0) { if (rt != NULL) /* this should be non NULL, though */ nd6_rtmsg(RTM_ADD, rt); pr->ndpr_stateflags |= NDPRF_ONLINK; } else { nd6log((LOG_ERR, "nd6_prefix_onlink: failed to add route for a" " prefix (%s/%d) on %s, gw=%s, mask=%s, flags=%lx " "errno = %d\n", ip6_sprintf(&pr->ndpr_prefix.sin6_addr), pr->ndpr_plen, if_name(ifp), ip6_sprintf(&((struct sockaddr_in6 *)ifa->ifa_addr)->sin6_addr), ip6_sprintf(&mask6.sin6_addr), rtflags, error)); } - if (rt != NULL) + if (rt != NULL) { + RT_LOCK(rt); rt->rt_refcnt--; + RT_UNLOCK(rt); + } return(error); } int nd6_prefix_offlink(pr) struct nd_prefix *pr; { int error = 0; struct ifnet *ifp = pr->ndpr_ifp; struct nd_prefix *opr; struct sockaddr_in6 sa6, mask6; struct rtentry *rt = NULL; /* sanity check */ if ((pr->ndpr_stateflags & NDPRF_ONLINK) == 0) { nd6log((LOG_ERR, "nd6_prefix_offlink: %s/%d is already off-link\n", ip6_sprintf(&pr->ndpr_prefix.sin6_addr), pr->ndpr_plen)); return(EEXIST); } bzero(&sa6, sizeof(sa6)); sa6.sin6_family = AF_INET6; sa6.sin6_len = sizeof(sa6); bcopy(&pr->ndpr_prefix.sin6_addr, &sa6.sin6_addr, sizeof(struct in6_addr)); bzero(&mask6, sizeof(mask6)); mask6.sin6_family = AF_INET6; mask6.sin6_len = sizeof(sa6); bcopy(&pr->ndpr_mask, &mask6.sin6_addr, sizeof(struct in6_addr)); error = rtrequest(RTM_DELETE, (struct sockaddr *)&sa6, NULL, (struct sockaddr *)&mask6, 0, &rt); if (error == 0) { pr->ndpr_stateflags &= ~NDPRF_ONLINK; /* report the route deletion to the routing socket. */ if (rt != NULL) nd6_rtmsg(RTM_DELETE, rt); /* * There might be the same prefix on another interface, * the prefix which could not be on-link just because we have * the interface route (see comments in nd6_prefix_onlink). * If there's one, try to make the prefix on-link on the * interface. */ for (opr = nd_prefix.lh_first; opr; opr = opr->ndpr_next) { if (opr == pr) continue; if ((opr->ndpr_stateflags & NDPRF_ONLINK) != 0) continue; /* * KAME specific: detached prefixes should not be * on-link. */ if ((opr->ndpr_stateflags & NDPRF_DETACHED) != 0) continue; if (opr->ndpr_plen == pr->ndpr_plen && in6_are_prefix_equal(&pr->ndpr_prefix.sin6_addr, &opr->ndpr_prefix.sin6_addr, pr->ndpr_plen)) { int e; if ((e = nd6_prefix_onlink(opr)) != 0) { nd6log((LOG_ERR, "nd6_prefix_offlink: failed to " "recover a prefix %s/%d from %s " "to %s (errno = %d)\n", ip6_sprintf(&opr->ndpr_prefix.sin6_addr), opr->ndpr_plen, if_name(ifp), if_name(opr->ndpr_ifp), e)); } } } } else { /* XXX: can we still set the NDPRF_ONLINK flag? */ nd6log((LOG_ERR, "nd6_prefix_offlink: failed to delete route: " "%s/%d on %s (errno = %d)\n", ip6_sprintf(&sa6.sin6_addr), pr->ndpr_plen, if_name(ifp), error)); } if (rt != NULL) RTFREE(rt); return(error); } static struct in6_ifaddr * in6_ifadd(pr, ifid) struct nd_prefix *pr; struct in6_addr *ifid; /* Mobile IPv6 addition */ { struct ifnet *ifp = pr->ndpr_ifp; struct ifaddr *ifa; struct in6_aliasreq ifra; struct in6_ifaddr *ia, *ib; int error, plen0; struct in6_addr mask; int prefixlen = pr->ndpr_plen; in6_len2mask(&mask, prefixlen); /* * find a link-local address (will be interface ID). * Is it really mandatory? Theoretically, a global or a site-local * address can be configured without a link-local address, if we * have a unique interface identifier... * * it is not mandatory to have a link-local address, we can generate * interface identifier on the fly. we do this because: * (1) it should be the easiest way to find interface identifier. * (2) RFC2462 5.4 suggesting the use of the same interface identifier * for multiple addresses on a single interface, and possible shortcut * of DAD. we omitted DAD for this reason in the past. * (3) a user can prevent autoconfiguration of global address * by removing link-local address by hand (this is partly because we * don't have other way to control the use of IPv6 on an interface. * this has been our design choice - cf. NRL's "ifconfig auto"). * (4) it is easier to manage when an interface has addresses * with the same interface identifier, than to have multiple addresses * with different interface identifiers. * * Mobile IPv6 addition: allow for caller to specify a wished interface * ID. This is to not break connections when moving addresses between * interfaces. */ ifa = (struct ifaddr *)in6ifa_ifpforlinklocal(ifp, 0);/* 0 is OK? */ if (ifa) ib = (struct in6_ifaddr *)ifa; else return NULL; #if 0 /* don't care link local addr state, and always do DAD */ /* if link-local address is not eligible, do not autoconfigure. */ if (((struct in6_ifaddr *)ifa)->ia6_flags & IN6_IFF_NOTREADY) { printf("in6_ifadd: link-local address not ready\n"); return NULL; } #endif /* prefixlen + ifidlen must be equal to 128 */ plen0 = in6_mask2len(&ib->ia_prefixmask.sin6_addr, NULL); if (prefixlen != plen0) { nd6log((LOG_INFO, "in6_ifadd: wrong prefixlen for %s " "(prefix=%d ifid=%d)\n", if_name(ifp), prefixlen, 128 - plen0)); return NULL; } /* make ifaddr */ bzero(&ifra, sizeof(ifra)); /* * in6_update_ifa() does not use ifra_name, but we accurately set it * for safety. */ strncpy(ifra.ifra_name, if_name(ifp), sizeof(ifra.ifra_name)); ifra.ifra_addr.sin6_family = AF_INET6; ifra.ifra_addr.sin6_len = sizeof(struct sockaddr_in6); /* prefix */ bcopy(&pr->ndpr_prefix.sin6_addr, &ifra.ifra_addr.sin6_addr, sizeof(ifra.ifra_addr.sin6_addr)); ifra.ifra_addr.sin6_addr.s6_addr32[0] &= mask.s6_addr32[0]; ifra.ifra_addr.sin6_addr.s6_addr32[1] &= mask.s6_addr32[1]; ifra.ifra_addr.sin6_addr.s6_addr32[2] &= mask.s6_addr32[2]; ifra.ifra_addr.sin6_addr.s6_addr32[3] &= mask.s6_addr32[3]; /* interface ID */ if (ifid == NULL || IN6_IS_ADDR_UNSPECIFIED(ifid)) ifid = &ib->ia_addr.sin6_addr; ifra.ifra_addr.sin6_addr.s6_addr32[0] |= (ifid->s6_addr32[0] & ~mask.s6_addr32[0]); ifra.ifra_addr.sin6_addr.s6_addr32[1] |= (ifid->s6_addr32[1] & ~mask.s6_addr32[1]); ifra.ifra_addr.sin6_addr.s6_addr32[2] |= (ifid->s6_addr32[2] & ~mask.s6_addr32[2]); ifra.ifra_addr.sin6_addr.s6_addr32[3] |= (ifid->s6_addr32[3] & ~mask.s6_addr32[3]); /* new prefix mask. */ ifra.ifra_prefixmask.sin6_len = sizeof(struct sockaddr_in6); ifra.ifra_prefixmask.sin6_family = AF_INET6; bcopy(&mask, &ifra.ifra_prefixmask.sin6_addr, sizeof(ifra.ifra_prefixmask.sin6_addr)); /* * lifetime. * XXX: in6_init_address_ltimes would override these values later. * We should reconsider this logic. */ ifra.ifra_lifetime.ia6t_vltime = pr->ndpr_vltime; ifra.ifra_lifetime.ia6t_pltime = pr->ndpr_pltime; /* XXX: scope zone ID? */ ifra.ifra_flags |= IN6_IFF_AUTOCONF; /* obey autoconf */ /* * temporarily set the nopfx flag to avoid conflict. * XXX: we should reconsider the entire mechanism about prefix * manipulation. */ ifra.ifra_flags |= IN6_IFF_NOPFX; /* * keep the new address, regardless of the result of in6_update_ifa. * XXX: this address is now meaningless. * We should reconsider its role. */ pr->ndpr_addr = ifra.ifra_addr.sin6_addr; /* allocate ifaddr structure, link into chain, etc. */ if ((error = in6_update_ifa(ifp, &ifra, NULL)) != 0) { nd6log((LOG_ERR, "in6_ifadd: failed to make ifaddr %s on %s (errno=%d)\n", ip6_sprintf(&ifra.ifra_addr.sin6_addr), if_name(ifp), error)); return(NULL); /* ifaddr must not have been allocated. */ } ia = in6ifa_ifpwithaddr(ifp, &ifra.ifra_addr.sin6_addr); return(ia); /* this must NOT be NULL. */ } int in6_tmpifadd(ia0, forcegen) const struct in6_ifaddr *ia0; /* corresponding public address */ int forcegen; { struct ifnet *ifp = ia0->ia_ifa.ifa_ifp; struct in6_ifaddr *newia; struct in6_aliasreq ifra; int i, error; int trylimit = 3; /* XXX: adhoc value */ u_int32_t randid[2]; time_t vltime0, pltime0; bzero(&ifra, sizeof(ifra)); strncpy(ifra.ifra_name, if_name(ifp), sizeof(ifra.ifra_name)); ifra.ifra_addr = ia0->ia_addr; /* copy prefix mask */ ifra.ifra_prefixmask = ia0->ia_prefixmask; /* clear the old IFID */ for (i = 0; i < 4; i++) { ifra.ifra_addr.sin6_addr.s6_addr32[i] &= ifra.ifra_prefixmask.sin6_addr.s6_addr32[i]; } again: in6_get_tmpifid(ifp, (u_int8_t *)randid, (const u_int8_t *)&ia0->ia_addr.sin6_addr.s6_addr[8], forcegen); ifra.ifra_addr.sin6_addr.s6_addr32[2] |= (randid[0] & ~(ifra.ifra_prefixmask.sin6_addr.s6_addr32[2])); ifra.ifra_addr.sin6_addr.s6_addr32[3] |= (randid[1] & ~(ifra.ifra_prefixmask.sin6_addr.s6_addr32[3])); /* * If by chance the new temporary address is the same as an address * already assigned to the interface, generate a new randomized * interface identifier and repeat this step. * RFC 3041 3.3 (4). */ if (in6ifa_ifpwithaddr(ifp, &ifra.ifra_addr.sin6_addr) != NULL) { if (trylimit-- == 0) { nd6log((LOG_NOTICE, "in6_tmpifadd: failed to find " "a unique random IFID\n")); return(EEXIST); } forcegen = 1; goto again; } /* * The Valid Lifetime is the lower of the Valid Lifetime of the * public address or TEMP_VALID_LIFETIME. * The Preferred Lifetime is the lower of the Preferred Lifetime * of the public address or TEMP_PREFERRED_LIFETIME - * DESYNC_FACTOR. */ if (ia0->ia6_lifetime.ia6t_expire != 0) { vltime0 = IFA6_IS_INVALID(ia0) ? 0 : (ia0->ia6_lifetime.ia6t_expire - time_second); if (vltime0 > ip6_temp_valid_lifetime) vltime0 = ip6_temp_valid_lifetime; } else vltime0 = ip6_temp_valid_lifetime; if (ia0->ia6_lifetime.ia6t_preferred != 0) { pltime0 = IFA6_IS_DEPRECATED(ia0) ? 0 : (ia0->ia6_lifetime.ia6t_preferred - time_second); if (pltime0 > ip6_temp_preferred_lifetime - ip6_desync_factor){ pltime0 = ip6_temp_preferred_lifetime - ip6_desync_factor; } } else pltime0 = ip6_temp_preferred_lifetime - ip6_desync_factor; ifra.ifra_lifetime.ia6t_vltime = vltime0; ifra.ifra_lifetime.ia6t_pltime = pltime0; /* * A temporary address is created only if this calculated Preferred * Lifetime is greater than REGEN_ADVANCE time units. */ if (ifra.ifra_lifetime.ia6t_pltime <= ip6_temp_regen_advance) return(0); /* XXX: scope zone ID? */ ifra.ifra_flags |= (IN6_IFF_AUTOCONF|IN6_IFF_TEMPORARY); /* allocate ifaddr structure, link into chain, etc. */ if ((error = in6_update_ifa(ifp, &ifra, NULL)) != 0) return(error); newia = in6ifa_ifpwithaddr(ifp, &ifra.ifra_addr.sin6_addr); if (newia == NULL) { /* XXX: can it happen? */ nd6log((LOG_ERR, "in6_tmpifadd: ifa update succeeded, but we got " "no ifaddr\n")); return(EINVAL); /* XXX */ } newia->ia6_ndpr = ia0->ia6_ndpr; newia->ia6_ndpr->ndpr_refcnt++; /* * A newly added address might affect the status of other addresses. * XXX: when the temporary address is generated with a new public * address, the onlink check is redundant. However, it would be safe * to do the check explicitly everywhere a new address is generated, * and, in fact, we surely need the check when we create a new * temporary address due to deprecation of an old temporary address. */ pfxlist_onlink_check(); return(0); } int in6_init_prefix_ltimes(struct nd_prefix *ndpr) { /* check if preferred lifetime > valid lifetime. RFC2462 5.5.3 (c) */ if (ndpr->ndpr_pltime > ndpr->ndpr_vltime) { nd6log((LOG_INFO, "in6_init_prefix_ltimes: preferred lifetime" "(%d) is greater than valid lifetime(%d)\n", (u_int)ndpr->ndpr_pltime, (u_int)ndpr->ndpr_vltime)); return (EINVAL); } if (ndpr->ndpr_pltime == ND6_INFINITE_LIFETIME) ndpr->ndpr_preferred = 0; else ndpr->ndpr_preferred = time_second + ndpr->ndpr_pltime; if (ndpr->ndpr_vltime == ND6_INFINITE_LIFETIME) ndpr->ndpr_expire = 0; else ndpr->ndpr_expire = time_second + ndpr->ndpr_vltime; return 0; } static void in6_init_address_ltimes(struct nd_prefix *new, struct in6_addrlifetime *lt6) { /* init ia6t_expire */ if (lt6->ia6t_vltime == ND6_INFINITE_LIFETIME) lt6->ia6t_expire = 0; else { lt6->ia6t_expire = time_second; lt6->ia6t_expire += lt6->ia6t_vltime; } /* init ia6t_preferred */ if (lt6->ia6t_pltime == ND6_INFINITE_LIFETIME) lt6->ia6t_preferred = 0; else { lt6->ia6t_preferred = time_second; lt6->ia6t_preferred += lt6->ia6t_pltime; } } /* * Delete all the routing table entries that use the specified gateway. * XXX: this function causes search through all entries of routing table, so * it shouldn't be called when acting as a router. */ void rt6_flush(gateway, ifp) struct in6_addr *gateway; struct ifnet *ifp; { struct radix_node_head *rnh = rt_tables[AF_INET6]; int s = splnet(); /* We'll care only link-local addresses */ if (!IN6_IS_ADDR_LINKLOCAL(gateway)) { splx(s); return; } /* XXX: hack for KAME's link-local address kludge */ gateway->s6_addr16[1] = htons(ifp->if_index); RADIX_NODE_HEAD_LOCK(rnh); rnh->rnh_walktree(rnh, rt6_deleteroute, (void *)gateway); RADIX_NODE_HEAD_UNLOCK(rnh); splx(s); } static int rt6_deleteroute(rn, arg) struct radix_node *rn; void *arg; { #define SIN6(s) ((struct sockaddr_in6 *)s) struct rtentry *rt = (struct rtentry *)rn; struct in6_addr *gate = (struct in6_addr *)arg; if (rt->rt_gateway == NULL || rt->rt_gateway->sa_family != AF_INET6) return(0); if (!IN6_ARE_ADDR_EQUAL(gate, &SIN6(rt->rt_gateway)->sin6_addr)) return(0); /* * Do not delete a static route. * XXX: this seems to be a bit ad-hoc. Should we consider the * 'cloned' bit instead? */ if ((rt->rt_flags & RTF_STATIC) != 0) return(0); /* * We delete only host route. This means, in particular, we don't * delete default route. */ if ((rt->rt_flags & RTF_HOST) == 0) return(0); return(rtrequest(RTM_DELETE, rt_key(rt), rt->rt_gateway, rt_mask(rt), rt->rt_flags, 0)); #undef SIN6 } int nd6_setdefaultiface(ifindex) int ifindex; { int error = 0; if (ifindex < 0 || if_index < ifindex) return(EINVAL); if (nd6_defifindex != ifindex) { nd6_defifindex = ifindex; if (nd6_defifindex > 0) nd6_defifp = ifnet_byindex(nd6_defifindex); else nd6_defifp = NULL; /* * If the Default Router List is empty, install a route * to the specified interface as default or remove the default * route when the default interface becomes canceled. * The check for the queue is actually redundant, but * we do this here to avoid re-install the default route * if the list is NOT empty. */ if (TAILQ_FIRST(&nd_defrouter) == NULL) defrouter_select(); /* * Our current implementation assumes one-to-one maping between * interfaces and links, so it would be natural to use the * default interface as the default link. */ scope6_setdefault(nd6_defifp); } return(error); }