Index: head/sys/netinet6/nd6.c =================================================================== --- head/sys/netinet6/nd6.c (revision 296062) +++ head/sys/netinet6/nd6.c (revision 296063) @@ -1,2632 +1,2670 @@ /*- * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the project nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* * $KAME: nd6.c,v 1.144 2001/05/24 07:44:00 itojun Exp $ */ #include __FBSDID("$FreeBSD$"); #include "opt_inet.h" #include "opt_inet6.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #define ND6_SLOWTIMER_INTERVAL (60 * 60) /* 1 hour */ #define ND6_RECALC_REACHTM_INTERVAL (60 * 120) /* 2 hours */ #define SIN6(s) ((const struct sockaddr_in6 *)(s)) /* timer values */ VNET_DEFINE(int, nd6_prune) = 1; /* walk list every 1 seconds */ VNET_DEFINE(int, nd6_delay) = 5; /* delay first probe time 5 second */ VNET_DEFINE(int, nd6_umaxtries) = 3; /* maximum unicast query */ VNET_DEFINE(int, nd6_mmaxtries) = 3; /* maximum multicast query */ VNET_DEFINE(int, nd6_useloopback) = 1; /* use loopback interface for * local traffic */ VNET_DEFINE(int, nd6_gctimer) = (60 * 60 * 24); /* 1 day: garbage * collection timer */ /* preventing too many loops in ND option parsing */ static VNET_DEFINE(int, nd6_maxndopt) = 10; /* max # of ND options allowed */ VNET_DEFINE(int, nd6_maxnudhint) = 0; /* max # of subsequent upper * layer hints */ static VNET_DEFINE(int, nd6_maxqueuelen) = 1; /* max pkts cached in unresolved * ND entries */ #define V_nd6_maxndopt VNET(nd6_maxndopt) #define V_nd6_maxqueuelen VNET(nd6_maxqueuelen) #ifdef ND6_DEBUG VNET_DEFINE(int, nd6_debug) = 1; #else VNET_DEFINE(int, nd6_debug) = 0; #endif static eventhandler_tag lle_event_eh, iflladdr_event_eh; VNET_DEFINE(struct nd_drhead, nd_defrouter); VNET_DEFINE(struct nd_prhead, nd_prefix); +VNET_DEFINE(struct rwlock, nd6_lock); VNET_DEFINE(int, nd6_recalc_reachtm_interval) = ND6_RECALC_REACHTM_INTERVAL; #define V_nd6_recalc_reachtm_interval VNET(nd6_recalc_reachtm_interval) int (*send_sendso_input_hook)(struct mbuf *, 
struct ifnet *, int, int);

/* Prototypes for the file-local (static) helpers. */
static int nd6_is_new_addr_neighbor(const struct sockaddr_in6 *,
	struct ifnet *);
static void nd6_setmtu0(struct ifnet *, struct nd_ifinfo *);
static void nd6_slowtimo(void *);
static int regen_tmpaddr(struct in6_ifaddr *);
static void nd6_free(struct llentry *, int);
static void nd6_free_redirect(const struct llentry *);
static void nd6_llinfo_timer(void *);
static void nd6_llinfo_settimer_locked(struct llentry *, long);
static void clear_llinfo_pqueue(struct llentry *);
static void nd6_rtrequest(int, struct rtentry *, struct rt_addrinfo *);
static int nd6_resolve_slow(struct ifnet *, int, struct mbuf *,
	const struct sockaddr_in6 *, u_char *, uint32_t *);
static int nd6_need_cache(struct ifnet *);

/* Per-vnet callouts: the slow timer (file-local) and the prune timer. */
static VNET_DEFINE(struct callout, nd6_slowtimo_ch);
#define	V_nd6_slowtimo_ch		VNET(nd6_slowtimo_ch)

VNET_DEFINE(struct callout, nd6_timer_ch);

/*
 * lle_event handler (registered from nd6_init()).
 *
 * For an AF_INET6 link-layer entry that was resolved (LLENTRY_RESOLVED)
 * or expired (LLENTRY_EXPIRED), fills a destination sockaddr from the
 * entry and an AF_LINK gateway sockaddr from the interface, and reports
 * them through rt_missmsg_fib() as RTM_ADD or RTM_DELETE respectively.
 * Entries of other address families and all other event types are
 * ignored.  The entry must be write-locked (LLE_WLOCK_ASSERT).
 */
static void
nd6_lle_event(void *arg __unused, struct llentry *lle, int evt)
{
	struct rt_addrinfo rtinfo;
	struct sockaddr_in6 dst;
	struct sockaddr_dl gw;
	struct ifnet *ifp;
	int type;

	LLE_WLOCK_ASSERT(lle);

	/* Only IPv6 entries are handled here. */
	if (lltable_get_af(lle->lle_tbl) != AF_INET6)
		return;

	/* Map the lle event onto a routing message type. */
	switch (evt) {
	case LLENTRY_RESOLVED:
		type = RTM_ADD;
		KASSERT(lle->la_flags & LLE_VALID,
		    ("%s: %p resolved but not valid?", __func__, lle));
		break;
	case LLENTRY_EXPIRED:
		type = RTM_DELETE;
		break;
	default:
		return;
	}

	ifp = lltable_get_ifp(lle->lle_tbl);

	bzero(&dst, sizeof(dst));
	bzero(&gw, sizeof(gw));
	bzero(&rtinfo, sizeof(rtinfo));
	lltable_fill_sa_entry(lle, (struct sockaddr *)&dst);
	dst.sin6_scope_id = in6_getscopezone(ifp,
	    in6_addrscope(&dst.sin6_addr));
	gw.sdl_len = sizeof(struct sockaddr_dl);
	gw.sdl_family = AF_LINK;
	gw.sdl_alen = ifp->if_addrlen;
	gw.sdl_index = ifp->if_index;
	gw.sdl_type = ifp->if_type;
	/* On expiry the link-layer address bytes are left zeroed. */
	if (evt == LLENTRY_RESOLVED)
		bcopy(lle->ll_addr, gw.sdl_data, ifp->if_addrlen);
	rtinfo.rti_info[RTAX_DST] = (struct sockaddr *)&dst;
	rtinfo.rti_info[RTAX_GATEWAY] = (struct sockaddr *)&gw;
	rtinfo.rti_addrs = RTA_DST | RTA_GATEWAY;
rt_missmsg_fib(type, &rtinfo, RTF_HOST | RTF_LLDATA | (
	    type == RTM_ADD ? RTF_UP: 0), 0, RT_DEFAULT_FIB);
}

/*
 * A handler for interface link layer address change event.
 * It passes the interface's IPv6 lltable to lltable_update_ifaddr().
 */
static void
nd6_iflladdr(void *arg __unused, struct ifnet *ifp)
{

	lltable_update_ifaddr(LLTABLE6(ifp));
}

/*
 * Per-vnet ND initialization: set up the nd6 lock (added by this
 * change), the prefix list and the default router list, start the slow
 * timer and initialize DAD.  The lle_event and iflladdr_event handlers
 * are registered only when running in the default vnet.
 */
void
nd6_init(void)
{

+	rw_init(&V_nd6_lock, "nd6");
+
	LIST_INIT(&V_nd_prefix);

	/* initialization of the default router list */
	TAILQ_INIT(&V_nd_defrouter);

	/* start timer */
	callout_init(&V_nd6_slowtimo_ch, 0);
	callout_reset(&V_nd6_slowtimo_ch, ND6_SLOWTIMER_INTERVAL * hz,
	    nd6_slowtimo, curvnet);

	nd6_dad_init();
	if (IS_DEFAULT_VNET(curvnet)) {
		lle_event_eh = EVENTHANDLER_REGISTER(lle_event, nd6_lle_event,
		    NULL, EVENTHANDLER_PRI_ANY);
		iflladdr_event_eh = EVENTHANDLER_REGISTER(iflladdr_event,
		    nd6_iflladdr, NULL, EVENTHANDLER_PRI_ANY);
	}
}

#ifdef VIMAGE
/*
 * Per-vnet ND teardown (VIMAGE only): drain both ND callouts,
 * deregister the event handlers when in the default vnet, and
 * destroy the nd6 lock (added by this change).
 */
void
nd6_destroy()
{

	callout_drain(&V_nd6_slowtimo_ch);
	callout_drain(&V_nd6_timer_ch);
	if (IS_DEFAULT_VNET(curvnet)) {
		EVENTHANDLER_DEREGISTER(lle_event, lle_event_eh);
		EVENTHANDLER_DEREGISTER(iflladdr_event, iflladdr_event_eh);
	}
+	rw_destroy(&V_nd6_lock);
}
#endif

/*
 * Allocate (M_WAITOK | M_ZERO) and initialize the per-interface ND
 * state for @ifp: default hop limit, reachable/retransmit timers and
 * the initial ND6_IFF_* flags derived from the global knobs and the
 * interface type.  Returns the new nd_ifinfo.
 */
struct nd_ifinfo *
nd6_ifattach(struct ifnet *ifp)
{
	struct nd_ifinfo *nd;

	nd = malloc(sizeof(*nd), M_IP6NDP, M_WAITOK | M_ZERO);
	nd->initialized = 1;

	nd->chlim = IPV6_DEFHLIM;
	nd->basereachable = REACHABLE_TIME;
	nd->reachable = ND_COMPUTE_RTIME(nd->basereachable);
	nd->retrans = RETRANS_TIMER;

	nd->flags = ND6_IFF_PERFORMNUD;

	/* A loopback interface always has ND6_IFF_AUTO_LINKLOCAL.
	 * XXXHRS: Clear ND6_IFF_AUTO_LINKLOCAL on an IFT_BRIDGE interface by
	 * default regardless of the V_ip6_auto_linklocal configuration to
	 * give a reasonable default behavior.
	 */
	if ((V_ip6_auto_linklocal && ifp->if_type != IFT_BRIDGE) ||
	    (ifp->if_flags & IFF_LOOPBACK))
		nd->flags |= ND6_IFF_AUTO_LINKLOCAL;
	/*
	 * A loopback interface does not need to accept RTADV.
* XXXHRS: Clear ND6_IFF_ACCEPT_RTADV on an IFT_BRIDGE interface by
	 * default regardless of the V_ip6_accept_rtadv configuration to
	 * prevent the interface from accepting RA messages arrived
	 * on one of the member interfaces with ND6_IFF_ACCEPT_RTADV.
	 */
	if (V_ip6_accept_rtadv &&
	    !(ifp->if_flags & IFF_LOOPBACK) &&
	    (ifp->if_type != IFT_BRIDGE))
			nd->flags |= ND6_IFF_ACCEPT_RTADV;
	if (V_ip6_no_radr && !(ifp->if_flags & IFF_LOOPBACK))
		nd->flags |= ND6_IFF_NO_RADR;

	/* XXX: we cannot call nd6_setmtu since ifp is not fully initialized */
	nd6_setmtu0(ifp, nd);

	return nd;
}

/*
 * Release per-interface ND state; frees @nd with the M_IP6NDP
 * malloc type that nd6_ifattach() allocates it with.
 */
void
nd6_ifdetach(struct nd_ifinfo *nd)
{

	free(nd, M_IP6NDP);
}

/*
 * Reset ND level link MTU. This function is called when the physical MTU
 * changes, which means we might have to adjust the ND level MTU.
 * Does nothing when the interface has no AF_INET6 afdata attached.
 */
void
nd6_setmtu(struct ifnet *ifp)
{
	if (ifp->if_afdata[AF_INET6] == NULL)
		return;

	nd6_setmtu0(ifp, ND_IFINFO(ifp));
}

/*
 * Recompute ndi->maxmtu from ifp->if_mtu.  For IFT_ARCNET, IFT_FDDI and
 * IFT_ISO88025 the value is capped by a per-link-type maximum; every
 * other type uses if_mtu directly.
 */
/* XXX todo: do not maintain copy of ifp->if_mtu in ndi->maxmtu */
void
nd6_setmtu0(struct ifnet *ifp, struct nd_ifinfo *ndi)
{
	u_int32_t omaxmtu;

	/* Remember the previous value so a shrink below IPV6_MMTU can be logged. */
	omaxmtu = ndi->maxmtu;

	switch (ifp->if_type) {
	case IFT_ARCNET:
		ndi->maxmtu = MIN(ARC_PHDS_MAXMTU, ifp->if_mtu); /* RFC2497 */
		break;
	case IFT_FDDI:
		ndi->maxmtu = MIN(FDDIIPMTU, ifp->if_mtu); /* RFC2467 */
		break;
	case IFT_ISO88025:
		 ndi->maxmtu = MIN(ISO88025_MAX_MTU, ifp->if_mtu);
		 break;
	default:
		ndi->maxmtu = ifp->if_mtu;
		break;
	}

	/*
	 * Decreasing the interface MTU under IPV6 minimum MTU may cause
	 * undesirable situation.  We thus notify the operator of the change
	 * explicitly.  The check for omaxmtu is necessary to restrict the
	 * log to the case of changing the MTU, not initializing it.
*/
	if (omaxmtu >= IPV6_MMTU && ndi->maxmtu < IPV6_MMTU) {
		log(LOG_NOTICE, "nd6_setmtu0: "
		    "new link MTU on %s (%lu) is too small for IPv6\n",
		    if_name(ifp), (unsigned long)ndi->maxmtu);
	}

	if (ndi->maxmtu > V_in6_maxmtu)
		in6_setmaxmtu(); /* check all interfaces just in case */

}

/*
 * Prepare @ndopts for walking the ND options that start at @opt and
 * span @icmp6len bytes: the structure is zeroed, nd_opts_search is set
 * to @opt and nd_opts_last to @opt + @icmp6len.  With a zero length the
 * walk is marked done immediately and nd_opts_search is cleared.
 */
void
nd6_option_init(void *opt, int icmp6len, union nd_opts *ndopts)
{

	bzero(ndopts, sizeof(*ndopts));
	ndopts->nd_opts_search = (struct nd_opt_hdr *)opt;
	ndopts->nd_opts_last
		= (struct nd_opt_hdr *)(((u_char *)opt) + icmp6len);

	if (icmp6len == 0) {
		ndopts->nd_opts_done = 1;
		ndopts->nd_opts_search = NULL;
	}
}

/*
 * Take one ND option.
 *
 * Returns the option at the current search position and advances the
 * position by the option length (nd_opt_len << 3 bytes).  Returns NULL
 * when there is nothing left to return.  On a malformed option (length
 * field outside the buffer, zero length, or option running past
 * nd_opts_last) *ndopts is zeroed before NULL is returned, so a NULL
 * nd_opts_last afterwards identifies the error case.
 */
struct nd_opt_hdr *
nd6_option(union nd_opts *ndopts)
{
	struct nd_opt_hdr *nd_opt;
	int olen;

	KASSERT(ndopts != NULL, ("%s: ndopts == NULL", __func__));
	KASSERT(ndopts->nd_opts_last != NULL, ("%s: uninitialized ndopts",
	    __func__));
	if (ndopts->nd_opts_search == NULL)
		return NULL;
	if (ndopts->nd_opts_done)
		return NULL;

	nd_opt = ndopts->nd_opts_search;

	/* make sure nd_opt_len is inside the buffer */
	if ((caddr_t)&nd_opt->nd_opt_len >= (caddr_t)ndopts->nd_opts_last) {
		bzero(ndopts, sizeof(*ndopts));
		return NULL;
	}

	/* The length field is shifted left by three to obtain a byte count. */
	olen = nd_opt->nd_opt_len << 3;
	if (olen == 0) {
		/*
		 * Message validation requires that all included
		 * options have a length that is greater than zero.
		 */
		bzero(ndopts, sizeof(*ndopts));
		return NULL;
	}

	ndopts->nd_opts_search = (struct nd_opt_hdr *)((caddr_t)nd_opt + olen);
	if (ndopts->nd_opts_search > ndopts->nd_opts_last) {
		/* option overruns the end of buffer, invalid */
		bzero(ndopts, sizeof(*ndopts));
		return NULL;
	} else if (ndopts->nd_opts_search == ndopts->nd_opts_last) {
		/* reached the end of options chain */
		ndopts->nd_opts_done = 1;
		ndopts->nd_opts_search = NULL;
	}
	return nd_opt;
}

/*
 * Parse multiple ND options.
 * This function is much easier to use, for ND routines that do not need
 * multiple options of the same type.
 */
int
nd6_options(union nd_opts *ndopts)
{
	struct nd_opt_hdr *nd_opt;
	int i = 0;	/* number of loop iterations, bounded by V_nd6_maxndopt */

	KASSERT(ndopts != NULL, ("%s: ndopts == NULL", __func__));
	KASSERT(ndopts->nd_opts_last != NULL, ("%s: uninitialized ndopts",
	    __func__));
	/* Nothing to parse (e.g. initialized with a zero option length). */
	if (ndopts->nd_opts_search == NULL)
		return 0;

	while (1) {
		nd_opt = nd6_option(ndopts);
		/*
		 * nd6_option() zeroes *ndopts when it rejects an option, so
		 * a NULL nd_opts_last here means a malformed option rather
		 * than the normal end of the option chain.
		 */
		if (nd_opt == NULL && ndopts->nd_opts_last == NULL) {
			/*
			 * Message validation requires that all included
			 * options have a length that is greater than zero.
			 */
			ICMP6STAT_INC(icp6s_nd_badopt);
			bzero(ndopts, sizeof(*ndopts));
			return -1;
		}

		/* No option and no error: go straight to the loop bookkeeping. */
		if (nd_opt == NULL)
			goto skip1;

		switch (nd_opt->nd_opt_type) {
		/* Options recorded once; a repeated option is logged and dropped. */
		case ND_OPT_SOURCE_LINKADDR:
		case ND_OPT_TARGET_LINKADDR:
		case ND_OPT_MTU:
		case ND_OPT_REDIRECTED_HEADER:
		case ND_OPT_NONCE:
			if (ndopts->nd_opt_array[nd_opt->nd_opt_type]) {
				nd6log((LOG_INFO,
				    "duplicated ND6 option found (type=%d)\n",
				    nd_opt->nd_opt_type));
				/* XXX bark? */
			} else {
				ndopts->nd_opt_array[nd_opt->nd_opt_type]
					= nd_opt;
			}
			break;
		/*
		 * Prefix information may repeat: the array slot keeps the
		 * first one and nd_opts_pi_end tracks the most recent one.
		 */
		case ND_OPT_PREFIX_INFORMATION:
			if (ndopts->nd_opt_array[nd_opt->nd_opt_type] == 0) {
				ndopts->nd_opt_array[nd_opt->nd_opt_type]
					= nd_opt;
			}
			ndopts->nd_opts_pi_end =
				(struct nd_opt_prefix_info *)nd_opt;
			break;
		/* What about ND_OPT_ROUTE_INFO? RFC 4191 */
		case ND_OPT_RDNSS:	/* RFC 6106 */
		case ND_OPT_DNSSL:	/* RFC 6106 */
			/*
			 * Silently ignore options we know and do not care about
			 * in the kernel.
			 */
			break;
		default:
			/*
			 * Unknown options must be silently ignored,
			 * to accomodate future extension to the protocol.
*/
			nd6log((LOG_DEBUG,
			    "nd6_options: unsupported option %d - "
			    "option ignored\n", nd_opt->nd_opt_type));
		}

skip1:
		/*
		 * Stop after more than V_nd6_maxndopt iterations; note that
		 * this path still returns 0 below.
		 */
		i++;
		if (i > V_nd6_maxndopt) {
			ICMP6STAT_INC(icp6s_nd_toomanyopt);
			nd6log((LOG_INFO, "too many loop in nd opt\n"));
			break;
		}

		if (ndopts->nd_opts_done)
			break;
	}

	return 0;
}

/*
 * ND6 timer routine to handle ND6 entries
 *
 * Arms or cancels the per-entry callout.  The entry must be
 * write-locked (LLE_WLOCK_ASSERT).
 *
 * tick < 0:  clear la_expire and ln_ntick and stop the callout.
 * tick >= 0: set la_expire to time_uptime + tick / hz, take a reference
 *            on the entry and arm the callout to run nd6_llinfo_timer().
 *            A tick count larger than INT_MAX is armed as INT_MAX with
 *            the excess kept in ln_ntick; otherwise ln_ntick is zeroed.
 *
 * If callout_stop()/callout_reset() returns a positive value, one
 * reference is dropped with LLE_REMREF().
 */
static void
nd6_llinfo_settimer_locked(struct llentry *ln, long tick)
{
	int canceled;

	LLE_WLOCK_ASSERT(ln);

	if (tick < 0) {
		ln->la_expire = 0;
		ln->ln_ntick = 0;
		canceled = callout_stop(&ln->lle_timer);
	} else {
		ln->la_expire = time_uptime + tick / hz;
		LLE_ADDREF(ln);
		if (tick > INT_MAX) {
			ln->ln_ntick = tick - INT_MAX;
			canceled = callout_reset(&ln->lle_timer, INT_MAX,
			    nd6_llinfo_timer, ln);
		} else {
			ln->ln_ntick = 0;
			canceled = callout_reset(&ln->lle_timer, tick,
			    nd6_llinfo_timer, ln);
		}
	}
	if (canceled > 0)
		LLE_REMREF(ln);
}

/*
 * Gets source address of the first packet in hold queue
 * and stores it in @src.
 * Returns pointer to @src (if hold queue is not empty) or NULL.
 * NULL is also returned when the first mbuf holds fewer bytes than
 * an IPv6 header.
 *
 * Set noinline to be dtrace-friendly
 */
static __noinline struct in6_addr *
nd6_llinfo_get_holdsrc(struct llentry *ln, struct in6_addr *src)
{
	struct ip6_hdr hdr;
	struct mbuf *m;

	if (ln->la_hold == NULL)
		return (NULL);

	/*
	 * assume every packet in la_hold has the same IP header
	 */
	m = ln->la_hold;
	if (sizeof(hdr) > m->m_len)
		return (NULL);

	/* Copy the header out of the mbuf before reading the source field. */
	m_copydata(m, 0, sizeof(hdr), (caddr_t)&hdr);
	*src = hdr.ip6_src;

	return (src);
}

/*
 * Checks if we need to switch from STALE state.
 *
 * RFC 4861 requires switching from STALE to DELAY state
 * on first packet matching entry, waiting V_nd6_delay and
 * transition to PROBE state (if upper layer confirmation was
 * not received).
 *
 * This code performs a bit differently:
 * On packet hit we don't change state (but desired state
 * can be guessed by control plane). However, after V_nd6_delay
 * seconds code will transition to PROBE state (so DELAY state
 * is kinda skipped in most situations).
*
 * Typically, V_nd6_gctimer is bigger than V_nd6_delay, so
 * we perform the following upon entering STALE state:
 *
 * 1) Arm timer to run each V_nd6_delay seconds to make sure that
 * if packet was transmitted at the start of given interval, we
 * would be able to switch to PROBE state in V_nd6_delay seconds
 * as user expects.
 *
 * 2) Reschedule timer until original V_nd6_gctimer expires keeping
 * lle in STALE state (remaining timer value stored in lle_remtime).
 *
 * 3) Reschedule timer if packet was transmitted less that V_nd6_delay
 * seconds ago.
 *
 * Returns non-zero value if the entry is still STALE (storing
 * the next timer interval in @pdelay).
 *
 * Returns zero value if original timer expired or we need to switch to
 * PROBE (store that in @do_switch variable).
 */
static int
nd6_is_stale(struct llentry *lle, long *pdelay, int *do_switch)
{
	int nd_delay, nd_gctimer, r_skip_req;
	time_t lle_hittime;
	long delay;

	*do_switch = 0;
	nd_gctimer = V_nd6_gctimer;
	nd_delay = V_nd6_delay;

	/* Snapshot both fields under the request lock. */
	LLE_REQ_LOCK(lle);
	r_skip_req = lle->r_skip_req;
	lle_hittime = lle->lle_hittime;
	LLE_REQ_UNLOCK(lle);

	if (r_skip_req > 0) {

		/*
		 * Nonzero r_skip_req value was set upon entering
		 * STALE state. Since value was not changed, no
		 * packets were passed using this lle. Ask for
		 * timer reschedule and keep STALE state.
		 */
		delay = (long)(MIN(nd_gctimer, nd_delay));
		delay *= hz;
		/*
		 * Consume one interval from the remaining GC time; the last
		 * interval is shortened to whatever is left.
		 */
		if (lle->lle_remtime > delay)
			lle->lle_remtime -= delay;
		else {
			delay = lle->lle_remtime;
			lle->lle_remtime = 0;
		}

		if (delay == 0) {

			/*
			 * The original nd6_gctimer timeout ended,
			 * no more rescheduling.
			 */
			return (0);
		}

		*pdelay = delay;
		return (1);
	}

	/*
	 * Packet received. Verify timestamp
	 */
	/* Seconds elapsed since the recorded hit time. */
	delay = (long)(time_uptime - lle_hittime);
	if (delay < nd_delay) {

		/*
		 * V_nd6_delay still not passed since the first
		 * hit in STALE state.
		 * Reshedule timer and return.
*/
		*pdelay = (long)(nd_delay - delay) * hz;
		return (1);
	}

	/* Request switching to probe */
	*do_switch = 1;
	return (0);
}


/*
 * Switch @lle state to new state optionally arming timers.
 *
 * Timer armed per new state (in ticks):
 *   INCOMPLETE: the interface's retrans value * hz / 1000
 *   REACHABLE:  the interface's reachable value * hz, unless the entry
 *               is permanent (ND6_LLINFO_PERMANENT)
 *   STALE:      MIN(V_nd6_gctimer, V_nd6_delay) * hz; the rest of the
 *               GC time is stored in lle_remtime and r_skip_req is set
 *   DELAY:      V_nd6_delay * hz, with la_asked reset to 0
 * For any other state no timer is armed.  lle_remtime is always
 * overwritten (0 except for STALE).
 *
 * Set noinline to be dtrace-friendly
 */
__noinline void
nd6_llinfo_setstate(struct llentry *lle, int newstate)
{
	struct ifnet *ifp;
	int nd_gctimer, nd_delay;
	long delay, remtime;

	delay = 0;
	remtime = 0;

	switch (newstate) {
	case ND6_LLINFO_INCOMPLETE:
		ifp = lle->lle_tbl->llt_ifp;
		delay = (long)ND_IFINFO(ifp)->retrans * hz / 1000;
		break;
	case ND6_LLINFO_REACHABLE:
		if (!ND6_LLINFO_PERMANENT(lle)) {
			ifp = lle->lle_tbl->llt_ifp;
			delay = (long)ND_IFINFO(ifp)->reachable * hz;
		}
		break;
	case ND6_LLINFO_STALE:

		/*
		 * Notify fast path that we want to know if any packet
		 * is transmitted by setting r_skip_req.
		 */
		LLE_REQ_LOCK(lle);
		lle->r_skip_req = 1;
		LLE_REQ_UNLOCK(lle);
		nd_delay = V_nd6_delay;
		nd_gctimer = V_nd6_gctimer;

		delay = (long)(MIN(nd_gctimer, nd_delay)) * hz;
		remtime = (long)nd_gctimer * hz - delay;
		break;
	case ND6_LLINFO_DELAY:
		lle->la_asked = 0;
		delay = (long)V_nd6_delay * hz;
		break;
	}

	/* A zero delay leaves the current timer untouched. */
	if (delay > 0)
		nd6_llinfo_settimer_locked(lle, delay);

	lle->lle_remtime = remtime;
	lle->ln_state = newstate;
}

/*
 * Timer-dependent part of nd state machine.
 *
 * Callout handler armed by nd6_llinfo_settimer_locked(); @arg is the
 * llentry whose timer fired.
 *
 * Set noinline to be dtrace-friendly
 */
static __noinline void
nd6_llinfo_timer(void *arg)
{
	struct llentry *ln;
	struct in6_addr *dst, *pdst, *psrc, src;
	struct ifnet *ifp;
	struct nd_ifinfo *ndi = NULL;
	int do_switch, send_ns;
	long delay;

	KASSERT(arg != NULL, ("%s: arg NULL", __func__));
	ln = (struct llentry *)arg;
	LLE_WLOCK(ln);
	if (callout_pending(&ln->lle_timer)) {
		/*
		 * Here we are a bit odd here in the treatment of 
		 * active/pending. If the pending bit is set, it got
		 * rescheduled before I ran.
The active
		 * bit we ignore, since if it was stopped
		 * in ll_tablefree() and was currently running
		 * it would have return 0 so the code would
		 * not have deleted it since the callout could
		 * not be stopped so we want to go through
		 * with the delete here now. If the callout
		 * was restarted, the pending bit will be back on and
		 * we just want to bail since the callout_reset would
		 * return 1 and our reference would have been removed
		 * by nd6_llinfo_settimer_locked above since canceled
		 * would have been 1.
		 */
		LLE_WUNLOCK(ln);
		return;
	}
	ifp = ln->lle_tbl->llt_ifp;
	CURVNET_SET(ifp->if_vnet);
	ndi = ND_IFINFO(ifp);
	send_ns = 0;
	/* By default a solicitation is addressed to the entry's own address. */
	dst = &ln->r_l3addr.addr6;
	pdst = dst;

	/*
	 * A timeout longer than INT_MAX ticks was split by
	 * nd6_llinfo_settimer_locked(); continue with the part left over.
	 *
	 * NOTE(review): in the else branch ln_ntick is zeroed before it is
	 * passed as the tick count, so the timer is re-armed with 0 ticks
	 * instead of the remainder.  In the first branch the helper is
	 * called with INT_MAX, and for tick <= INT_MAX it stores 0 in
	 * ln_ntick, overwriting the remainder computed just above.  Both
	 * look unintended -- confirm and fix on the source side.
	 */
	if (ln->ln_ntick > 0) {
		if (ln->ln_ntick > INT_MAX) {
			ln->ln_ntick -= INT_MAX;
			nd6_llinfo_settimer_locked(ln, INT_MAX);
		} else {
			ln->ln_ntick = 0;
			nd6_llinfo_settimer_locked(ln, ln->ln_ntick);
		}
		goto done;
	}

	/* Static entries are not driven by the state machine. */
	if (ln->la_flags & LLE_STATIC) {
		goto done;
	}

	if (ln->la_flags & LLE_DELETED) {
		nd6_free(ln, 0);
		ln = NULL;
		goto done;
	}

	switch (ln->ln_state) {
	case ND6_LLINFO_INCOMPLETE:
		if (ln->la_asked < V_nd6_mmaxtries) {
			ln->la_asked++;
			send_ns = 1;
			/* Send NS to multicast address */
			pdst = NULL;
		} else {
			/*
			 * Retries exhausted: detach the first held packet
			 * for the ICMPv6 error, clear the rest of the queue,
			 * signal LLENTRY_TIMEDOUT and free the entry.
			 */
			struct mbuf *m = ln->la_hold;
			if (m) {
				struct mbuf *m0;

				/*
				 * assuming every packet in la_hold has the
				 * same IP header.  Send error after unlock.
				 */
				m0 = m->m_nextpkt;
				m->m_nextpkt = NULL;
				ln->la_hold = m0;
				clear_llinfo_pqueue(ln);
			}
			EVENTHANDLER_INVOKE(lle_event, ln, LLENTRY_TIMEDOUT);
			nd6_free(ln, 0);
			ln = NULL;
			if (m != NULL)
				icmp6_error2(m, ICMP6_DST_UNREACH,
				    ICMP6_DST_UNREACH_ADDR, 0, ifp);
		}
		break;
	case ND6_LLINFO_REACHABLE:
		if (!ND6_LLINFO_PERMANENT(ln))
			nd6_llinfo_setstate(ln, ND6_LLINFO_STALE);
		break;

	case ND6_LLINFO_STALE:
		if (nd6_is_stale(ln, &delay, &do_switch) != 0) {

			/*
			 * No packet has used this entry and GC timeout
			 * has not been passed. Reschedule timer and
			 * return.
			 */
			nd6_llinfo_settimer_locked(ln, delay);
			break;
		}

		if (do_switch == 0) {

			/*
			 * GC timer has ended and entry hasn't been used.
* Run Garbage collector (RFC 4861, 5.3)
			 */
			if (!ND6_LLINFO_PERMANENT(ln)) {
				EVENTHANDLER_INVOKE(lle_event, ln,
				    LLENTRY_EXPIRED);
				nd6_free(ln, 1);
				ln = NULL;
			}
			break;
		}

		/* Entry has been used AND delay timer has ended. */

		/* FALLTHROUGH */

	case ND6_LLINFO_DELAY:
		if (ndi && (ndi->flags & ND6_IFF_PERFORMNUD) != 0) {
			/* We need NUD */
			ln->la_asked = 1;
			nd6_llinfo_setstate(ln, ND6_LLINFO_PROBE);
			send_ns = 1;
		} else
			nd6_llinfo_setstate(ln, ND6_LLINFO_STALE); /* XXX */
		break;
	case ND6_LLINFO_PROBE:
		/* Keep probing until V_nd6_umaxtries is reached, then expire. */
		if (ln->la_asked < V_nd6_umaxtries) {
			ln->la_asked++;
			send_ns = 1;
		} else {
			EVENTHANDLER_INVOKE(lle_event, ln, LLENTRY_EXPIRED);
			nd6_free(ln, 0);
			ln = NULL;
		}
		break;
	default:
		panic("%s: paths in a dark night can be confusing: %d",
		    __func__, ln->ln_state);
	}
done:
	/*
	 * Common exit.  When a solicitation is due, re-arm the timer with
	 * the retransmit interval, pick the source address from the hold
	 * queue while the entry is still locked, release the entry and
	 * only then send the NS.
	 */
	if (send_ns != 0) {
		nd6_llinfo_settimer_locked(ln, (long)ndi->retrans * hz / 1000);
		psrc = nd6_llinfo_get_holdsrc(ln, &src);
		LLE_FREE_LOCKED(ln);
		ln = NULL;
		nd6_ns_output(ifp, psrc, pdst, dst, NULL);
	}

	/* ln is NULL here if it was already released or freed above. */
	if (ln != NULL)
		LLE_FREE_LOCKED(ln);
	CURVNET_RESTORE();
}


/*
 * ND6 timer routine to expire default route list and prefix list
 *
 * Re-arms itself every V_nd6_prune seconds.  On the new side of this
 * change, expired default routers are moved onto the local queue drq
 * with defrouter_unlink() while ND6_WLOCK is held, and are passed to
 * defrouter_del() only after the lock has been dropped.
 */
void
nd6_timer(void *arg)
{
	CURVNET_SET((struct vnet *) arg);
+	struct nd_drhead drq;
	struct nd_defrouter *dr, *ndr;
	struct nd_prefix *pr, *npr;
	struct in6_ifaddr *ia6, *nia6;

	callout_reset(&V_nd6_timer_ch, V_nd6_prune * hz,
	    nd6_timer, curvnet);

+	TAILQ_INIT(&drq);
+
	/* expire default router list */
-	TAILQ_FOREACH_SAFE(dr, &V_nd_defrouter, dr_entry, ndr) {
+	ND6_WLOCK();
+	TAILQ_FOREACH_SAFE(dr, &V_nd_defrouter, dr_entry, ndr)
		if (dr->expire && dr->expire < time_uptime)
-			defrtrlist_del(dr);
+			defrouter_unlink(dr, &drq);
+	ND6_WUNLOCK();
+
+	while ((dr = TAILQ_FIRST(&drq)) != NULL) {
+		TAILQ_REMOVE(&drq, dr, dr_entry);
+		defrouter_del(dr);
	}

	/*
	 * expire interface addresses.
	 * in the past the loop was inside prefix expiry processing.
	 * However, from a stricter speci-confrmance standpoint, we should
	 * rather separate address lifetimes and prefix lifetimes.
	 *
	 * XXXRW: in6_ifaddrhead locking.
*/
  addrloop:
	/*
	 * Each address falls into exactly one of four cases below:
	 * invalid, deprecated, tentative, or none of these.
	 */
	TAILQ_FOREACH_SAFE(ia6, &V_in6_ifaddrhead, ia_link, nia6) {
		/* check address lifetime */
		if (IFA6_IS_INVALID(ia6)) {
			int regen = 0;

			/*
			 * If the expiring address is temporary, try
			 * regenerating a new one.  This would be useful when
			 * we suspended a laptop PC, then turned it on after a
			 * period that could invalidate all temporary
			 * addresses.  Although we may have to restart the
			 * loop (see below), it must be after purging the
			 * address.  Otherwise, we'd see an infinite loop of
			 * regeneration.
			 */
			if (V_ip6_use_tempaddr &&
			    (ia6->ia6_flags & IN6_IFF_TEMPORARY) != 0) {
				if (regen_tmpaddr(ia6) == 0)
					regen = 1;
			}

			in6_purgeaddr(&ia6->ia_ifa);

			if (regen)
				goto addrloop; /* XXX: see below */
		} else if (IFA6_IS_DEPRECATED(ia6)) {
			/* Remember the old flags to detect the transition. */
			int oldflags = ia6->ia6_flags;

			ia6->ia6_flags |= IN6_IFF_DEPRECATED;

			/*
			 * If a temporary address has just become deprecated,
			 * regenerate a new one if possible.
			 */
			if (V_ip6_use_tempaddr &&
			    (ia6->ia6_flags & IN6_IFF_TEMPORARY) != 0 &&
			    (oldflags & IN6_IFF_DEPRECATED) == 0) {

				if (regen_tmpaddr(ia6) == 0) {
					/*
					 * A new temporary address is
					 * generated.
					 * XXX: this means the address chain
					 * has changed while we are still in
					 * the loop.  Although the change
					 * would not cause disaster (because
					 * it's not a deletion, but an
					 * addition,) we'd rather restart the
					 * loop just for safety.  Or does this
					 * significantly reduce performance??
					 */
					goto addrloop;
				}
			}
		} else if ((ia6->ia6_flags & IN6_IFF_TENTATIVE) != 0) {
			/*
			 * Schedule DAD for a tentative address.  This happens
			 * if the interface was down or not running
			 * when the address was configured.
			 */
			int delay;

			/* Random delay below MAX_RTR_SOLICITATION_DELAY * hz ticks. */
			delay = arc4random() %
			    (MAX_RTR_SOLICITATION_DELAY * hz);
			nd6_dad_start((struct ifaddr *)ia6, delay);
		} else {
			/*
			 * Check status of the interface.  If it is down,
			 * mark the address as tentative for future DAD.
*/
			if ((ia6->ia_ifp->if_flags & IFF_UP) == 0 ||
			    (ia6->ia_ifp->if_drv_flags & IFF_DRV_RUNNING)
				== 0 ||
			    (ND_IFINFO(ia6->ia_ifp)->flags &
				ND6_IFF_IFDISABLED) != 0) {
				ia6->ia6_flags &= ~IN6_IFF_DUPLICATED;
				ia6->ia6_flags |= IN6_IFF_TENTATIVE;
			}
			/*
			 * A new RA might have made a deprecated address
			 * preferred.
			 */
			ia6->ia6_flags &= ~IN6_IFF_DEPRECATED;
		}
	}

	/* expire prefix list */
	LIST_FOREACH_SAFE(pr, &V_nd_prefix, ndpr_entry, npr) {
		/*
		 * check prefix lifetime.
		 * since pltime is just for autoconf, pltime processing for
		 * prefix is not necessary.
		 */
		if (pr->ndpr_vltime != ND6_INFINITE_LIFETIME &&
		    time_uptime - pr->ndpr_lastupdate > pr->ndpr_vltime) {

			/*
			 * address expiration and prefix expiration are
			 * separate.  NEVER perform in6_purgeaddr here.
			 */
			prelist_remove(pr);
		}
	}
	CURVNET_RESTORE();
}

/*
 * ia6 - deprecated/invalidated temporary address
 *
 * Try to create a replacement temporary address for @ia6.  The
 * interface's address list is scanned (under IF_ADDR_RLOCK) for an
 * autoconfigured address on the same prefix as @ia6: a still-preferred
 * temporary address cancels the regeneration, otherwise a preferred
 * non-temporary one is used as the base for in6_tmpifadd().
 *
 * Returns 0 when a new temporary address was created, -1 otherwise.
 */
static int
regen_tmpaddr(struct in6_ifaddr *ia6)
{
	struct ifaddr *ifa;
	struct ifnet *ifp;
	struct in6_ifaddr *public_ifa6 = NULL;

	ifp = ia6->ia_ifa.ifa_ifp;
	IF_ADDR_RLOCK(ifp);
	TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
		struct in6_ifaddr *it6;

		if (ifa->ifa_addr->sa_family != AF_INET6)
			continue;

		it6 = (struct in6_ifaddr *)ifa;

		/* ignore no autoconf addresses. */
		if ((it6->ia6_flags & IN6_IFF_AUTOCONF) == 0)
			continue;

		/* ignore autoconf addresses with different prefixes. */
		if (it6->ia6_ndpr == NULL || it6->ia6_ndpr != ia6->ia6_ndpr)
			continue;

		/*
		 * Now we are looking at an autoconf address with the same
		 * prefix as ours.  If the address is temporary and is still
		 * preferred, do not create another one.  It would be rare, but
		 * could happen, for example, when we resume a laptop PC after
		 * a long period.
		 */
		if ((it6->ia6_flags & IN6_IFF_TEMPORARY) != 0 &&
		    !IFA6_IS_DEPRECATED(it6)) {
			public_ifa6 = NULL;
			break;
		}

		/*
		 * This is a public autoconf address that has the same prefix
		 * as ours.  If it is preferred, keep it.  We can't break the
		 * loop here, because there may be a still-preferred temporary
		 * address with the prefix.
*/
		if (!IFA6_IS_DEPRECATED(it6))
			public_ifa6 = it6;
	}
	/* Take a reference before the address list lock is dropped. */
	if (public_ifa6 != NULL)
		ifa_ref(&public_ifa6->ia_ifa);
	IF_ADDR_RUNLOCK(ifp);

	if (public_ifa6 != NULL) {
		int e;

		/* The reference is released on both the error and success path. */
		if ((e = in6_tmpifadd(public_ifa6, 0, 0)) != 0) {
			ifa_free(&public_ifa6->ia_ifa);
			log(LOG_NOTICE, "regen_tmpaddr: failed to create a new"
			    " tmp addr,errno=%d\n", e);
			return (-1);
		}
		ifa_free(&public_ifa6->ia_ifa);
		return (0);
	}

	return (-1);
}

/*
 * Nuke neighbor cache/prefix/default router management table, right before
 * ifp goes away.
 *
 * On the new side of this change, default routers are unlinked onto the
 * local queue drq under ND6_WLOCK and passed to defrouter_del() after
 * the lock is dropped.
 */
void
nd6_purge(struct ifnet *ifp)
{
+	struct nd_drhead drq;
	struct nd_defrouter *dr, *ndr;
	struct nd_prefix *pr, *npr;

+	TAILQ_INIT(&drq);
+
	/*
	 * Nuke default router list entries toward ifp.
	 * We defer removal of default router list entries that is installed
	 * in the routing table, in order to keep additional side effects as
	 * small as possible.
	 */
	/*
	 * NOTE(review): the removed lines guarded the deletion with
	 * "dr->ifp == ifp"; the added lines call defrouter_unlink()
	 * unconditionally, so both loops together unlink every default
	 * router, not only those toward ifp as the comment above says.
	 * This looks like a dropped condition -- confirm and restore the
	 * interface check.
	 */
+	ND6_WLOCK();
	TAILQ_FOREACH_SAFE(dr, &V_nd_defrouter, dr_entry, ndr) {
		if (dr->installed)
			continue;
-		if (dr->ifp == ifp)
-			defrtrlist_del(dr);
+		defrouter_unlink(dr, &drq);
	}
	TAILQ_FOREACH_SAFE(dr, &V_nd_defrouter, dr_entry, ndr) {
		if (!dr->installed)
			continue;
-		if (dr->ifp == ifp)
-			defrtrlist_del(dr);
+		defrouter_unlink(dr, &drq);
	}
+	ND6_WUNLOCK();

+	while ((dr = TAILQ_FIRST(&drq)) != NULL) {
+		TAILQ_REMOVE(&drq, dr, dr_entry);
+		defrouter_del(dr);
+	}
+
	/* Nuke prefix list entries toward ifp */
	LIST_FOREACH_SAFE(pr, &V_nd_prefix, ndpr_entry, npr) {
		if (pr->ndpr_ifp == ifp) {
			/*
			 * Because if_detach() does *not* release prefixes
			 * while purging addresses the reference count will
			 * still be above zero. We therefore reset it to
			 * make sure that the prefix really gets purged.
			 */
			pr->ndpr_refcnt = 0;

			/*
			 * Previously, pr->ndpr_addr is removed as well,
			 * but I strongly believe we don't have to do it.
			 * nd6_purge() is only called from in6_ifdetach(),
			 * which removes all the associated interface addresses
			 * by itself.
* (jinmei@kame.net 20010129)
			 */
			prelist_remove(pr);
		}
	}

	/* cancel default outgoing interface setting */
	if (V_nd6_defifindex == ifp->if_index)
		nd6_setdefaultiface(0);

	if (ND_IFINFO(ifp)->flags & ND6_IFF_ACCEPT_RTADV) {
		/* Refresh default router list. */
		defrouter_select();
	}

	/* XXXXX
	 * We do not nuke the neighbor cache entries here any more
	 * because the neighbor cache is kept in if_afdata[AF_INET6].
	 * nd6_purge() is invoked by in6_ifdetach() which is called
	 * from if_detach() where everything gets purged. So let
	 * in6_domifdetach() do the actual L2 table purging work.
	 */
}

/* 
 * the caller acquires and releases the lock on the lltbls
 * Returns the llentry locked
 *
 * Looks up @addr6 in the IPv6 lltable of @ifp by wrapping it in a
 * sockaddr_in6 and passing it, with @flags, to lla_lookup().  The
 * result of lla_lookup() is returned as is; callers in this file test
 * it against NULL.
 */
struct llentry *
nd6_lookup(const struct in6_addr *addr6, int flags, struct ifnet *ifp)
{
	struct sockaddr_in6 sin6;
	struct llentry *ln;

	bzero(&sin6, sizeof(sin6));
	sin6.sin6_len = sizeof(struct sockaddr_in6);
	sin6.sin6_family = AF_INET6;
	sin6.sin6_addr = *addr6;

	IF_AFDATA_LOCK_ASSERT(ifp);

	ln = lla_lookup(LLTABLE6(ifp), flags, (struct sockaddr *)&sin6);

	return (ln);
}

/*
 * Allocate a new entry for @addr6 from the IPv6 lltable of @ifp and,
 * on success, set its state to ND6_LLINFO_NOSTATE.  Returns the entry
 * or NULL.
 *
 * NOTE(review): @flags is not used; lltable_alloc_entry() is always
 * called with 0.
 */
struct llentry *
nd6_alloc(const struct in6_addr *addr6, int flags, struct ifnet *ifp)
{
	struct sockaddr_in6 sin6;
	struct llentry *ln;

	bzero(&sin6, sizeof(sin6));
	sin6.sin6_len = sizeof(struct sockaddr_in6);
	sin6.sin6_family = AF_INET6;
	sin6.sin6_addr = *addr6;

	ln = lltable_alloc_entry(LLTABLE6(ifp), 0, (struct sockaddr *)&sin6);
	if (ln != NULL)
		ln->ln_state = ND6_LLINFO_NOSTATE;

	return (ln);
}

/*
 * Test whether a given IPv6 address is a neighbor or not, ignoring
 * the actual neighbor cache.  The neighbor cache is ignored in order
 * to not reenter the routing code from within itself.
 *
 * Returns 1 if @addr is considered a neighbor on @ifp, 0 otherwise.
 */
static int
nd6_is_new_addr_neighbor(const struct sockaddr_in6 *addr, struct ifnet *ifp)
{
	struct nd_prefix *pr;
	struct ifaddr *dstaddr;
	struct rt_addrinfo info;
	struct sockaddr_in6 rt_key;
	struct sockaddr *dst6;
	int fibnum;

	/*
	 * A link-local address is always a neighbor.
	 * XXX: a link does not necessarily specify a single interface.
*/
	if (IN6_IS_ADDR_LINKLOCAL(&addr->sin6_addr)) {
		struct sockaddr_in6 sin6_copy;
		u_int32_t zone;

		/*
		 * We need sin6_copy since sa6_recoverscope() may modify the
		 * content (XXX).
		 */
		sin6_copy = *addr;
		if (sa6_recoverscope(&sin6_copy))
			return (0); /* XXX: should be impossible */
		if (in6_setscope(&sin6_copy.sin6_addr, ifp, &zone))
			return (0);
		/* Neighbor only if the address's zone is the one of ifp. */
		if (sin6_copy.sin6_scope_id == zone)
			return (1);
		else
			return (0);
	}

	/* rt_key receives the destination of the route found below. */
	bzero(&rt_key, sizeof(rt_key));
	bzero(&info, sizeof(info));
	info.rti_info[RTAX_DST] = (struct sockaddr *)&rt_key;

	/* Always use the default FIB here. XXME - why? */
	fibnum = RT_DEFAULT_FIB;

	/*
	 * If the address matches one of our addresses,
	 * it should be a neighbor.
	 * If the address matches one of our on-link prefixes, it should be a
	 * neighbor.
	 */
	LIST_FOREACH(pr, &V_nd_prefix, ndpr_entry) {
		if (pr->ndpr_ifp != ifp)
			continue;

		/*
		 * A prefix not flagged on-link is still accepted when a
		 * route lookup for it succeeds and the route's key equals
		 * the prefix address.
		 */
		if (!(pr->ndpr_stateflags & NDPRF_ONLINK)) {

			/* Always use the default FIB here. */
			dst6 = (struct sockaddr *)&pr->ndpr_prefix;

			/* Restore length field before retrying lookup */
			rt_key.sin6_len = sizeof(rt_key);
			if (rib_lookup_info(fibnum, dst6, 0, 0, &info) != 0)
				continue;
			/*
			 * This is the case where multiple interfaces
			 * have the same prefix, but only one is installed 
			 * into the routing table and that prefix entry
			 * is not the one being examined here. In the case
			 * where RADIX_MPATH is enabled, multiple route
			 * entries (of the same rt_key value) will be 
			 * installed because the interface addresses all
			 * differ.
			 */
			if (!IN6_ARE_ADDR_EQUAL(&pr->ndpr_prefix.sin6_addr,
			       &rt_key.sin6_addr))
				continue;
		}

		if (IN6_ARE_MASKED_ADDR_EQUAL(&pr->ndpr_prefix.sin6_addr,
		    &addr->sin6_addr, &pr->ndpr_mask))
			return (1);
	}

	/*
	 * If the address is assigned on the node of the other side of
	 * a p2p interface, the address should be a neighbor.
*/
	dstaddr = ifa_ifwithdstaddr((const struct sockaddr *)addr, RT_ALL_FIBS);
	if (dstaddr != NULL) {
		/* The ifaddr is released with ifa_free() on both paths. */
		if (dstaddr->ifa_ifp == ifp) {
			ifa_free(dstaddr);
			return (1);
		}
		ifa_free(dstaddr);
	}

	/*
	 * If the default router list is empty, all addresses are regarded
	 * as on-link, and thus, as a neighbor.
	 */
	/*
	 * NOTE(review): V_nd_defrouter is tested here without the nd6 lock
	 * that this change takes around other accesses to the list --
	 * confirm that an unlocked emptiness check is intended.
	 */
	if (ND_IFINFO(ifp)->flags & ND6_IFF_ACCEPT_RTADV &&
	    TAILQ_EMPTY(&V_nd_defrouter) &&
	    V_nd6_defifindex == ifp->if_index) {
		return (1);
	}

	return (0);
}


/*
 * Detect if a given IPv6 address identifies a neighbor on a given link.
 * XXX: should take care of the destination of a p2p link?
 *
 * Returns 1 when nd6_is_new_addr_neighbor() accepts the address or when
 * the neighbor cache holds an entry for it, 0 otherwise.  Must be
 * called without the afdata lock held (IF_AFDATA_UNLOCK_ASSERT).
 */
int
nd6_is_addr_neighbor(const struct sockaddr_in6 *addr, struct ifnet *ifp)
{
	struct llentry *lle;
	int rc = 0;

	IF_AFDATA_UNLOCK_ASSERT(ifp);
	if (nd6_is_new_addr_neighbor(addr, ifp))
		return (1);

	/*
	 * Even if the address matches none of our addresses, it might be
	 * in the neighbor cache.
	 */
	IF_AFDATA_RLOCK(ifp);
	if ((lle = nd6_lookup(&addr->sin6_addr, 0, ifp)) != NULL) {
		/* Only existence matters; release the entry right away. */
		LLE_RUNLOCK(lle);
		rc = 1;
	}
	IF_AFDATA_RUNLOCK(ifp);
	return (rc);
}

/*
 * Free an nd6 llinfo entry.
 * Since the function would cause significant changes in the kernel, DO NOT
 * make it global, unless you have a strong reason for the change, and are sure
 * that the change is safe.
 *
 * @ln must be write-locked on entry (LLE_WLOCK_ASSERT).  @gc is
 * non-zero when the entry is being freed by garbage collection only.
 *
 * Set noinline to be dtrace-friendly
 */
static __noinline void
nd6_free(struct llentry *ln, int gc)
{
	struct nd_defrouter *dr;
	struct ifnet *ifp;

	LLE_WLOCK_ASSERT(ln);

	/*
	 * we used to have pfctlinput(PRC_HOSTDEAD) here.
	 * even though it is not harmful, it was not really necessary.
	 */

	/* cancel timer */
	nd6_llinfo_settimer_locked(ln, -1);

	/*
	 * dr is initialized (new side of this change) because it is
	 * examined at the end of the function even when the lookup in
	 * the block below is skipped.
	 */
+	dr = NULL;
	ifp = ln->lle_tbl->llt_ifp;
-
	if (ND_IFINFO(ifp)->flags & ND6_IFF_ACCEPT_RTADV) {
		dr = defrouter_lookup(&ln->r_l3addr.addr6, ifp);

		if (dr != NULL && dr->expire &&
		    ln->ln_state == ND6_LLINFO_STALE && gc) {
			/*
			 * If the reason for the deletion is just garbage
			 * collection, and the neighbor is an active default
			 * router, do not delete it.  Instead, reset the GC
			 * timer using the router's lifetime.
* Simply deleting the entry would affect default * router selection, which is not necessarily a good * thing, especially when we're using router preference * values. * XXX: the check for ln_state would be redundant, * but we intentionally keep it just in case. */ if (dr->expire > time_uptime) nd6_llinfo_settimer_locked(ln, (dr->expire - time_uptime) * hz); else nd6_llinfo_settimer_locked(ln, (long)V_nd6_gctimer * hz); LLE_REMREF(ln); LLE_WUNLOCK(ln); + defrouter_rele(dr); return; } if (dr) { /* * Unreachablity of a router might affect the default * router selection and on-link detection of advertised * prefixes. */ /* * Temporarily fake the state to choose a new default * router and to perform on-link determination of * prefixes correctly. * Below the state will be set correctly, * or the entry itself will be deleted. */ ln->ln_state = ND6_LLINFO_INCOMPLETE; } if (ln->ln_router || dr) { /* * We need to unlock to avoid a LOR with rt6_flush() with the * rnh and for the calls to pfxlist_onlink_check() and * defrouter_select() in the block further down for calls * into nd6_lookup(). We still hold a ref. */ LLE_WUNLOCK(ln); /* * rt6_flush must be called whether or not the neighbor * is in the Default Router List. * See a corresponding comment in nd6_na_input(). */ rt6_flush(&ln->r_l3addr.addr6, ifp); } if (dr) { /* * Since defrouter_select() does not affect the * on-link determination and MIP6 needs the check * before the default router selection, we perform * the check now. */ pfxlist_onlink_check(); /* * Refresh default router list. */ defrouter_select(); } /* * If this entry was added by an on-link redirect, remove the * corresponding host route. */ if (ln->la_flags & LLE_REDIRECT) nd6_free_redirect(ln); if (ln->ln_router || dr) LLE_WLOCK(ln); } /* * Save to unlock. We still hold an extra reference and will not * free(9) in llentry_free() if someone else holds one as well. 
*/ LLE_WUNLOCK(ln); IF_AFDATA_LOCK(ifp); LLE_WLOCK(ln); /* Guard against race with other llentry_free(). */ if (ln->la_flags & LLE_LINKED) { /* Remove callout reference */ LLE_REMREF(ln); lltable_unlink_entry(ln->lle_tbl, ln); } IF_AFDATA_UNLOCK(ifp); llentry_free(ln); + if (dr != NULL) + defrouter_rele(dr); } static int nd6_isdynrte(const struct rtentry *rt, void *xap) { if (rt->rt_flags == (RTF_UP | RTF_HOST | RTF_DYNAMIC)) return (1); return (0); } /* * Remove the rtentry for the given llentry, * both of which were installed by a redirect. */ static void nd6_free_redirect(const struct llentry *ln) { int fibnum; struct sockaddr_in6 sin6; struct rt_addrinfo info; lltable_fill_sa_entry(ln, (struct sockaddr *)&sin6); memset(&info, 0, sizeof(info)); info.rti_info[RTAX_DST] = (struct sockaddr *)&sin6; info.rti_filter = nd6_isdynrte; for (fibnum = 0; fibnum < rt_numfibs; fibnum++) rtrequest1_fib(RTM_DELETE, &info, NULL, fibnum); } /* * Rejuvenate this function for routing operations related * processing. */ void nd6_rtrequest(int req, struct rtentry *rt, struct rt_addrinfo *info) { struct sockaddr_in6 *gateway; struct nd_defrouter *dr; struct ifnet *ifp; gateway = (struct sockaddr_in6 *)rt->rt_gateway; ifp = rt->rt_ifp; switch (req) { case RTM_ADD: break; case RTM_DELETE: if (!ifp) return; /* * Only indirect routes are interesting. 
*/ if ((rt->rt_flags & RTF_GATEWAY) == 0) return; /* * check for default route */ - if (IN6_ARE_ADDR_EQUAL(&in6addr_any, - &SIN6(rt_key(rt))->sin6_addr)) { - + if (IN6_ARE_ADDR_EQUAL(&in6addr_any, + &SIN6(rt_key(rt))->sin6_addr)) { dr = defrouter_lookup(&gateway->sin6_addr, ifp); - if (dr != NULL) + if (dr != NULL) { dr->installed = 0; + defrouter_rele(dr); + } } break; } } int nd6_ioctl(u_long cmd, caddr_t data, struct ifnet *ifp) { struct in6_ndireq *ndi = (struct in6_ndireq *)data; struct in6_nbrinfo *nbi = (struct in6_nbrinfo *)data; struct in6_ndifreq *ndif = (struct in6_ndifreq *)data; int error = 0; if (ifp->if_afdata[AF_INET6] == NULL) return (EPFNOSUPPORT); switch (cmd) { case OSIOCGIFINFO_IN6: #define ND ndi->ndi /* XXX: old ndp(8) assumes a positive value for linkmtu. */ bzero(&ND, sizeof(ND)); ND.linkmtu = IN6_LINKMTU(ifp); ND.maxmtu = ND_IFINFO(ifp)->maxmtu; ND.basereachable = ND_IFINFO(ifp)->basereachable; ND.reachable = ND_IFINFO(ifp)->reachable; ND.retrans = ND_IFINFO(ifp)->retrans; ND.flags = ND_IFINFO(ifp)->flags; ND.recalctm = ND_IFINFO(ifp)->recalctm; ND.chlim = ND_IFINFO(ifp)->chlim; break; case SIOCGIFINFO_IN6: ND = *ND_IFINFO(ifp); break; case SIOCSIFINFO_IN6: /* * used to change host variables from userland. * intented for a use on router to reflect RA configurations. 
*/ /* 0 means 'unspecified' */ if (ND.linkmtu != 0) { if (ND.linkmtu < IPV6_MMTU || ND.linkmtu > IN6_LINKMTU(ifp)) { error = EINVAL; break; } ND_IFINFO(ifp)->linkmtu = ND.linkmtu; } if (ND.basereachable != 0) { int obasereachable = ND_IFINFO(ifp)->basereachable; ND_IFINFO(ifp)->basereachable = ND.basereachable; if (ND.basereachable != obasereachable) ND_IFINFO(ifp)->reachable = ND_COMPUTE_RTIME(ND.basereachable); } if (ND.retrans != 0) ND_IFINFO(ifp)->retrans = ND.retrans; if (ND.chlim != 0) ND_IFINFO(ifp)->chlim = ND.chlim; /* FALLTHROUGH */ case SIOCSIFINFO_FLAGS: { struct ifaddr *ifa; struct in6_ifaddr *ia; if ((ND_IFINFO(ifp)->flags & ND6_IFF_IFDISABLED) && !(ND.flags & ND6_IFF_IFDISABLED)) { /* ifdisabled 1->0 transision */ /* * If the interface is marked as ND6_IFF_IFDISABLED and * has an link-local address with IN6_IFF_DUPLICATED, * do not clear ND6_IFF_IFDISABLED. * See RFC 4862, Section 5.4.5. */ IF_ADDR_RLOCK(ifp); TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { if (ifa->ifa_addr->sa_family != AF_INET6) continue; ia = (struct in6_ifaddr *)ifa; if ((ia->ia6_flags & IN6_IFF_DUPLICATED) && IN6_IS_ADDR_LINKLOCAL(IA6_IN6(ia))) break; } IF_ADDR_RUNLOCK(ifp); if (ifa != NULL) { /* LLA is duplicated. */ ND.flags |= ND6_IFF_IFDISABLED; log(LOG_ERR, "Cannot enable an interface" " with a link-local address marked" " duplicate.\n"); } else { ND_IFINFO(ifp)->flags &= ~ND6_IFF_IFDISABLED; if (ifp->if_flags & IFF_UP) in6_if_up(ifp); } } else if (!(ND_IFINFO(ifp)->flags & ND6_IFF_IFDISABLED) && (ND.flags & ND6_IFF_IFDISABLED)) { /* ifdisabled 0->1 transision */ /* Mark all IPv6 address as tentative. 
*/ ND_IFINFO(ifp)->flags |= ND6_IFF_IFDISABLED; if (V_ip6_dad_count > 0 && (ND_IFINFO(ifp)->flags & ND6_IFF_NO_DAD) == 0) { IF_ADDR_RLOCK(ifp); TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { if (ifa->ifa_addr->sa_family != AF_INET6) continue; ia = (struct in6_ifaddr *)ifa; ia->ia6_flags |= IN6_IFF_TENTATIVE; } IF_ADDR_RUNLOCK(ifp); } } if (ND.flags & ND6_IFF_AUTO_LINKLOCAL) { if (!(ND_IFINFO(ifp)->flags & ND6_IFF_AUTO_LINKLOCAL)) { /* auto_linklocal 0->1 transision */ /* If no link-local address on ifp, configure */ ND_IFINFO(ifp)->flags |= ND6_IFF_AUTO_LINKLOCAL; in6_ifattach(ifp, NULL); } else if (!(ND.flags & ND6_IFF_IFDISABLED) && ifp->if_flags & IFF_UP) { /* * When the IF already has * ND6_IFF_AUTO_LINKLOCAL, no link-local * address is assigned, and IFF_UP, try to * assign one. */ IF_ADDR_RLOCK(ifp); TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { if (ifa->ifa_addr->sa_family != AF_INET6) continue; ia = (struct in6_ifaddr *)ifa; if (IN6_IS_ADDR_LINKLOCAL(IA6_IN6(ia))) break; } IF_ADDR_RUNLOCK(ifp); if (ifa != NULL) /* No LLA is configured. */ in6_ifattach(ifp, NULL); } } } ND_IFINFO(ifp)->flags = ND.flags; break; #undef ND case SIOCSNDFLUSH_IN6: /* XXX: the ioctl name is confusing... */ /* sync kernel routing table with the default router list */ defrouter_reset(); defrouter_select(); break; case SIOCSPFXFLUSH_IN6: { /* flush all the prefix advertised by routers */ struct nd_prefix *pr, *next; LIST_FOREACH_SAFE(pr, &V_nd_prefix, ndpr_entry, next) { struct in6_ifaddr *ia, *ia_next; if (IN6_IS_ADDR_LINKLOCAL(&pr->ndpr_prefix.sin6_addr)) continue; /* XXX */ /* do we really have to remove addresses as well? */ /* XXXRW: in6_ifaddrhead locking. 
*/ TAILQ_FOREACH_SAFE(ia, &V_in6_ifaddrhead, ia_link, ia_next) { if ((ia->ia6_flags & IN6_IFF_AUTOCONF) == 0) continue; if (ia->ia6_ndpr == pr) in6_purgeaddr(&ia->ia_ifa); } prelist_remove(pr); } break; } case SIOCSRTRFLUSH_IN6: { /* flush all the default routers */ - struct nd_defrouter *dr, *next; + struct nd_drhead drq; + struct nd_defrouter *dr; + TAILQ_INIT(&drq); + defrouter_reset(); - TAILQ_FOREACH_SAFE(dr, &V_nd_defrouter, dr_entry, next) { - defrtrlist_del(dr); + + ND6_WLOCK(); + while ((dr = TAILQ_FIRST(&V_nd_defrouter)) != NULL) + defrouter_unlink(dr, &drq); + ND6_WUNLOCK(); + while ((dr = TAILQ_FIRST(&drq)) != NULL) { + TAILQ_REMOVE(&drq, dr, dr_entry); + defrouter_del(dr); } + defrouter_select(); break; } case SIOCGNBRINFO_IN6: { struct llentry *ln; struct in6_addr nb_addr = nbi->addr; /* make local for safety */ if ((error = in6_setscope(&nb_addr, ifp, NULL)) != 0) return (error); IF_AFDATA_RLOCK(ifp); ln = nd6_lookup(&nb_addr, 0, ifp); IF_AFDATA_RUNLOCK(ifp); if (ln == NULL) { error = EINVAL; break; } nbi->state = ln->ln_state; nbi->asked = ln->la_asked; nbi->isrouter = ln->ln_router; if (ln->la_expire == 0) nbi->expire = 0; else nbi->expire = ln->la_expire + ln->lle_remtime / hz + (time_second - time_uptime); LLE_RUNLOCK(ln); break; } case SIOCGDEFIFACE_IN6: /* XXX: should be implemented as a sysctl? */ ndif->ifindex = V_nd6_defifindex; break; case SIOCSDEFIFACE_IN6: /* XXX: should be implemented as a sysctl? */ return (nd6_setdefaultiface(ndif->ifindex)); } return (error); } /* * Calculates new isRouter value based on provided parameters and * returns it. */ static int nd6_is_router(int type, int code, int is_new, int old_addr, int new_addr, int ln_router) { /* * ICMP6 type dependent behavior. * * NS: clear IsRouter if new entry * RS: clear IsRouter * RA: set IsRouter if there's lladdr * redir: clear IsRouter if new entry * * RA case, (1): * The spec says that we must set IsRouter in the following cases: * - If lladdr exist, set IsRouter. 
This means (1-5). * - If it is old entry (!newentry), set IsRouter. This means (7). * So, based on the spec, in (1-5) and (7) cases we must set IsRouter. * A quetion arises for (1) case. (1) case has no lladdr in the * neighbor cache, this is similar to (6). * This case is rare but we figured that we MUST NOT set IsRouter. * * is_new old_addr new_addr NS RS RA redir * D R * 0 n n (1) c ? s * 0 y n (2) c s s * 0 n y (3) c s s * 0 y y (4) c s s * 0 y y (5) c s s * 1 -- n (6) c c c s * 1 -- y (7) c c s c s * * (c=clear s=set) */ switch (type & 0xff) { case ND_NEIGHBOR_SOLICIT: /* * New entry must have is_router flag cleared. */ if (is_new) /* (6-7) */ ln_router = 0; break; case ND_REDIRECT: /* * If the icmp is a redirect to a better router, always set the * is_router flag. Otherwise, if the entry is newly created, * clear the flag. [RFC 2461, sec 8.3] */ if (code == ND_REDIRECT_ROUTER) ln_router = 1; else { if (is_new) /* (6-7) */ ln_router = 0; } break; case ND_ROUTER_SOLICIT: /* * is_router flag must always be cleared. */ ln_router = 0; break; case ND_ROUTER_ADVERT: /* * Mark an entry with lladdr as a router. */ if ((!is_new && (old_addr || new_addr)) || /* (2-5) */ (is_new && new_addr)) { /* (7) */ ln_router = 1; } break; } return (ln_router); } /* * Create neighbor cache entry and cache link-layer address, * on reception of inbound ND6 packets. 
(RS/RA/NS/redirect)
 *
 * type - ICMP6 type
 * code - type dependent information
 *
 */
/*
 * Notes on the arguments, as used by the body below:
 *  ifp       - interface whose IPv6 neighbor table is looked up / updated
 *  from      - neighbor's address; nothing is done for the unspecified one
 *  lladdr    - link-layer address taken from the packet, may be NULL
 *  lladdrlen - not referenced in this function (validation is left to the
 *              caller, see the comment inside)
 *  type/code - fed to nd6_is_router() and used to flag non-router redirects
 */
void
nd6_cache_lladdr(struct ifnet *ifp, struct in6_addr *from, char *lladdr,
    int lladdrlen, int type, int code)
{
	struct llentry *ln = NULL, *ln_tmp;
	int is_newentry;
	int do_update;
	int olladdr;
	int llchange;
	int flags;
	uint16_t router = 0;
	struct sockaddr_in6 sin6;
	struct mbuf *chain = NULL;
	u_char linkhdr[LLE_MAX_LINKHDR];
	size_t linkhdrsize;
	int lladdr_off;

	IF_AFDATA_UNLOCK_ASSERT(ifp);

	KASSERT(ifp != NULL, ("%s: ifp == NULL", __func__));
	KASSERT(from != NULL, ("%s: from == NULL", __func__));

	/* nothing must be updated for unspecified address */
	if (IN6_IS_ADDR_UNSPECIFIED(from))
		return;

	/*
	 * Validation about ifp->if_addrlen and lladdrlen must be done in
	 * the caller.
	 *
	 * XXX If the link does not have link-layer address, what should
	 * we do? (ifp->if_addrlen == 0)
	 * Spec says nothing in sections for RA, RS and NA.  There's small
	 * description on it in NS section (RFC 2461 7.2.3).
	 */
	/*
	 * The entry is looked up exclusively only when there is an address
	 * to record; "flags" also decides which unlock macro is used later.
	 */
	flags = lladdr ? LLE_EXCLUSIVE : 0;
	IF_AFDATA_RLOCK(ifp);
	ln = nd6_lookup(from, flags, ifp);
	IF_AFDATA_RUNLOCK(ifp);
	is_newentry = 0;
	if (ln == NULL) {
		flags |= LLE_EXCLUSIVE;
		ln = nd6_alloc(from, 0, ifp);
		if (ln == NULL)
			return;

		/*
		 * Since we already know all the data for the new entry,
		 * fill it before insertion.
		 */
		if (lladdr != NULL) {
			linkhdrsize = sizeof(linkhdr);
			/*
			 * NOTE(review): on failure this returns while "ln"
			 * from nd6_alloc() above is neither linked nor
			 * released on this path -- looks like a leak; confirm.
			 */
			if (lltable_calc_llheader(ifp, AF_INET6, lladdr,
			    linkhdr, &linkhdrsize, &lladdr_off) != 0)
				return;
			lltable_set_entry_addr(ifp, ln, linkhdr, linkhdrsize,
			    lladdr_off);
		}

		IF_AFDATA_WLOCK(ifp);
		LLE_WLOCK(ln);
		/* Prefer any existing lle over newly-created one */
		ln_tmp = nd6_lookup(from, LLE_EXCLUSIVE, ifp);
		if (ln_tmp == NULL)
			lltable_link_entry(LLTABLE6(ifp), ln);
		IF_AFDATA_WUNLOCK(ifp);
		if (ln_tmp == NULL) {
			/* No existing lle, mark as new entry (6,7) */
			is_newentry = 1;
			nd6_llinfo_setstate(ln, ND6_LLINFO_STALE);
			if (lladdr != NULL)	/* (7) */
				EVENTHANDLER_INVOKE(lle_event, ln,
				    LLENTRY_RESOLVED);
		} else {
			/* Lost the race: drop ours, continue with theirs. */
			lltable_free_entry(LLTABLE6(ifp), ln);
			ln = ln_tmp;
			ln_tmp = NULL;
		}
	}
	/* do nothing if static ndp is set */
	if ((ln->la_flags & LLE_STATIC)) {
		if (flags & LLE_EXCLUSIVE)
			LLE_WUNLOCK(ln);
		else
			LLE_RUNLOCK(ln);
		return;
	}

	/*
	 * olladdr: the entry already carries a valid link-layer address.
	 * llchange: the packet supplies an address that is new or different.
	 */
	olladdr = (ln->la_flags & LLE_VALID) ? 1 : 0;
	if (olladdr && lladdr) {
		llchange = bcmp(lladdr, ln->ll_addr,
		    ifp->if_addrlen);
	} else if (!olladdr && lladdr)
		llchange = 1;
	else
		llchange = 0;

	/*
	 * newentry olladdr  lladdr  llchange	(*=record)
	 *	0	n	n	--	(1)
	 *	0	y	n	--	(2)
	 *	0	n	y	y	(3) * STALE
	 *	0	y	y	n	(4) *
	 *	0	y	y	y	(5) * STALE
	 *	1	--	n	--	(6)   NOSTATE(= PASSIVE)
	 *	1	--	y	--	(7) * STALE
	 */

	do_update = 0;
	if (is_newentry == 0 && llchange != 0) {
		do_update = 1;	/* (3,5) */

		/*
		 * Record source link-layer address
		 * XXX is it dependent to ifp->if_type?
		 */
		linkhdrsize = sizeof(linkhdr);
		/*
		 * NOTE(review): this early return does not pass through the
		 * LLE_WUNLOCK/LLE_RUNLOCK at the end of the function --
		 * confirm "ln" is not left locked.
		 */
		if (lltable_calc_llheader(ifp, AF_INET6, lladdr,
		    linkhdr, &linkhdrsize, &lladdr_off) != 0)
			return;

		if (lltable_try_set_entry_addr(ifp, ln, linkhdr, linkhdrsize,
		    lladdr_off) == 0) {
			/* Entry was deleted */
			return;
		}

		nd6_llinfo_setstate(ln, ND6_LLINFO_STALE);

		EVENTHANDLER_INVOKE(lle_event, ln, LLENTRY_RESOLVED);

		/* Packets queued while unresolved are sent after unlocking. */
		if (ln->la_hold != NULL)
			nd6_grab_holdchain(ln, &chain, &sin6);
	}

	/* Calculates new router status */
	router = nd6_is_router(type, code, is_newentry, olladdr,
	    lladdr != NULL ? 1 : 0, ln->ln_router);

	ln->ln_router = router;
	/* Mark non-router redirects with special flag */
	if ((type & 0xFF) == ND_REDIRECT && code != ND_REDIRECT_ROUTER)
		ln->la_flags |= LLE_REDIRECT;

	if (flags & LLE_EXCLUSIVE)
		LLE_WUNLOCK(ln);
	else
		LLE_RUNLOCK(ln);

	if (chain != NULL)
		nd6_flush_holdchain(ifp, ifp, chain, &sin6);

	/*
	 * When the link-layer address of a router changes, select the
	 * best router again.  In particular, when the neighbor entry is newly
	 * created, it might affect the selection policy.
	 * Question: can we restrict the first condition to the "is_newentry"
	 * case?
	 * XXX: when we hear an RA from a new router with the link-layer
	 * address option, defrouter_select() is called twice, since
	 * defrtrlist_update called the function as well.  However, I believe
	 * we can compromise the overhead, since it only happens the first
	 * time.
	 * XXX: although defrouter_select() should not have a bad effect
	 * for those are not autoconfigured hosts, we explicitly avoid such
	 * cases for safety.
	 */
	if ((do_update || is_newentry) && router &&
	    ND_IFINFO(ifp)->flags & ND6_IFF_ACCEPT_RTADV) {
		/*
		 * guaranteed recursion
		 */
		defrouter_select();
	}
}

/*
 * Slow timer callout.  Re-arms itself to fire again after
 * ND6_SLOWTIMER_INTERVAL seconds, then walks V_ifnet and, for every
 * interface that has AF_INET6 data, recomputes "reachable" from
 * "basereachable" once the per-interface recalctm countdown runs out.
 *
 * arg - the vnet to run under (handed to CURVNET_SET)
 */
static void
nd6_slowtimo(void *arg)
{
	CURVNET_SET((struct vnet *) arg);
	struct nd_ifinfo *nd6if;
	struct ifnet *ifp;

	callout_reset(&V_nd6_slowtimo_ch, ND6_SLOWTIMER_INTERVAL * hz,
	    nd6_slowtimo, curvnet);
	IFNET_RLOCK_NOSLEEP();
	TAILQ_FOREACH(ifp, &V_ifnet, if_link) {
		if (ifp->if_afdata[AF_INET6] == NULL)
			continue;
		nd6if = ND_IFINFO(ifp);
		if (nd6if->basereachable && /* already initialized */
		    (nd6if->recalctm -= ND6_SLOWTIMER_INTERVAL) <= 0) {
			/*
			 * Since reachable time rarely changes by router
			 * advertisements, we SHOULD insure that a new random
			 * value gets recomputed at least once every few hours.
			 * (RFC 2461, 6.3.4)
			 */
			nd6if->recalctm = V_nd6_recalc_reachtm_interval;
			nd6if->reachable = ND_COMPUTE_RTIME(nd6if->basereachable);
		}
	}
	IFNET_RUNLOCK_NOSLEEP();
	CURVNET_RESTORE();
}

/*
 * Detach the queue of held packets from @ln and hand it to the caller
 * through @chain; @sin6 is filled with the entry's address by
 * lltable_fill_sa_entry().  The entry must be write-locked (asserted).
 * A STALE entry is moved to DELAY as a side effect.
 */
void
nd6_grab_holdchain(struct llentry *ln, struct mbuf **chain,
    struct sockaddr_in6 *sin6)
{

	LLE_WLOCK_ASSERT(ln);

	*chain = ln->la_hold;
	ln->la_hold = NULL;
	lltable_fill_sa_entry(ln, (struct sockaddr *)sin6);

	if (ln->ln_state == ND6_LLINFO_STALE) {

		/*
		 * The first time we send a packet to a
		 * neighbor whose entry is STALE, we have
		 * to change the state to DELAY and a sets
		 * a timer to expire in DELAY_FIRST_PROBE_TIME
		 * seconds to ensure do neighbor unreachability
		 * detection on expiration.
		 * (RFC 2461 7.3.3)
		 */
		nd6_llinfo_setstate(ln, ND6_LLINFO_DELAY);
	}
}

/*
 * Pass @m to the if_output routine of @ifp for destination @dst.
 *
 * When a SeND hook is registered and the packet carries a
 * PACKET_TAG_ND_OUTGOING tag, the packet is offered to the hook first and
 * the hook's result is returned unless it is -1.
 *
 * Returns the value of the hook or of if_output.
 */
int
nd6_output_ifp(struct ifnet *ifp, struct ifnet *origifp, struct mbuf *m,
    struct sockaddr_in6 *dst, struct route *ro)
{
	int error;
	int ip6len;
	struct ip6_hdr *ip6;
	struct m_tag *mtag;

#ifdef MAC
	mac_netinet6_nd6_send(ifp, m);
#endif

	/*
	 * If called from nd6_ns_output() (NS), nd6_na_output() (NA),
	 * icmp6_redirect_output() (REDIRECT) or from rip6_output() (RS, RA
	 * as handled by rtsol and rtadvd), mbufs will be tagged for SeND
	 * to be diverted to user space.  When re-injected into the kernel,
	 * send_output() will directly dispatch them to the outgoing interface.
	 */
	if (send_sendso_input_hook != NULL) {
		mtag = m_tag_find(m, PACKET_TAG_ND_OUTGOING, NULL);
		if (mtag != NULL) {
			ip6 = mtod(m, struct ip6_hdr *);
			ip6len = sizeof(struct ip6_hdr) + ntohs(ip6->ip6_plen);
			/* Use the SEND socket */
			error = send_sendso_input_hook(m, ifp, SND_OUT,
			    ip6len);
			/* -1 == no app on SEND socket */
			/* (the test below reduces to "error != -1") */
			if (error == 0 || error != -1)
				return (error);
		}
	}

	m_clrprotoflags(m);	/* Avoid confusing lower layers. */
	IP_PROBE(send, NULL, NULL, mtod(m, struct ip6_hdr *), ifp, NULL,
	    mtod(m, struct ip6_hdr *));

	/* For a non-loopback @ifp the caller's @origifp is overridden. */
	if ((ifp->if_flags & IFF_LOOPBACK) == 0)
		origifp = ifp;

	error = (*ifp->if_output)(origifp, m, (struct sockaddr *)dst, ro);
	return (error);
}

/*
 * Lookup link header for @sa_dst address. Stores found
 * data in @desten buffer. Copy of lle ln_flags can be also
 * saved in @pflags if @pflags is non-NULL.
 *
 * If destination LLE does not exists or lle state modification
 * is required, call "slow" version.
 *
 * Note: @is_gw is not referenced by the body below.
 *
 * Return values:
 * - 0 on success (address copied to buffer).
 * - EWOULDBLOCK (no local error, but address is still unresolved)
 * - other errors (alloc failure, etc)
 */
int
nd6_resolve(struct ifnet *ifp, int is_gw, struct mbuf *m,
    const struct sockaddr *sa_dst, u_char *desten, uint32_t *pflags)
{
	struct llentry *ln = NULL;
	const struct sockaddr_in6 *dst6;

	if (pflags != NULL)
		*pflags = 0;

	dst6 = (const struct sockaddr_in6 *)sa_dst;

	/* discard the packet if IPv6 operation is disabled on the interface */
	if ((ND_IFINFO(ifp)->flags & ND6_IFF_IFDISABLED)) {
		m_freem(m);
		return (ENETDOWN); /* better error? */
	}

	/* Multicast: the link address is derived, no cache lookup needed. */
	if (m != NULL && m->m_flags & M_MCAST) {
		switch (ifp->if_type) {
		case IFT_ETHER:
		case IFT_FDDI:
		case IFT_L2VLAN:
		case IFT_IEEE80211:
		case IFT_BRIDGE:
		case IFT_ISO88025:
			ETHER_MAP_IPV6_MULTICAST(&dst6->sin6_addr,
						 desten);
			return (0);
		default:
			m_freem(m);
			return (EAFNOSUPPORT);
		}
	}

	IF_AFDATA_RLOCK(ifp);
	ln = nd6_lookup(&dst6->sin6_addr, LLE_UNLOCKED, ifp);
	if (ln != NULL && (ln->r_flags & RLLE_VALID) != 0) {
		/* Entry found, let's copy lle info */
		bcopy(ln->r_linkdata, desten, ln->r_hdrlen);
		if (pflags != NULL)
			*pflags = LLE_VALID | (ln->r_flags & RLLE_IFADDR);
		/* Check if we have feedback request from nd6 timer */
		if (ln->r_skip_req != 0) {
			LLE_REQ_LOCK(ln);
			ln->r_skip_req = 0; /* Notify that entry was used */
			ln->lle_hittime = time_uptime;
			LLE_REQ_UNLOCK(ln);
		}
		IF_AFDATA_RUNLOCK(ifp);
		return (0);
	}
	IF_AFDATA_RUNLOCK(ifp);

	return (nd6_resolve_slow(ifp, 0, m, dst6, desten, pflags));
}


/*
 * Do L2 address resolution for @sa_dst address. Stores found
 * address in @desten buffer. Copy of lle ln_flags can be also
 * saved in @pflags if @pflags is non-NULL.
 *
 * Heavy version.
 * Function assume that destination LLE does not exist,
 * is invalid or stale, so LLE_EXCLUSIVE lock needs to be acquired.
 *
 * @flags: LLE_ADDRONLY selects the raw link-layer address (ll_addr,
 * if_addrlen bytes) instead of the r_linkdata header.
 * @m: packet to queue while unresolved; nd6_resolve_addr() passes NULL.
 *
 * Set noinline to be dtrace-friendly
 */
static __noinline int
nd6_resolve_slow(struct ifnet *ifp, int flags, struct mbuf *m,
    const struct sockaddr_in6 *dst, u_char *desten, uint32_t *pflags)
{
	struct llentry *lle = NULL, *lle_tmp;
	struct in6_addr *psrc, src;
	int send_ns, ll_len;
	char *lladdr;

	/*
	 * Address resolution or Neighbor Unreachability Detection
	 * for the next hop.
	 * At this point, the destination of the packet must be a unicast
	 * or an anycast address(i.e. not a multicast).
	 */
	/* "lle" was initialized to NULL above, so this is always entered. */
	if (lle == NULL) {
		IF_AFDATA_RLOCK(ifp);
		lle = nd6_lookup(&dst->sin6_addr, LLE_EXCLUSIVE, ifp);
		IF_AFDATA_RUNLOCK(ifp);
		if ((lle == NULL) && nd6_is_addr_neighbor(dst, ifp))  {
			/*
			 * Since nd6_is_addr_neighbor() internally calls nd6_lookup(),
			 * the condition below is not very efficient.  But we believe
			 * it is tolerable, because this should be a rare case.
			 */
			lle = nd6_alloc(&dst->sin6_addr, 0, ifp);
			if (lle == NULL) {
				char ip6buf[INET6_ADDRSTRLEN];
				log(LOG_DEBUG,
				    "nd6_output: can't allocate llinfo for %s "
				    "(ln=%p)\n",
				    ip6_sprintf(ip6buf, &dst->sin6_addr), lle);
				m_freem(m);
				return (ENOBUFS);
			}

			IF_AFDATA_WLOCK(ifp);
			LLE_WLOCK(lle);
			/* Prefer any existing entry over newly-created one */
			lle_tmp = nd6_lookup(&dst->sin6_addr, LLE_EXCLUSIVE, ifp);
			if (lle_tmp == NULL)
				lltable_link_entry(LLTABLE6(ifp), lle);
			IF_AFDATA_WUNLOCK(ifp);
			if (lle_tmp != NULL) {
				lltable_free_entry(LLTABLE6(ifp), lle);
				lle = lle_tmp;
				lle_tmp = NULL;
			}
		}
	}
	if (lle == NULL) {
		/* Both arms below free @m and return ENOBUFS. */
		if (!(ND_IFINFO(ifp)->flags & ND6_IFF_PERFORMNUD)) {
			m_freem(m);
			return (ENOBUFS);
		}

		if (m != NULL)
			m_freem(m);
		return (ENOBUFS);
	}

	LLE_WLOCK_ASSERT(lle);

	/*
	 * The first time we send a packet to a neighbor whose entry is
	 * STALE, we have to change the state to DELAY and a sets a timer to
	 * expire in DELAY_FIRST_PROBE_TIME seconds to ensure do
	 * neighbor unreachability detection on expiration.
	 * (RFC 2461 7.3.3)
	 */

	if (lle->ln_state == ND6_LLINFO_STALE)
		nd6_llinfo_setstate(lle, ND6_LLINFO_DELAY);

	/*
	 * If the neighbor cache entry has a state other than INCOMPLETE
	 * (i.e. its link-layer address is already resolved), just
	 * send the packet.
	 */
	if (lle->ln_state > ND6_LLINFO_INCOMPLETE) {
		if (flags & LLE_ADDRONLY) {
			lladdr = lle->ll_addr;
			ll_len = ifp->if_addrlen;
		} else {
			lladdr = lle->r_linkdata;
			ll_len = lle->r_hdrlen;
		}
		bcopy(lladdr, desten, ll_len);
		if (pflags != NULL)
			*pflags = lle->la_flags;
		LLE_WUNLOCK(lle);
		return (0);
	}

	/*
	 * There is a neighbor cache entry, but no ethernet address
	 * response yet.  Append this latest packet to the end of the
	 * packet queue in the mbuf, unless the number of the packet
	 * does not exceed nd6_maxqueuelen.  When it exceeds nd6_maxqueuelen,
	 * the oldest packet in the queue will be removed.
	 */

	if (lle->la_hold != NULL) {
		struct mbuf *m_hold;
		int i;

		/* i ends up as the number of packets held before @m. */
		i = 0;
		for (m_hold = lle->la_hold; m_hold; m_hold = m_hold->m_nextpkt){
			i++;
			if (m_hold->m_nextpkt == NULL) {
				m_hold->m_nextpkt = m;
				break;
			}
		}
		/* Drop from the head (oldest first) until within the limit. */
		while (i >= V_nd6_maxqueuelen) {
			m_hold = lle->la_hold;
			lle->la_hold = lle->la_hold->m_nextpkt;
			m_freem(m_hold);
			i--;
		}
	} else {
		lle->la_hold = m;
	}

	/*
	 * If there has been no NS for the neighbor after entering the
	 * INCOMPLETE state, send the first solicitation.
	 * Note that for newly-created lle la_asked will be 0,
	 * so we will transition from ND6_LLINFO_NOSTATE to
	 * ND6_LLINFO_INCOMPLETE state here.
	 */
	psrc = NULL;
	send_ns = 0;
	if (lle->la_asked == 0) {
		lle->la_asked++;
		send_ns = 1;
		psrc = nd6_llinfo_get_holdsrc(lle, &src);

		nd6_llinfo_setstate(lle, ND6_LLINFO_INCOMPLETE);
	}
	LLE_WUNLOCK(lle);
	/* The solicitation is sent only after the entry lock is dropped. */
	if (send_ns != 0)
		nd6_ns_output(ifp, psrc, NULL, &dst->sin6_addr, NULL);

	return (EWOULDBLOCK);
}

/*
 * Do L2 address resolution for @sa_dst address. Stores found
 * address in @desten buffer. Copy of lle ln_flags can be also
 * saved in @pflags if @pflags is non-NULL.
 *
 * Return values:
 * - 0 on success (address copied to buffer).
* - EWOULDBLOCK (no local error, but address is still unresolved) * - other errors (alloc failure, etc) */ int nd6_resolve_addr(struct ifnet *ifp, int flags, const struct sockaddr *dst, char *desten, uint32_t *pflags) { int error; flags |= LLE_ADDRONLY; error = nd6_resolve_slow(ifp, flags, NULL, (const struct sockaddr_in6 *)dst, desten, pflags); return (error); } int nd6_flush_holdchain(struct ifnet *ifp, struct ifnet *origifp, struct mbuf *chain, struct sockaddr_in6 *dst) { struct mbuf *m, *m_head; struct ifnet *outifp; int error = 0; m_head = chain; if ((ifp->if_flags & IFF_LOOPBACK) != 0) outifp = origifp; else outifp = ifp; while (m_head) { m = m_head; m_head = m_head->m_nextpkt; error = nd6_output_ifp(ifp, origifp, m, dst, NULL); } /* * XXX * note that intermediate errors are blindly ignored */ return (error); } static int nd6_need_cache(struct ifnet *ifp) { /* * XXX: we currently do not make neighbor cache on any interface * other than ARCnet, Ethernet, FDDI and GIF. * * RFC2893 says: * - unidirectional tunnels needs no ND */ switch (ifp->if_type) { case IFT_ARCNET: case IFT_ETHER: case IFT_FDDI: case IFT_IEEE1394: case IFT_L2VLAN: case IFT_IEEE80211: case IFT_INFINIBAND: case IFT_BRIDGE: case IFT_PROPVIRTUAL: return (1); default: return (0); } } /* * Add pernament ND6 link-layer record for given * interface address. * * Very similar to IPv4 arp_ifinit(), but: * 1) IPv6 DAD is performed in different place * 2) It is called by IPv6 protocol stack in contrast to * arp_ifinit() which is typically called in SIOCSIFADDR * driver ioctl handler. 
*
 */
/*
 * Returns 0 when no neighbor cache is needed for the interface or once the
 * entry has been linked, ENOBUFS when the entry cannot be allocated.
 * As a side effect the address's ifa_rtrequest hook is set to
 * nd6_rtrequest.
 */
int
nd6_add_ifa_lle(struct in6_ifaddr *ia)
{
	struct ifnet *ifp;
	struct llentry *ln, *ln_tmp;
	struct sockaddr *dst;

	ifp = ia->ia_ifa.ifa_ifp;
	if (nd6_need_cache(ifp) == 0)
		return (0);

	ia->ia_ifa.ifa_rtrequest = nd6_rtrequest;
	dst = (struct sockaddr *)&ia->ia_addr;
	ln = lltable_alloc_entry(LLTABLE6(ifp), LLE_IFADDR, dst);
	if (ln == NULL)
		return (ENOBUFS);

	IF_AFDATA_WLOCK(ifp);
	LLE_WLOCK(ln);
	/* Unlink any entry if exists */
	ln_tmp = lla_lookup(LLTABLE6(ifp), LLE_EXCLUSIVE, dst);
	if (ln_tmp != NULL)
		lltable_unlink_entry(LLTABLE6(ifp), ln_tmp);
	lltable_link_entry(LLTABLE6(ifp), ln);
	IF_AFDATA_WUNLOCK(ifp);

	/* Events are raised only after the table lock has been dropped. */
	if (ln_tmp != NULL)
		EVENTHANDLER_INVOKE(lle_event, ln_tmp, LLENTRY_EXPIRED);
	EVENTHANDLER_INVOKE(lle_event, ln, LLENTRY_RESOLVED);

	LLE_WUNLOCK(ln);
	/* The replaced entry, if any, is released last. */
	if (ln_tmp != NULL)
		llentry_free(ln_tmp);

	return (0);
}

/*
 * Removes either all lle entries for given @ia, or lle
 * corresponding to @ia address.
 *
 * all != 0: lltable_prefix_free() is called with the address and prefix
 *           mask of @ia; otherwise only the entry for the address itself
 *           is deleted from the interface's table.
 */
void
nd6_rem_ifa_lle(struct in6_ifaddr *ia, int all)
{
	struct sockaddr_in6 mask, addr;
	struct sockaddr *saddr, *smask;
	struct ifnet *ifp;

	ifp = ia->ia_ifa.ifa_ifp;
	/* Work on local copies of the address and of the prefix mask. */
	memcpy(&addr, &ia->ia_addr, sizeof(ia->ia_addr));
	memcpy(&mask, &ia->ia_prefixmask, sizeof(ia->ia_prefixmask));
	saddr = (struct sockaddr *)&addr;
	smask = (struct sockaddr *)&mask;

	if (all != 0)
		lltable_prefix_free(AF_INET6, saddr, smask, LLE_STATIC);
	else
		lltable_delete_addr(LLTABLE6(ifp), LLE_IFADDR, saddr);
}

/*
 * Free every packet held on @ln (the la_hold list, linked through
 * m_nextpkt) and leave the list empty.
 */
static void
clear_llinfo_pqueue(struct llentry *ln)
{
	struct mbuf *m_hold, *m_hold_next;

	for (m_hold = ln->la_hold; m_hold; m_hold = m_hold_next) {
		/* Read the link before the packet is freed. */
		m_hold_next = m_hold->m_nextpkt;
		m_freem(m_hold);
	}

	ln->la_hold = NULL;
}

/* Handlers behind the nd6_drlist / nd6_prlist sysctl nodes declared below. */
static int nd6_sysctl_drlist(SYSCTL_HANDLER_ARGS);
static int nd6_sysctl_prlist(SYSCTL_HANDLER_ARGS);
#ifdef SYSCTL_DECL
SYSCTL_DECL(_net_inet6_icmp6);
#endif
SYSCTL_NODE(_net_inet6_icmp6, ICMPV6CTL_ND6_DRLIST, nd6_drlist,
	CTLFLAG_RD, nd6_sysctl_drlist, "");
SYSCTL_NODE(_net_inet6_icmp6, ICMPV6CTL_ND6_PRLIST, nd6_prlist,
	CTLFLAG_RD, nd6_sysctl_prlist, "");
SYSCTL_INT(_net_inet6_icmp6, ICMPV6CTL_ND6_MAXQLEN, nd6_maxqueuelen,
	CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(nd6_maxqueuelen), 1, "");
SYSCTL_INT(_net_inet6_icmp6, OID_AUTO, nd6_gctimer,
	CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(nd6_gctimer), (60 * 60 * 24), "");

/*
 * Read-only sysctl handler: writes one struct in6_defrouter to the
 * requester for every entry of V_nd_defrouter.
 *
 * NOTE(review): the statements prefixed with '-' and '+' below appear to be
 * the removed and added sides of the patch this text was taken from; they
 * are kept exactly in their original order.
 */
static int
nd6_sysctl_drlist(SYSCTL_HANDLER_ARGS)
{
	struct in6_defrouter d;
	struct nd_defrouter *dr;
	int error;

-	if (req->newptr)
+	if (req->newptr != NULL)
		return (EPERM);

+	error = sysctl_wire_old_buffer(req, 0);
+	if (error != 0)
+		return (error);
+
	bzero(&d, sizeof(d));
	d.rtaddr.sin6_family = AF_INET6;
	d.rtaddr.sin6_len = sizeof(d.rtaddr);

-	/*
-	 * XXX locking
-	 */
+	ND6_RLOCK();
	TAILQ_FOREACH(dr, &V_nd_defrouter, dr_entry) {
		d.rtaddr.sin6_addr = dr->rtaddr;
		error = sa6_recoverscope(&d.rtaddr);
		if (error != 0)
-			return (error);
+			break;
		d.flags = dr->raflags;
		d.rtlifetime = dr->rtlifetime;
		d.expire = dr->expire + (time_second - time_uptime);
		d.if_index = dr->ifp->if_index;
		error = SYSCTL_OUT(req, &d, sizeof(d));
		if (error != 0)
-			return (error);
+			break;
	}
-	return (0);
+	ND6_RUNLOCK();
+	return (error);
}

/*
 * Read-only sysctl handler: for every entry of V_nd_prefix writes one
 * struct in6_prefix followed by one struct sockaddr_in6 per advertising
 * router of that prefix.  Returns EPERM on a write attempt, the first
 * SYSCTL_OUT error otherwise, or 0.
 */
static int
nd6_sysctl_prlist(SYSCTL_HANDLER_ARGS)
{
	struct in6_prefix p;
	struct sockaddr_in6 s6;
	struct nd_prefix *pr;
	struct nd_pfxrouter *pfr;
	time_t maxexpire;
	int error;
	char ip6buf[INET6_ADDRSTRLEN];

	if (req->newptr)
		return (EPERM);

	bzero(&p, sizeof(p));
	p.origin = PR_ORIG_RA;
	bzero(&s6, sizeof(s6));
	s6.sin6_family = AF_INET6;
	s6.sin6_len = sizeof(s6);

	/*
	 * XXX locking
	 */
	LIST_FOREACH(pr, &V_nd_prefix, ndpr_entry) {
		p.prefix = pr->ndpr_prefix;
		if (sa6_recoverscope(&p.prefix)) {
			log(LOG_ERR, "scope error in prefix list (%s)\n",
			    ip6_sprintf(ip6buf, &p.prefix.sin6_addr));
			/* XXX: press on... */
		}
		p.raflags = pr->ndpr_raf;
		p.prefixlen = pr->ndpr_plen;
		p.vltime = pr->ndpr_vltime;
		p.pltime = pr->ndpr_pltime;
		p.if_index = pr->ndpr_ifp->if_index;
		if (pr->ndpr_vltime == ND6_INFINITE_LIFETIME)
			p.expire = 0;
		else {
			/* XXX: we assume time_t is signed. */
			/* All bits set except the top one. */
			maxexpire = (-1) &
			    ~((time_t)1 << ((sizeof(maxexpire) * 8) - 1));
			if (pr->ndpr_vltime < maxexpire - pr->ndpr_lastupdate)
				p.expire = pr->ndpr_lastupdate +
				    pr->ndpr_vltime +
				    (time_second - time_uptime);
			else
				p.expire = maxexpire;
		}
		p.refcnt = pr->ndpr_refcnt;
		p.flags = pr->ndpr_stateflags;
		/* Count the advertising routers before emitting the record. */
		p.advrtrs = 0;
		LIST_FOREACH(pfr, &pr->ndpr_advrtrs, pfr_entry)
			p.advrtrs++;
		error = SYSCTL_OUT(req, &p, sizeof(p));
		if (error != 0)
			return (error);
		LIST_FOREACH(pfr, &pr->ndpr_advrtrs, pfr_entry) {
			s6.sin6_addr = pfr->router->rtaddr;
			if (sa6_recoverscope(&s6))
				log(LOG_ERR,
				    "scope error in prefix list (%s)\n",
				    ip6_sprintf(ip6buf, &pfr->router->rtaddr));
			error = SYSCTL_OUT(req, &s6, sizeof(s6));
			if (error != 0)
				return (error);
		}
	}
	return (0);
}
Index: head/sys/netinet6/nd6.h
===================================================================
--- head/sys/netinet6/nd6.h (revision 296062)
+++ head/sys/netinet6/nd6.h (revision 296063)
@@ -1,459 +1,478 @@
/*-
 * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 * notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 * notice, this list of conditions and the following disclaimer in the
 * documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the project nor the names of its contributors
 * may be used to endorse or promote products derived from this software
 * without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.
IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $KAME: nd6.h,v 1.76 2001/12/18 02:10:31 itojun Exp $ * $FreeBSD$ */ #ifndef _NETINET6_ND6_H_ #define _NETINET6_ND6_H_ /* see net/route.h, or net/if_inarp.h */ #ifndef RTF_ANNOUNCE #define RTF_ANNOUNCE RTF_PROTO2 #endif #include #include struct llentry; #define ND6_LLINFO_NOSTATE -2 /* * We don't need the WAITDELETE state any more, but we keep the definition * in a comment line instead of removing it. This is necessary to avoid * unintentionally reusing the value for another purpose, which might * affect backward compatibility with old applications. 
 * (20000711 jinmei@kame.net)
 */
/* #define ND6_LLINFO_WAITDELETE	-1 */
#define ND6_LLINFO_INCOMPLETE	0
#define ND6_LLINFO_REACHABLE	1
#define ND6_LLINFO_STALE	2
#define ND6_LLINFO_DELAY	3
#define ND6_LLINFO_PROBE	4

/* True for every state numerically above INCOMPLETE (REACHABLE..PROBE). */
#define ND6_IS_LLINFO_PROBREACH(n) ((n)->ln_state > ND6_LLINFO_INCOMPLETE)
/* True for an entry above INCOMPLETE whose la_expire is 0. */
#define ND6_LLINFO_PERMANENT(n) (((n)->la_expire == 0) && ((n)->ln_state > ND6_LLINFO_INCOMPLETE))

/* Per-interface Neighbor Discovery parameters (see ND_IFINFO() below). */
struct nd_ifinfo {
	u_int32_t linkmtu;		/* LinkMTU */
	u_int32_t maxmtu;		/* Upper bound of LinkMTU */
	u_int32_t basereachable;	/* BaseReachableTime */
	u_int32_t reachable;		/* Reachable Time */
	u_int32_t retrans;		/* Retrans Timer */
	u_int32_t flags;		/* Flags */
	int recalctm;			/* BaseReachable re-calculation timer */
	u_int8_t chlim;			/* CurHopLimit */
	u_int8_t initialized; /* Flag to see the entry is initialized */
	/* the following 3 members are for privacy extension for addrconf */
	u_int8_t randomseed0[8]; /* upper 64 bits of MD5 digest */
	u_int8_t randomseed1[8]; /* lower 64 bits (usually the EUI64 IFID) */
	u_int8_t randomid[8];	/* current random ID */
};

/* Bits for nd_ifinfo.flags. */
#define ND6_IFF_PERFORMNUD	0x1
#define ND6_IFF_ACCEPT_RTADV	0x2
#define ND6_IFF_PREFER_SOURCE	0x4 /* Not used in FreeBSD. */
#define ND6_IFF_IFDISABLED	0x8 /* IPv6 operation is disabled due to
				     * DAD failure.  (XXX: not ND-specific)
				     */
#define ND6_IFF_DONT_SET_IFROUTE	0x10
#define ND6_IFF_AUTO_LINKLOCAL	0x20
#define ND6_IFF_NO_RADR		0x40
#define ND6_IFF_NO_PREFER_IFACE	0x80 /* XXX: not related to ND. */
#define ND6_IFF_NO_DAD		0x100

#ifdef _KERNEL
/* ND parameters of an interface, reached through its AF_INET6 if_afdata. */
#define ND_IFINFO(ifp) \
	(((struct in6_ifextra *)(ifp)->if_afdata[AF_INET6])->nd_ifinfo)
/*
 * Effective link MTU: linkmtu when it is non-zero and below if_mtu,
 * otherwise maxmtu when it is non-zero and below if_mtu, otherwise if_mtu.
 */
#define IN6_LINKMTU(ifp) \
	((ND_IFINFO(ifp)->linkmtu && ND_IFINFO(ifp)->linkmtu < (ifp)->if_mtu) \
	    ? ND_IFINFO(ifp)->linkmtu \
	    : ((ND_IFINFO(ifp)->maxmtu && ND_IFINFO(ifp)->maxmtu < (ifp)->if_mtu) \
		? ND_IFINFO(ifp)->maxmtu : (ifp)->if_mtu))
#endif

struct in6_nbrinfo {
	char ifname[IFNAMSIZ];	/* if name, e.g. "en0" */
	struct in6_addr addr;	/* IPv6 address of the neighbor */
	long	asked;		/* number of queries already sent for this addr */
	int	isrouter;	/* if it acts as a router */
	int	state;		/* reachability state */
	int	expire;		/* lifetime for NDP state transition */
};

/* Fixed array sizes used by the in6_drlist / in6_prlist structures. */
#define DRLSTSIZ 10
#define PRLSTSIZ 10
struct in6_drlist {
	char ifname[IFNAMSIZ];
	struct {
		struct	in6_addr rtaddr;
		u_char	flags;
		u_short	rtlifetime;
		u_long	expire;
		u_short if_index;
	} defrouter[DRLSTSIZ];
};

struct in6_defrouter {
	struct	sockaddr_in6 rtaddr;
	u_char	flags;
	u_short	rtlifetime;
	u_long	expire;
	u_short if_index;
};

#ifdef _KERNEL
struct in6_oprlist {
	char ifname[IFNAMSIZ];
	struct {
		struct	in6_addr prefix;
		struct prf_ra raflags;
		u_char	prefixlen;
		u_char	origin;
		u_long vltime;
		u_long pltime;
		u_long expire;
		u_short if_index;
		u_short advrtrs; /* number of advertisement routers */
		struct	in6_addr advrtr[DRLSTSIZ]; /* XXX: explicit limit */
	} prefix[PRLSTSIZ];
};
#endif

struct in6_prlist {
	char ifname[IFNAMSIZ];
	struct {
		struct	in6_addr prefix;
		struct prf_ra raflags;
		u_char	prefixlen;
		u_char	origin;
		u_int32_t vltime;
		u_int32_t pltime;
		time_t expire;
		u_short if_index;
		u_short advrtrs; /* number of advertisement routers */
		struct	in6_addr advrtr[DRLSTSIZ]; /* XXX: explicit limit */
	} prefix[PRLSTSIZ];
};

struct in6_prefix {
	struct	sockaddr_in6 prefix;
	struct prf_ra raflags;
	u_char	prefixlen;
	u_char	origin;
	u_int32_t vltime;
	u_int32_t pltime;
	time_t expire;
	u_int32_t flags;
	int refcnt;
	u_short if_index;
	u_short advrtrs; /* number of advertisement routers */
	/* struct sockaddr_in6 advrtr[] */
};

#ifdef _KERNEL
struct in6_ondireq {
	char ifname[IFNAMSIZ];
	struct {
		u_int32_t linkmtu;	/* LinkMTU */
		u_int32_t maxmtu;	/* Upper bound of LinkMTU */
		u_int32_t basereachable; /* BaseReachableTime */
		u_int32_t reachable;	/* Reachable Time */
		u_int32_t retrans;	/* Retrans Timer */
		u_int32_t flags;	/* Flags */
		int recalctm;		/* BaseReachable re-calculation timer */
		u_int8_t chlim;		/* CurHopLimit */
		u_int8_t receivedra;
	} ndi;
};
#endif

struct in6_ndireq {
	char ifname[IFNAMSIZ];
	struct nd_ifinfo ndi;
};

struct in6_ndifreq {
	char ifname[IFNAMSIZ];
	u_long ifindex;
};

/* Prefix status */
#define NDPRF_ONLINK		0x1
#define NDPRF_DETACHED		0x2

/* protocol constants */
#define MAX_RTR_SOLICITATION_DELAY	1	/* 1sec */
#define RTR_SOLICITATION_INTERVAL	4	/* 4sec */
#define MAX_RTR_SOLICITATIONS		3

#define ND6_INFINITE_LIFETIME		0xffffffff

#ifdef _KERNEL
/* node constants */
#define MAX_REACHABLE_TIME		3600000	/* msec */
#define REACHABLE_TIME			30000	/* msec */
#define RETRANS_TIMER			1000	/* msec */
#define MIN_RANDOM_FACTOR		512	/* 1024 * 0.5 */
#define MAX_RANDOM_FACTOR		1536	/* 1024 * 1.5 */
#define DEF_TEMP_VALID_LIFETIME		604800	/* 1 week */
#define DEF_TEMP_PREFERRED_LIFETIME	86400	/* 1 day */
#define TEMPADDR_REGEN_ADVANCE		5	/* sec */
#define MAX_TEMP_DESYNC_FACTOR		600	/* 10 min */
/*
 * Randomised reachable time derived from x.
 * NOTE(review): the argument x is expanded without parentheses, so only
 * pass a simple expression; and the random value is combined with '&'
 * against a product that need not be a power of two - confirm intent.
 */
#define ND_COMPUTE_RTIME(x) \
		(((MIN_RANDOM_FACTOR * (x >> 10)) + (arc4random() & \
		((MAX_RANDOM_FACTOR - MIN_RANDOM_FACTOR) * (x >> 10)))) /1000)

TAILQ_HEAD(nd_drhead, nd_defrouter);
struct nd_defrouter {
	TAILQ_ENTRY(nd_defrouter) dr_entry;
	struct in6_addr rtaddr;
	u_char	raflags;	/* flags on RA message */
	u_short	rtlifetime;
	u_long	expire;
	struct ifnet *ifp;
	int	installed;	/* is installed into kernel routing table */
	/*
	 * Reference count; presumably manipulated through defrouter_ref()
	 * and defrouter_rele() declared below - confirm in nd6_rtr.c.
	 */
+	u_int	refcnt;
};

struct nd_prefixctl {
	struct ifnet *ndpr_ifp;

	/* prefix */
	struct sockaddr_in6 ndpr_prefix;
	u_char	ndpr_plen;

	u_int32_t ndpr_vltime;	/* advertised valid lifetime */
	u_int32_t ndpr_pltime;	/* advertised preferred lifetime */

	struct prf_ra ndpr_flags;
};

struct nd_prefix {
	struct ifnet *ndpr_ifp;
	LIST_ENTRY(nd_prefix) ndpr_entry;
	struct sockaddr_in6 ndpr_prefix;	/* prefix */
	struct in6_addr ndpr_mask; /* netmask derived from the prefix */

	u_int32_t ndpr_vltime;	/* advertised valid lifetime */
	u_int32_t ndpr_pltime;	/* advertised preferred lifetime */

	time_t ndpr_expire;	/* expiration time of the prefix */
	time_t ndpr_preferred;	/* preferred time of the prefix */
	time_t ndpr_lastupdate; /* reception time of last advertisement */

	struct prf_ra ndpr_flags;
	u_int32_t ndpr_stateflags; /* actual state flags */
	/* list of routers that advertise the prefix: */
	LIST_HEAD(pr_rtrhead, nd_pfxrouter) ndpr_advrtrs;
	u_char	ndpr_plen;
	int	ndpr_refcnt;	/* reference counter from addresses */
};

#define ndpr_raf		ndpr_flags
#define ndpr_raf_onlink		ndpr_flags.onlink
#define ndpr_raf_auto		ndpr_flags.autonomous
#define ndpr_raf_router		ndpr_flags.router

/*
 * Message format for use in obtaining information about prefixes
 * from inet6 sysctl function
 */
struct inet6_ndpr_msghdr {
	u_short	inpm_msglen;	/* to skip over non-understood messages */
	u_char	inpm_version;	/* future binary compatibility */
	u_char	inpm_type;	/* message type */
	struct in6_addr inpm_prefix;
	u_long	prm_vltim;
	u_long	prm_pltime;
	u_long	prm_expire;
	u_long	prm_preferred;
	struct in6_prflags prm_flags;
	u_short	prm_index;	/* index for associated ifp */
	u_char	prm_plen;	/* length of prefix in bits */
};

#define prm_raf_onlink		prm_flags.prf_ra.onlink
#define prm_raf_auto		prm_flags.prf_ra.autonomous

#define prm_statef_onlink	prm_flags.prf_state.onlink

#define prm_rrf_decrvalid	prm_flags.prf_rr.decrvalid
#define prm_rrf_decrprefd	prm_flags.prf_rr.decrprefd

/* Links a prefix (via nd_prefix.ndpr_advrtrs) to a router advertising it. */
struct nd_pfxrouter {
	LIST_ENTRY(nd_pfxrouter) pfr_entry;
	struct nd_defrouter *router;
};

LIST_HEAD(nd_prhead, nd_prefix);

/* nd6.c */
VNET_DECLARE(int, nd6_prune);
VNET_DECLARE(int, nd6_delay);
VNET_DECLARE(int, nd6_umaxtries);
VNET_DECLARE(int, nd6_mmaxtries);
VNET_DECLARE(int, nd6_useloopback);
VNET_DECLARE(int, nd6_maxnudhint);
VNET_DECLARE(int, nd6_gctimer);
VNET_DECLARE(struct nd_drhead, nd_defrouter);
VNET_DECLARE(struct nd_prhead, nd_prefix);
VNET_DECLARE(int, nd6_debug);
VNET_DECLARE(int, nd6_onlink_ns_rfc4861);
#define V_nd6_prune			VNET(nd6_prune)
#define V_nd6_delay			VNET(nd6_delay)
#define V_nd6_umaxtries			VNET(nd6_umaxtries)
#define V_nd6_mmaxtries			VNET(nd6_mmaxtries)
#define V_nd6_useloopback		VNET(nd6_useloopback)
#define V_nd6_maxnudhint		VNET(nd6_maxnudhint)
#define V_nd6_gctimer			VNET(nd6_gctimer)
#define V_nd_defrouter			VNET(nd_defrouter)
#define V_nd_prefix			VNET(nd_prefix)
#define V_nd6_debug			VNET(nd6_debug)
#define V_nd6_onlink_ns_rfc4861		VNET(nd6_onlink_ns_rfc4861)

+/* Lock for the prefix and default router lists. */
+VNET_DECLARE(struct rwlock, nd6_lock);
+#define	V_nd6_lock			VNET(nd6_lock)
+
+#define	ND6_RLOCK()			rw_rlock(&V_nd6_lock)
+#define	ND6_RUNLOCK()			rw_runlock(&V_nd6_lock)
+#define	ND6_WLOCK()			rw_wlock(&V_nd6_lock)
+#define	ND6_WUNLOCK()			rw_wunlock(&V_nd6_lock)
+#define	ND6_WLOCK_ASSERT()		rw_assert(&V_nd6_lock, RA_WLOCKED)
+#define	ND6_RLOCK_ASSERT()		rw_assert(&V_nd6_lock, RA_RLOCKED)
+#define	ND6_LOCK_ASSERT()		rw_assert(&V_nd6_lock, RA_LOCKED)
+#define	ND6_UNLOCK_ASSERT()		rw_assert(&V_nd6_lock, RA_UNLOCKED)
+
/* Calls log() with the parenthesised argument list x when V_nd6_debug is set. */
#define nd6log(x)	do { if (V_nd6_debug) log x; } while (/*CONSTCOND*/ 0)

VNET_DECLARE(struct callout, nd6_timer_ch);
#define V_nd6_timer_ch			VNET(nd6_timer_ch)

/* nd6_rtr.c */
VNET_DECLARE(int, nd6_defifindex);
VNET_DECLARE(int, ip6_desync_factor);	/* seconds */
VNET_DECLARE(u_int32_t, ip6_temp_preferred_lifetime); /* seconds */
VNET_DECLARE(u_int32_t, ip6_temp_valid_lifetime); /* seconds */
VNET_DECLARE(int, ip6_temp_regen_advance); /* seconds */
#define V_nd6_defifindex		VNET(nd6_defifindex)
#define V_ip6_desync_factor		VNET(ip6_desync_factor)
#define V_ip6_temp_preferred_lifetime	VNET(ip6_temp_preferred_lifetime)
#define V_ip6_temp_valid_lifetime	VNET(ip6_temp_valid_lifetime)
#define V_ip6_temp_regen_advance	VNET(ip6_temp_regen_advance)

/*
 * Parsed view of the options of one ND message.  nd_opt_array and the
 * first 16 pointers of nd_opt_each overlay each other; presumably the
 * array index is the ND option type number - confirm against nd6_options().
 */
union nd_opts {
	struct nd_opt_hdr *nd_opt_array[16];	/* max = ND_OPT_NONCE */
	struct {
		struct nd_opt_hdr *zero;
		struct nd_opt_hdr *src_lladdr;
		struct nd_opt_hdr *tgt_lladdr;
		struct nd_opt_prefix_info *pi_beg; /* multiple opts, start */
		struct nd_opt_rd_hdr *rh;
		struct nd_opt_mtu *mtu;
		struct nd_opt_hdr *__res6;
		struct nd_opt_hdr *__res7;
		struct nd_opt_hdr *__res8;
		struct nd_opt_hdr *__res9;
		struct nd_opt_hdr *__res10;
		struct nd_opt_hdr *__res11;
		struct nd_opt_hdr *__res12;
		struct nd_opt_hdr *__res13;
		struct nd_opt_nonce *nonce;
		struct nd_opt_hdr *__res15;
		struct nd_opt_hdr *search;	/* multiple opts */
		struct nd_opt_hdr *last;	/* multiple opts */
		int done;
		struct nd_opt_prefix_info *pi_end;/* multiple opts, end */
	} nd_opt_each;
};
#define nd_opts_src_lladdr	nd_opt_each.src_lladdr
#define nd_opts_tgt_lladdr	nd_opt_each.tgt_lladdr
#define nd_opts_pi		nd_opt_each.pi_beg
#define nd_opts_pi_end		nd_opt_each.pi_end
#define nd_opts_rh		nd_opt_each.rh
#define nd_opts_mtu		nd_opt_each.mtu
#define nd_opts_nonce		nd_opt_each.nonce
#define nd_opts_search		nd_opt_each.search
#define nd_opts_last		nd_opt_each.last
#define nd_opts_done		nd_opt_each.done

/* XXX: need nd6_var.h?? */
/* nd6.c */
void nd6_init(void);
#ifdef VIMAGE
void nd6_destroy(void);
#endif
struct nd_ifinfo *nd6_ifattach(struct ifnet *);
void nd6_ifdetach(struct nd_ifinfo *);
int nd6_is_addr_neighbor(const struct sockaddr_in6 *, struct ifnet *);
void nd6_option_init(void *, int, union nd_opts *);
struct nd_opt_hdr *nd6_option(union nd_opts *);
int nd6_options(union nd_opts *);
struct llentry *nd6_lookup(const struct in6_addr *, int, struct ifnet *);
struct llentry *nd6_alloc(const struct in6_addr *, int, struct ifnet *);
void nd6_setmtu(struct ifnet *);
void nd6_llinfo_setstate(struct llentry *lle, int newstate);
void nd6_timer(void *);
void nd6_purge(struct ifnet *);
int nd6_resolve_addr(struct ifnet *ifp, int flags, const struct sockaddr *dst,
    char *desten, uint32_t *pflags);
int nd6_resolve(struct ifnet *, int, struct mbuf *,
    const struct sockaddr *, u_char *, uint32_t *);
int nd6_ioctl(u_long, caddr_t, struct ifnet *);
void nd6_cache_lladdr(struct ifnet *, struct in6_addr *,
    char *, int, int, int);
void nd6_grab_holdchain(struct llentry *, struct mbuf **,
    struct sockaddr_in6 *);
int nd6_flush_holdchain(struct ifnet *, struct ifnet *, struct mbuf *,
    struct sockaddr_in6 *);
int nd6_add_ifa_lle(struct in6_ifaddr *);
void nd6_rem_ifa_lle(struct in6_ifaddr *, int);
int nd6_output_ifp(struct ifnet *, struct ifnet *, struct mbuf *,
    struct sockaddr_in6 *, struct route *);

/* nd6_nbr.c */
void nd6_na_input(struct mbuf *, int, int);
void nd6_na_output(struct ifnet *, const struct in6_addr *,
    const struct in6_addr *, u_long, int, struct sockaddr *);
void nd6_ns_input(struct mbuf *, int, int);
void nd6_ns_output(struct ifnet *, const struct in6_addr *,
    const struct in6_addr *, const struct in6_addr *, uint8_t *);
caddr_t nd6_ifptomac(struct ifnet *);
void nd6_dad_init(void);
void nd6_dad_start(struct ifaddr *, int);
void nd6_dad_stop(struct ifaddr *);

/* nd6_rtr.c */
void nd6_rs_input(struct mbuf *, int, int);
void nd6_ra_input(struct mbuf *, int, int);
void defrouter_reset(void);
void defrouter_select(void);
-void defrtrlist_del(struct nd_defrouter *);
+void defrouter_ref(struct nd_defrouter *);
+void defrouter_rele(struct nd_defrouter *);
+void defrouter_remove(struct nd_defrouter *);
+void defrouter_unlink(struct nd_defrouter *, struct nd_drhead *);
+void defrouter_del(struct nd_defrouter *);
void prelist_remove(struct nd_prefix *);
int nd6_prelist_add(struct nd_prefixctl *, struct nd_defrouter *,
    struct nd_prefix **);
void pfxlist_onlink_check(void);
struct nd_defrouter *defrouter_lookup(struct in6_addr *, struct ifnet *);
+struct nd_defrouter *defrouter_lookup_locked(struct in6_addr *, struct ifnet *);
struct nd_prefix *nd6_prefix_lookup(struct nd_prefixctl *);
void rt6_flush(struct in6_addr *, struct ifnet *);
int nd6_setdefaultiface(int);
int in6_tmpifadd(const struct in6_ifaddr *, int, int);

#endif /* _KERNEL */

#endif /* _NETINET6_ND6_H_ */
Index: head/sys/netinet6/nd6_nbr.c
===================================================================
--- head/sys/netinet6/nd6_nbr.c (revision 296062)
+++ head/sys/netinet6/nd6_nbr.c (revision 296063)
@@ -1,1576 +1,1579 @@
 /*-
 * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
 * All rights reserved.
* * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the project nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* * $KAME: nd6_nbr.c,v 1.86 2002/01/21 02:33:04 jinmei Exp $ */ #include __FBSDID("$FreeBSD$"); #include "opt_inet.h" #include "opt_inet6.h" #include "opt_ipsec.h" #include "opt_mpath.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef RADIX_MPATH #include #endif #include #include #include #include #include #include #include #include #include #include #include #include #include #define SDL(s) ((struct sockaddr_dl *)s) struct dadq; static struct dadq *nd6_dad_find(struct ifaddr *, struct nd_opt_nonce *); static void nd6_dad_add(struct dadq *dp); static void nd6_dad_del(struct dadq *dp); static void nd6_dad_rele(struct dadq *); static void nd6_dad_starttimer(struct dadq *, int, int); static void nd6_dad_stoptimer(struct dadq *); static void nd6_dad_timer(struct dadq *); static void nd6_dad_duplicated(struct ifaddr *, struct dadq *); static void nd6_dad_ns_output(struct dadq *); static void nd6_dad_ns_input(struct ifaddr *, struct nd_opt_nonce *); static void nd6_dad_na_input(struct ifaddr *); static void nd6_na_output_fib(struct ifnet *, const struct in6_addr *, const struct in6_addr *, u_long, int, struct sockaddr *, u_int); static void nd6_ns_output_fib(struct ifnet *, const struct in6_addr *, const struct in6_addr *, const struct in6_addr *, uint8_t *, u_int); static VNET_DEFINE(int, dad_enhanced) = 1; #define V_dad_enhanced VNET(dad_enhanced) SYSCTL_DECL(_net_inet6_ip6); SYSCTL_INT(_net_inet6_ip6, OID_AUTO, dad_enhanced, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(dad_enhanced), 0, "Enable Enhanced DAD, which adds a random nonce to NS messages for DAD."); static VNET_DEFINE(int, dad_maxtry) = 15; /* max # of *tries* to transmit DAD packet */ #define V_dad_maxtry VNET(dad_maxtry) /* * Input a Neighbor Solicitation Message. 
 *
 * Based on RFC 2461
 * Based on RFC 2462 (duplicate address detection)
 *
 * m        - received packet; m->m_pkthdr.rcvif is the receiving interface.
 *            The mbuf is released with m_freem() on every path that leaves
 *            through the freeit/bad labels.
 * off      - byte offset from the start of the IPv6 header to the NS message.
 * icmp6len - length of the NS message including its options.
 *
 * Depending on the checks below the packet is dropped, handed to DAD
 * processing (nd6_dad_ns_input()), or answered with a Neighbor
 * Advertisement via nd6_na_output_fib().
 */
void
nd6_ns_input(struct mbuf *m, int off, int icmp6len)
{
	struct ifnet *ifp = m->m_pkthdr.rcvif;
	struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *);
	struct nd_neighbor_solicit *nd_ns;
	struct in6_addr saddr6 = ip6->ip6_src;
	struct in6_addr daddr6 = ip6->ip6_dst;
	struct in6_addr taddr6;
	struct in6_addr myaddr6;
	char *lladdr = NULL;
	struct ifaddr *ifa = NULL;	/* referenced; dropped at freeit/bad */
	int lladdrlen = 0;
	int anycast = 0, proxy = 0, tentative = 0;
	int tlladdr;
	int rflag;
	union nd_opts ndopts;
	struct sockaddr_dl proxydl;
	char ip6bufs[INET6_ADDRSTRLEN], ip6bufd[INET6_ADDRSTRLEN];

	/*
	 * Router flag for the NA we may send: set when forwarding, but
	 * cleared again on an RA-accepting interface if V_ip6_norbit_raif
	 * is set.
	 */
	rflag = (V_ip6_forwarding) ? ND_NA_FLAG_ROUTER : 0;
	if (ND_IFINFO(ifp)->flags & ND6_IFF_ACCEPT_RTADV && V_ip6_norbit_raif)
		rflag = 0;
#ifndef PULLDOWN_TEST
	IP6_EXTHDR_CHECK(m, off, icmp6len,);
	nd_ns = (struct nd_neighbor_solicit *)((caddr_t)ip6 + off);
#else
	IP6_EXTHDR_GET(nd_ns, struct nd_neighbor_solicit *, m, off, icmp6len);
	if (nd_ns == NULL) {
		/* presumably IP6_EXTHDR_GET already freed m - confirm */
		ICMP6STAT_INC(icp6s_tooshort);
		return;
	}
#endif
	ip6 = mtod(m, struct ip6_hdr *); /* adjust pointer for safety */
	taddr6 = nd_ns->nd_ns_target;
	if (in6_setscope(&taddr6, ifp, NULL) != 0)
		goto bad;

	/* ND messages must arrive with a hop limit of 255. */
	if (ip6->ip6_hlim != 255) {
		nd6log((LOG_ERR,
		    "nd6_ns_input: invalid hlim (%d) from %s to %s on %s\n",
		    ip6->ip6_hlim, ip6_sprintf(ip6bufs, &ip6->ip6_src),
		    ip6_sprintf(ip6bufd, &ip6->ip6_dst), if_name(ifp)));
		goto bad;
	}

	if (IN6_IS_ADDR_UNSPECIFIED(&saddr6)) {
		/* dst has to be a solicited node multicast address. */
		if (daddr6.s6_addr16[0] == IPV6_ADDR_INT16_MLL &&
		    /* don't check ifindex portion */
		    daddr6.s6_addr32[1] == 0 &&
		    daddr6.s6_addr32[2] == IPV6_ADDR_INT32_ONE &&
		    daddr6.s6_addr8[12] == 0xff) {
			; /* good */
		} else {
			nd6log((LOG_INFO, "nd6_ns_input: bad DAD packet "
			    "(wrong ip6 dst)\n"));
			goto bad;
		}
	} else if (!V_nd6_onlink_ns_rfc4861) {
		struct sockaddr_in6 src_sa6;

		/*
		 * According to recent IETF discussions, it is not a good idea
		 * to accept a NS from an address which would not be deemed
		 * to be a neighbor otherwise.  This point is expected to be
		 * clarified in future revisions of the specification.
		 */
		bzero(&src_sa6, sizeof(src_sa6));
		src_sa6.sin6_family = AF_INET6;
		src_sa6.sin6_len = sizeof(src_sa6);
		src_sa6.sin6_addr = saddr6;
		if (nd6_is_addr_neighbor(&src_sa6, ifp) == 0) {
			nd6log((LOG_INFO, "nd6_ns_input: "
			    "NS packet from non-neighbor\n"));
			goto bad;
		}
	}

	if (IN6_IS_ADDR_MULTICAST(&taddr6)) {
		nd6log((LOG_INFO, "nd6_ns_input: bad NS target (multicast)\n"));
		goto bad;
	}

	/* What remains after the fixed NS header is the option area. */
	icmp6len -= sizeof(*nd_ns);
	nd6_option_init(nd_ns + 1, icmp6len, &ndopts);
	if (nd6_options(&ndopts) < 0) {
		nd6log((LOG_INFO,
		    "nd6_ns_input: invalid ND option, ignored\n"));
		/* nd6_options have incremented stats */
		goto freeit;
	}

	if (ndopts.nd_opts_src_lladdr) {
		lladdr = (char *)(ndopts.nd_opts_src_lladdr + 1);
		/* nd_opt_len counts units of 8 bytes */
		lladdrlen = ndopts.nd_opts_src_lladdr->nd_opt_len << 3;
	}

	if (IN6_IS_ADDR_UNSPECIFIED(&ip6->ip6_src) && lladdr) {
		nd6log((LOG_INFO, "nd6_ns_input: bad DAD packet "
		    "(link-layer address option)\n"));
		goto bad;
	}

	/*
	 * Attaching target link-layer address to the NA?
	 * (RFC 2461 7.2.4)
	 *
	 * NS IP dst is unicast/anycast			MUST NOT add
	 * NS IP dst is solicited-node multicast	MUST add
	 *
	 * In implementation, we add target link-layer address by default.
	 * We do not add one in MUST NOT cases.
	 */
	if (!IN6_IS_ADDR_MULTICAST(&daddr6))
		tlladdr = 0;
	else
		tlladdr = 1;

	/*
	 * Target address (taddr6) must be either:
	 * (1) Valid unicast/anycast address for my receiving interface,
	 * (2) Unicast address for which I'm offering proxy service, or
	 * (3) "tentative" address on which DAD is being performed.
	 */
	/* (1) and (3) check. */
	if (ifp->if_carp)
		ifa = (*carp_iamatch6_p)(ifp, &taddr6);
	else
		ifa = (struct ifaddr *)in6ifa_ifpwithaddr(ifp, &taddr6);

	/* (2) check. */
	if (ifa == NULL) {
		struct sockaddr_dl rt_gateway;
		struct rt_addrinfo info;
		struct sockaddr_in6 dst6;

		bzero(&dst6, sizeof(dst6));
		dst6.sin6_len = sizeof(struct sockaddr_in6);
		dst6.sin6_family = AF_INET6;
		dst6.sin6_addr = taddr6;

		/* Ask the routing lookup to fill in the gateway sockaddr. */
		bzero(&rt_gateway, sizeof(rt_gateway));
		rt_gateway.sdl_len = sizeof(rt_gateway);
		bzero(&info, sizeof(info));
		info.rti_info[RTAX_GATEWAY] = (struct sockaddr *)&rt_gateway;

		/* Always use the default FIB. */
		if (rib_lookup_info(RT_DEFAULT_FIB, (struct sockaddr *)&dst6,
		    0, 0, &info) == 0) {
			if ((info.rti_flags & RTF_ANNOUNCE) != 0 &&
			    rt_gateway.sdl_family == AF_LINK) {

				/*
				 * proxy NDP for single entry
				 */
				proxydl = *SDL(&rt_gateway);
				ifa = (struct ifaddr *)in6ifa_ifpforlinklocal(
				    ifp, IN6_IFF_NOTREADY|IN6_IFF_ANYCAST);
				if (ifa)
					proxy = 1;
			}
		}
	}
	if (ifa == NULL) {
		/*
		 * We've got an NS packet, and we don't have that address
		 * assigned for us.  We MUST silently ignore it.
		 * See RFC2461 7.2.3.
		 */
		goto freeit;
	}
	myaddr6 = *IFA_IN6(ifa);
	anycast = ((struct in6_ifaddr *)ifa)->ia6_flags & IN6_IFF_ANYCAST;
	tentative = ((struct in6_ifaddr *)ifa)->ia6_flags & IN6_IFF_TENTATIVE;
	if (((struct in6_ifaddr *)ifa)->ia6_flags & IN6_IFF_DUPLICATED)
		goto freeit;

	/*
	 * The option length must equal the interface address length plus
	 * the 2-byte option header, rounded up to a multiple of 8.
	 */
	if (lladdr && ((ifp->if_addrlen + 2 + 7) & ~7) != lladdrlen) {
		nd6log((LOG_INFO, "nd6_ns_input: lladdrlen mismatch for %s "
		    "(if %d, NS packet %d)\n",
		    ip6_sprintf(ip6bufs, &taddr6),
		    ifp->if_addrlen, lladdrlen - 2));
		goto bad;
	}

	/* The sender claims one of our own addresses as its source. */
	if (IN6_ARE_ADDR_EQUAL(&myaddr6, &saddr6)) {
		nd6log((LOG_INFO, "nd6_ns_input: duplicate IP6 address %s\n",
		    ip6_sprintf(ip6bufs, &saddr6)));
		goto freeit;
	}

	/*
	 * We have neighbor solicitation packet, with target address equals to
	 * one of my tentative address.
	 *
	 * src addr	how to process?
	 * ---		---
	 * multicast	of course, invalid (rejected in ip6_input)
	 * unicast	somebody is doing address resolution -> ignore
	 * unspec	dup address detection
	 *
	 * The processing is defined in RFC 2462.
	 */
	if (tentative) {
		/*
		 * If source address is unspecified address, it is for
		 * duplicate address detection.
		 *
		 * If not, the packet is for address resolution;
		 * silently ignore it.
		 */
		if (IN6_IS_ADDR_UNSPECIFIED(&saddr6))
			nd6_dad_ns_input(ifa, ndopts.nd_opts_nonce);

		goto freeit;
	}

	/*
	 * If the source address is unspecified address, entries must not
	 * be created or updated.
	 * It looks that sender is performing DAD.  Output NA toward
	 * all-node multicast address, to tell the sender that I'm using
	 * the address.
	 * S bit ("solicited") must be zero.
	 */
	if (IN6_IS_ADDR_UNSPECIFIED(&saddr6)) {
		struct in6_addr in6_all;

		in6_all = in6addr_linklocal_allnodes;
		if (in6_setscope(&in6_all, ifp, NULL) != 0)
			goto bad;
		nd6_na_output_fib(ifp, &in6_all, &taddr6,
		    ((anycast || proxy || !tlladdr) ? 0 : ND_NA_FLAG_OVERRIDE) |
		    rflag, tlladdr, proxy ? (struct sockaddr *)&proxydl : NULL,
		    M_GETFIB(m));
		goto freeit;
	}

	/* Ordinary address resolution: learn the sender, then answer it. */
	nd6_cache_lladdr(ifp, &saddr6, lladdr, lladdrlen,
	    ND_NEIGHBOR_SOLICIT, 0);

	nd6_na_output_fib(ifp, &saddr6, &taddr6,
	    ((anycast || proxy || !tlladdr) ? 0 : ND_NA_FLAG_OVERRIDE) |
	    rflag | ND_NA_FLAG_SOLICITED, tlladdr,
	    proxy ? (struct sockaddr *)&proxydl : NULL, M_GETFIB(m));
 freeit:
	/* Normal exit: drop the address reference (if any) and the packet. */
	if (ifa != NULL)
		ifa_free(ifa);
	m_freem(m);
	return;

 bad:
	/* Error exit: log the addresses, count a bad NS, then clean up. */
	nd6log((LOG_ERR, "nd6_ns_input: src=%s\n",
		ip6_sprintf(ip6bufs, &saddr6)));
	nd6log((LOG_ERR, "nd6_ns_input: dst=%s\n",
		ip6_sprintf(ip6bufs, &daddr6)));
	nd6log((LOG_ERR, "nd6_ns_input: tgt=%s\n",
		ip6_sprintf(ip6bufs, &taddr6)));
	ICMP6STAT_INC(icp6s_badns);
	if (ifa != NULL)
		ifa_free(ifa);
	m_freem(m);
}

/*
 * Output a Neighbor Solicitation Message. Caller specifies:
 *	- ICMP6 header source IP6 address
 *	- ND6 header target IP6 address
 *	- ND6 header source datalink address
 *
 * Based on RFC 2461
 * Based on RFC 2462 (duplicate address detection)
 *
 *    ln - for source address determination
 * nonce - If non-NULL, NS is used for duplicate address detection and
 *         the value (length is ND_OPT_NONCE_LEN) is used as a random nonce.
*/ static void nd6_ns_output_fib(struct ifnet *ifp, const struct in6_addr *saddr6, const struct in6_addr *daddr6, const struct in6_addr *taddr6, uint8_t *nonce, u_int fibnum) { struct mbuf *m; struct m_tag *mtag; struct ip6_hdr *ip6; struct nd_neighbor_solicit *nd_ns; struct ip6_moptions im6o; int icmp6len; int maxlen; caddr_t mac; if (IN6_IS_ADDR_MULTICAST(taddr6)) return; /* estimate the size of message */ maxlen = sizeof(*ip6) + sizeof(*nd_ns); maxlen += (sizeof(struct nd_opt_hdr) + ifp->if_addrlen + 7) & ~7; KASSERT(max_linkhdr + maxlen <= MCLBYTES, ( "%s: max_linkhdr + maxlen > MCLBYTES (%d + %d > %d)", __func__, max_linkhdr, maxlen, MCLBYTES)); if (max_linkhdr + maxlen > MHLEN) m = m_getcl(M_NOWAIT, MT_DATA, M_PKTHDR); else m = m_gethdr(M_NOWAIT, MT_DATA); if (m == NULL) return; M_SETFIB(m, fibnum); if (daddr6 == NULL || IN6_IS_ADDR_MULTICAST(daddr6)) { m->m_flags |= M_MCAST; im6o.im6o_multicast_ifp = ifp; im6o.im6o_multicast_hlim = 255; im6o.im6o_multicast_loop = 0; } icmp6len = sizeof(*nd_ns); m->m_pkthdr.len = m->m_len = sizeof(*ip6) + icmp6len; m->m_data += max_linkhdr; /* or M_ALIGN() equivalent? 
*/ /* fill neighbor solicitation packet */ ip6 = mtod(m, struct ip6_hdr *); ip6->ip6_flow = 0; ip6->ip6_vfc &= ~IPV6_VERSION_MASK; ip6->ip6_vfc |= IPV6_VERSION; /* ip6->ip6_plen will be set later */ ip6->ip6_nxt = IPPROTO_ICMPV6; ip6->ip6_hlim = 255; if (daddr6) ip6->ip6_dst = *daddr6; else { ip6->ip6_dst.s6_addr16[0] = IPV6_ADDR_INT16_MLL; ip6->ip6_dst.s6_addr16[1] = 0; ip6->ip6_dst.s6_addr32[1] = 0; ip6->ip6_dst.s6_addr32[2] = IPV6_ADDR_INT32_ONE; ip6->ip6_dst.s6_addr32[3] = taddr6->s6_addr32[3]; ip6->ip6_dst.s6_addr8[12] = 0xff; if (in6_setscope(&ip6->ip6_dst, ifp, NULL) != 0) goto bad; } if (nonce == NULL) { struct ifaddr *ifa = NULL; /* * RFC2461 7.2.2: * "If the source address of the packet prompting the * solicitation is the same as one of the addresses assigned * to the outgoing interface, that address SHOULD be placed * in the IP Source Address of the outgoing solicitation. * Otherwise, any one of the addresses assigned to the * interface should be used." * * We use the source address for the prompting packet * (saddr6), if saddr6 belongs to the outgoing interface. * Otherwise, we perform the source address selection as usual. */ if (saddr6 != NULL) ifa = (struct ifaddr *)in6ifa_ifpwithaddr(ifp, saddr6); if (ifa != NULL) { /* ip6_src set already. */ ip6->ip6_src = *saddr6; ifa_free(ifa); } else { int error; struct in6_addr dst6, src6; uint32_t scopeid; in6_splitscope(&ip6->ip6_dst, &dst6, &scopeid); error = in6_selectsrc_addr(RT_DEFAULT_FIB, &dst6, scopeid, ifp, &src6, NULL); if (error) { char ip6buf[INET6_ADDRSTRLEN]; nd6log((LOG_DEBUG, "%s: source can't be " "determined: dst=%s, error=%d\n", __func__, ip6_sprintf(ip6buf, &dst6), error)); goto bad; } ip6->ip6_src = src6; } } else { /* * Source address for DAD packet must always be IPv6 * unspecified address. (0::0) * We actually don't have to 0-clear the address (we did it * above), but we do so here explicitly to make the intention * clearer. 
*/ bzero(&ip6->ip6_src, sizeof(ip6->ip6_src)); } nd_ns = (struct nd_neighbor_solicit *)(ip6 + 1); nd_ns->nd_ns_type = ND_NEIGHBOR_SOLICIT; nd_ns->nd_ns_code = 0; nd_ns->nd_ns_reserved = 0; nd_ns->nd_ns_target = *taddr6; in6_clearscope(&nd_ns->nd_ns_target); /* XXX */ /* * Add source link-layer address option. * * spec implementation * --- --- * DAD packet MUST NOT do not add the option * there's no link layer address: * impossible do not add the option * there's link layer address: * Multicast NS MUST add one add the option * Unicast NS SHOULD add one add the option */ if (nonce == NULL && (mac = nd6_ifptomac(ifp))) { int optlen = sizeof(struct nd_opt_hdr) + ifp->if_addrlen; struct nd_opt_hdr *nd_opt = (struct nd_opt_hdr *)(nd_ns + 1); /* 8 byte alignments... */ optlen = (optlen + 7) & ~7; m->m_pkthdr.len += optlen; m->m_len += optlen; icmp6len += optlen; bzero((caddr_t)nd_opt, optlen); nd_opt->nd_opt_type = ND_OPT_SOURCE_LINKADDR; nd_opt->nd_opt_len = optlen >> 3; bcopy(mac, (caddr_t)(nd_opt + 1), ifp->if_addrlen); } /* * Add a Nonce option (RFC 3971) to detect looped back NS messages. * This behavior is documented as Enhanced Duplicate Address * Detection in RFC 7527. * net.inet6.ip6.dad_enhanced=0 disables this. */ if (V_dad_enhanced != 0 && nonce != NULL) { int optlen = sizeof(struct nd_opt_hdr) + ND_OPT_NONCE_LEN; struct nd_opt_hdr *nd_opt = (struct nd_opt_hdr *)(nd_ns + 1); /* 8-byte alignment is required. 
*/ optlen = (optlen + 7) & ~7; m->m_pkthdr.len += optlen; m->m_len += optlen; icmp6len += optlen; bzero((caddr_t)nd_opt, optlen); nd_opt->nd_opt_type = ND_OPT_NONCE; nd_opt->nd_opt_len = optlen >> 3; bcopy(nonce, (caddr_t)(nd_opt + 1), ND_OPT_NONCE_LEN); } ip6->ip6_plen = htons((u_short)icmp6len); nd_ns->nd_ns_cksum = 0; nd_ns->nd_ns_cksum = in6_cksum(m, IPPROTO_ICMPV6, sizeof(*ip6), icmp6len); if (send_sendso_input_hook != NULL) { mtag = m_tag_get(PACKET_TAG_ND_OUTGOING, sizeof(unsigned short), M_NOWAIT); if (mtag == NULL) goto bad; *(unsigned short *)(mtag + 1) = nd_ns->nd_ns_type; m_tag_prepend(m, mtag); } ip6_output(m, NULL, NULL, (nonce != NULL) ? IPV6_UNSPECSRC : 0, &im6o, NULL, NULL); icmp6_ifstat_inc(ifp, ifs6_out_msg); icmp6_ifstat_inc(ifp, ifs6_out_neighborsolicit); ICMP6STAT_INC(icp6s_outhist[ND_NEIGHBOR_SOLICIT]); return; bad: m_freem(m); } #ifndef BURN_BRIDGES void nd6_ns_output(struct ifnet *ifp, const struct in6_addr *saddr6, const struct in6_addr *daddr6, const struct in6_addr *taddr6,uint8_t *nonce) { nd6_ns_output_fib(ifp, saddr6, daddr6, taddr6, nonce, RT_DEFAULT_FIB); } #endif /* * Neighbor advertisement input handling. 
* * Based on RFC 2461 * Based on RFC 2462 (duplicate address detection) * * the following items are not implemented yet: * - proxy advertisement delay rule (RFC2461 7.2.8, last paragraph, SHOULD) * - anycast advertisement delay rule (RFC2461 7.2.7, SHOULD) */ void nd6_na_input(struct mbuf *m, int off, int icmp6len) { struct ifnet *ifp = m->m_pkthdr.rcvif; struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *); struct nd_neighbor_advert *nd_na; struct in6_addr daddr6 = ip6->ip6_dst; struct in6_addr taddr6; int flags; int is_router; int is_solicited; int is_override; char *lladdr = NULL; int lladdrlen = 0; int checklink = 0; struct ifaddr *ifa; struct llentry *ln = NULL; union nd_opts ndopts; struct mbuf *chain = NULL; struct sockaddr_in6 sin6; u_char linkhdr[LLE_MAX_LINKHDR]; size_t linkhdrsize; int lladdr_off; char ip6bufs[INET6_ADDRSTRLEN], ip6bufd[INET6_ADDRSTRLEN]; if (ip6->ip6_hlim != 255) { nd6log((LOG_ERR, "nd6_na_input: invalid hlim (%d) from %s to %s on %s\n", ip6->ip6_hlim, ip6_sprintf(ip6bufs, &ip6->ip6_src), ip6_sprintf(ip6bufd, &ip6->ip6_dst), if_name(ifp))); goto bad; } #ifndef PULLDOWN_TEST IP6_EXTHDR_CHECK(m, off, icmp6len,); nd_na = (struct nd_neighbor_advert *)((caddr_t)ip6 + off); #else IP6_EXTHDR_GET(nd_na, struct nd_neighbor_advert *, m, off, icmp6len); if (nd_na == NULL) { ICMP6STAT_INC(icp6s_tooshort); return; } #endif flags = nd_na->nd_na_flags_reserved; is_router = ((flags & ND_NA_FLAG_ROUTER) != 0); is_solicited = ((flags & ND_NA_FLAG_SOLICITED) != 0); is_override = ((flags & ND_NA_FLAG_OVERRIDE) != 0); memset(&sin6, 0, sizeof(sin6)); taddr6 = nd_na->nd_na_target; if (in6_setscope(&taddr6, ifp, NULL)) goto bad; /* XXX: impossible */ if (IN6_IS_ADDR_MULTICAST(&taddr6)) { nd6log((LOG_ERR, "nd6_na_input: invalid target address %s\n", ip6_sprintf(ip6bufs, &taddr6))); goto bad; } if (IN6_IS_ADDR_MULTICAST(&daddr6)) if (is_solicited) { nd6log((LOG_ERR, "nd6_na_input: a solicited adv is multicasted\n")); goto bad; } icmp6len -= sizeof(*nd_na); 
nd6_option_init(nd_na + 1, icmp6len, &ndopts); if (nd6_options(&ndopts) < 0) { nd6log((LOG_INFO, "nd6_na_input: invalid ND option, ignored\n")); /* nd6_options have incremented stats */ goto freeit; } if (ndopts.nd_opts_tgt_lladdr) { lladdr = (char *)(ndopts.nd_opts_tgt_lladdr + 1); lladdrlen = ndopts.nd_opts_tgt_lladdr->nd_opt_len << 3; } /* * This effectively disables the DAD check on a non-master CARP * address. */ if (ifp->if_carp) ifa = (*carp_iamatch6_p)(ifp, &taddr6); else ifa = (struct ifaddr *)in6ifa_ifpwithaddr(ifp, &taddr6); /* * Target address matches one of my interface address. * * If my address is tentative, this means that there's somebody * already using the same address as mine. This indicates DAD failure. * This is defined in RFC 2462. * * Otherwise, process as defined in RFC 2461. */ if (ifa && (((struct in6_ifaddr *)ifa)->ia6_flags & IN6_IFF_TENTATIVE)) { nd6_dad_na_input(ifa); ifa_free(ifa); goto freeit; } /* Just for safety, maybe unnecessary. */ if (ifa) { ifa_free(ifa); log(LOG_ERR, "nd6_na_input: duplicate IP6 address %s\n", ip6_sprintf(ip6bufs, &taddr6)); goto freeit; } if (lladdr && ((ifp->if_addrlen + 2 + 7) & ~7) != lladdrlen) { nd6log((LOG_INFO, "nd6_na_input: lladdrlen mismatch for %s " "(if %d, NA packet %d)\n", ip6_sprintf(ip6bufs, &taddr6), ifp->if_addrlen, lladdrlen - 2)); goto bad; } /* * If no neighbor cache entry is found, NA SHOULD silently be * discarded. */ IF_AFDATA_RLOCK(ifp); ln = nd6_lookup(&taddr6, LLE_EXCLUSIVE, ifp); IF_AFDATA_RUNLOCK(ifp); if (ln == NULL) { goto freeit; } if (ln->ln_state == ND6_LLINFO_INCOMPLETE) { /* * If the link-layer has address, and no lladdr option came, * discard the packet. */ if (ifp->if_addrlen && lladdr == NULL) { goto freeit; } /* * Record link-layer address, and update the state. 
*/ linkhdrsize = sizeof(linkhdr); if (lltable_calc_llheader(ifp, AF_INET6, lladdr, linkhdr, &linkhdrsize, &lladdr_off) != 0) return; if (lltable_try_set_entry_addr(ifp, ln, linkhdr, linkhdrsize, lladdr_off) == 0) { ln = NULL; goto freeit; } EVENTHANDLER_INVOKE(lle_event, ln, LLENTRY_RESOLVED); if (is_solicited) nd6_llinfo_setstate(ln, ND6_LLINFO_REACHABLE); else nd6_llinfo_setstate(ln, ND6_LLINFO_STALE); if ((ln->ln_router = is_router) != 0) { /* * This means a router's state has changed from * non-reachable to probably reachable, and might * affect the status of associated prefixes.. */ checklink = 1; } } else { int llchange; /* * Check if the link-layer address has changed or not. */ if (lladdr == NULL) llchange = 0; else { if (ln->la_flags & LLE_VALID) { if (bcmp(lladdr, ln->ll_addr, ifp->if_addrlen)) llchange = 1; else llchange = 0; } else llchange = 1; } /* * This is VERY complex. Look at it with care. * * override solicit lladdr llchange action * (L: record lladdr) * * 0 0 n -- (2c) * 0 0 y n (2b) L * 0 0 y y (1) REACHABLE->STALE * 0 1 n -- (2c) *->REACHABLE * 0 1 y n (2b) L *->REACHABLE * 0 1 y y (1) REACHABLE->STALE * 1 0 n -- (2a) * 1 0 y n (2a) L * 1 0 y y (2a) L *->STALE * 1 1 n -- (2a) *->REACHABLE * 1 1 y n (2a) L *->REACHABLE * 1 1 y y (2a) L *->REACHABLE */ if (!is_override && (lladdr != NULL && llchange)) { /* (1) */ /* * If state is REACHABLE, make it STALE. * no other updates should be done. */ if (ln->ln_state == ND6_LLINFO_REACHABLE) nd6_llinfo_setstate(ln, ND6_LLINFO_STALE); goto freeit; } else if (is_override /* (2a) */ || (!is_override && (lladdr != NULL && !llchange)) /* (2b) */ || lladdr == NULL) { /* (2c) */ /* * Update link-local address, if any. 
*/ if (lladdr != NULL) { linkhdrsize = sizeof(linkhdr); if (lltable_calc_llheader(ifp, AF_INET6, lladdr, linkhdr, &linkhdrsize, &lladdr_off) != 0) goto freeit; if (lltable_try_set_entry_addr(ifp, ln, linkhdr, linkhdrsize, lladdr_off) == 0) { ln = NULL; goto freeit; } EVENTHANDLER_INVOKE(lle_event, ln, LLENTRY_RESOLVED); } /* * If solicited, make the state REACHABLE. * If not solicited and the link-layer address was * changed, make it STALE. */ if (is_solicited) nd6_llinfo_setstate(ln, ND6_LLINFO_REACHABLE); else { if (lladdr != NULL && llchange) nd6_llinfo_setstate(ln, ND6_LLINFO_STALE); } } if (ln->ln_router && !is_router) { /* * The peer dropped the router flag. * Remove the sender from the Default Router List and * update the Destination Cache entries. */ struct nd_defrouter *dr; - struct in6_addr *in6; struct ifnet *nd6_ifp; - in6 = &ln->r_l3addr.addr6; nd6_ifp = lltable_get_ifp(ln->lle_tbl); - dr = defrouter_lookup(in6, nd6_ifp); - if (dr) - defrtrlist_del(dr); - else if (ND_IFINFO(nd6_ifp)->flags & - ND6_IFF_ACCEPT_RTADV) { - /* - * Even if the neighbor is not in the default - * router list, the neighbor may be used - * as a next hop for some destinations - * (e.g. redirect case). So we must - * call rt6_flush explicitly. - */ - rt6_flush(&ip6->ip6_src, ifp); + ND6_WLOCK(); + dr = defrouter_lookup_locked(&ln->r_l3addr.addr6, + nd6_ifp); + if (dr != NULL) { + /* releases the ND lock */ + defrouter_remove(dr); + dr = NULL; + } else { + ND6_WUNLOCK(); + if ((ND_IFINFO(nd6_ifp)->flags & ND6_IFF_ACCEPT_RTADV) != 0) { + /* + * Even if the neighbor is not in the default + * router list, the neighbor may be used + * as a next hop for some destinations + * (e.g. redirect case). So we must + * call rt6_flush explicitly. + */ + rt6_flush(&ip6->ip6_src, ifp); + } } } ln->ln_router = is_router; } /* XXX - QL * Does this matter?
* rt->rt_flags &= ~RTF_REJECT; */ ln->la_asked = 0; if (ln->la_hold != NULL) nd6_grab_holdchain(ln, &chain, &sin6); freeit: if (ln != NULL) LLE_WUNLOCK(ln); if (chain != NULL) nd6_flush_holdchain(ifp, ifp, chain, &sin6); if (checklink) pfxlist_onlink_check(); m_freem(m); return; bad: if (ln != NULL) LLE_WUNLOCK(ln); ICMP6STAT_INC(icp6s_badna); m_freem(m); } /* * Neighbor advertisement output handling. * * Based on RFC 2461 * * the following items are not implemented yet: * - proxy advertisement delay rule (RFC2461 7.2.8, last paragraph, SHOULD) * - anycast advertisement delay rule (RFC2461 7.2.7, SHOULD) * * tlladdr - 1 if include target link-layer address * sdl0 - sockaddr_dl (= proxy NA) or NULL */ static void nd6_na_output_fib(struct ifnet *ifp, const struct in6_addr *daddr6_0, const struct in6_addr *taddr6, u_long flags, int tlladdr, struct sockaddr *sdl0, u_int fibnum) { struct mbuf *m; struct m_tag *mtag; struct ip6_hdr *ip6; struct nd_neighbor_advert *nd_na; struct ip6_moptions im6o; struct in6_addr daddr6, dst6, src6; uint32_t scopeid; int icmp6len, maxlen, error; caddr_t mac = NULL; daddr6 = *daddr6_0; /* make a local copy for modification */ /* estimate the size of message */ maxlen = sizeof(*ip6) + sizeof(*nd_na); maxlen += (sizeof(struct nd_opt_hdr) + ifp->if_addrlen + 7) & ~7; KASSERT(max_linkhdr + maxlen <= MCLBYTES, ( "%s: max_linkhdr + maxlen > MCLBYTES (%d + %d > %d)", __func__, max_linkhdr, maxlen, MCLBYTES)); if (max_linkhdr + maxlen > MHLEN) m = m_getcl(M_NOWAIT, MT_DATA, M_PKTHDR); else m = m_gethdr(M_NOWAIT, MT_DATA); if (m == NULL) return; M_SETFIB(m, fibnum); if (IN6_IS_ADDR_MULTICAST(&daddr6)) { m->m_flags |= M_MCAST; im6o.im6o_multicast_ifp = ifp; im6o.im6o_multicast_hlim = 255; im6o.im6o_multicast_loop = 0; } icmp6len = sizeof(*nd_na); m->m_pkthdr.len = m->m_len = sizeof(struct ip6_hdr) + icmp6len; m->m_data += max_linkhdr; /* or M_ALIGN() equivalent? 
*/ /* fill neighbor advertisement packet */ ip6 = mtod(m, struct ip6_hdr *); ip6->ip6_flow = 0; ip6->ip6_vfc &= ~IPV6_VERSION_MASK; ip6->ip6_vfc |= IPV6_VERSION; ip6->ip6_nxt = IPPROTO_ICMPV6; ip6->ip6_hlim = 255; if (IN6_IS_ADDR_UNSPECIFIED(&daddr6)) { /* reply to DAD */ daddr6.s6_addr16[0] = IPV6_ADDR_INT16_MLL; daddr6.s6_addr16[1] = 0; daddr6.s6_addr32[1] = 0; daddr6.s6_addr32[2] = 0; daddr6.s6_addr32[3] = IPV6_ADDR_INT32_ONE; if (in6_setscope(&daddr6, ifp, NULL)) goto bad; flags &= ~ND_NA_FLAG_SOLICITED; } ip6->ip6_dst = daddr6; /* * Select a source whose scope is the same as that of the dest. */ in6_splitscope(&daddr6, &dst6, &scopeid); error = in6_selectsrc_addr(RT_DEFAULT_FIB, &dst6, scopeid, ifp, &src6, NULL); if (error) { char ip6buf[INET6_ADDRSTRLEN]; nd6log((LOG_DEBUG, "nd6_na_output: source can't be " "determined: dst=%s, error=%d\n", ip6_sprintf(ip6buf, &daddr6), error)); goto bad; } ip6->ip6_src = src6; nd_na = (struct nd_neighbor_advert *)(ip6 + 1); nd_na->nd_na_type = ND_NEIGHBOR_ADVERT; nd_na->nd_na_code = 0; nd_na->nd_na_target = *taddr6; in6_clearscope(&nd_na->nd_na_target); /* XXX */ /* * "tlladdr" indicates NS's condition for adding tlladdr or not. * see nd6_ns_input() for details. * Basically, if NS packet is sent to unicast/anycast addr, * target lladdr option SHOULD NOT be included. */ if (tlladdr) { /* * sdl0 != NULL indicates proxy NA. If we do proxy, use * lladdr in sdl0. If we are not proxying (sending NA for * my address) use lladdr configured for the interface. */ if (sdl0 == NULL) { if (ifp->if_carp) mac = (*carp_macmatch6_p)(ifp, m, taddr6); if (mac == NULL) mac = nd6_ifptomac(ifp); } else if (sdl0->sa_family == AF_LINK) { struct sockaddr_dl *sdl; sdl = (struct sockaddr_dl *)sdl0; if (sdl->sdl_alen == ifp->if_addrlen) mac = LLADDR(sdl); } } if (tlladdr && mac) { int optlen = sizeof(struct nd_opt_hdr) + ifp->if_addrlen; struct nd_opt_hdr *nd_opt = (struct nd_opt_hdr *)(nd_na + 1); /* roundup to 8 bytes alignment! 
*/ optlen = (optlen + 7) & ~7; m->m_pkthdr.len += optlen; m->m_len += optlen; icmp6len += optlen; bzero((caddr_t)nd_opt, optlen); nd_opt->nd_opt_type = ND_OPT_TARGET_LINKADDR; nd_opt->nd_opt_len = optlen >> 3; bcopy(mac, (caddr_t)(nd_opt + 1), ifp->if_addrlen); } else flags &= ~ND_NA_FLAG_OVERRIDE; ip6->ip6_plen = htons((u_short)icmp6len); nd_na->nd_na_flags_reserved = flags; nd_na->nd_na_cksum = 0; nd_na->nd_na_cksum = in6_cksum(m, IPPROTO_ICMPV6, sizeof(struct ip6_hdr), icmp6len); if (send_sendso_input_hook != NULL) { mtag = m_tag_get(PACKET_TAG_ND_OUTGOING, sizeof(unsigned short), M_NOWAIT); if (mtag == NULL) goto bad; *(unsigned short *)(mtag + 1) = nd_na->nd_na_type; m_tag_prepend(m, mtag); } ip6_output(m, NULL, NULL, 0, &im6o, NULL, NULL); icmp6_ifstat_inc(ifp, ifs6_out_msg); icmp6_ifstat_inc(ifp, ifs6_out_neighboradvert); ICMP6STAT_INC(icp6s_outhist[ND_NEIGHBOR_ADVERT]); return; bad: m_freem(m); } #ifndef BURN_BRIDGES void nd6_na_output(struct ifnet *ifp, const struct in6_addr *daddr6_0, const struct in6_addr *taddr6, u_long flags, int tlladdr, struct sockaddr *sdl0) { nd6_na_output_fib(ifp, daddr6_0, taddr6, flags, tlladdr, sdl0, RT_DEFAULT_FIB); } #endif caddr_t nd6_ifptomac(struct ifnet *ifp) { switch (ifp->if_type) { case IFT_ARCNET: case IFT_ETHER: case IFT_FDDI: case IFT_IEEE1394: case IFT_L2VLAN: case IFT_IEEE80211: case IFT_INFINIBAND: case IFT_BRIDGE: case IFT_ISO88025: return IF_LLADDR(ifp); default: return NULL; } } struct dadq { TAILQ_ENTRY(dadq) dad_list; struct ifaddr *dad_ifa; int dad_count; /* max NS to send */ int dad_ns_tcount; /* # of trials to send NS */ int dad_ns_ocount; /* NS sent so far */ int dad_ns_icount; int dad_na_icount; int dad_ns_lcount; /* looped back NS */ int dad_loopbackprobe; /* probing state for loopback detection */ struct callout dad_timer_ch; struct vnet *dad_vnet; u_int dad_refcnt; #define ND_OPT_NONCE_LEN32 \ ((ND_OPT_NONCE_LEN + sizeof(uint32_t) - 1)/sizeof(uint32_t)) uint32_t dad_nonce[ND_OPT_NONCE_LEN32]; }; 
static VNET_DEFINE(TAILQ_HEAD(, dadq), dadq); static VNET_DEFINE(struct rwlock, dad_rwlock); #define V_dadq VNET(dadq) #define V_dad_rwlock VNET(dad_rwlock) #define DADQ_RLOCK() rw_rlock(&V_dad_rwlock) #define DADQ_RUNLOCK() rw_runlock(&V_dad_rwlock) #define DADQ_WLOCK() rw_wlock(&V_dad_rwlock) #define DADQ_WUNLOCK() rw_wunlock(&V_dad_rwlock) static void nd6_dad_add(struct dadq *dp) { DADQ_WLOCK(); TAILQ_INSERT_TAIL(&V_dadq, dp, dad_list); DADQ_WUNLOCK(); } static void nd6_dad_del(struct dadq *dp) { DADQ_WLOCK(); TAILQ_REMOVE(&V_dadq, dp, dad_list); DADQ_WUNLOCK(); nd6_dad_rele(dp); } static struct dadq * nd6_dad_find(struct ifaddr *ifa, struct nd_opt_nonce *n) { struct dadq *dp; DADQ_RLOCK(); TAILQ_FOREACH(dp, &V_dadq, dad_list) { if (dp->dad_ifa != ifa) continue; /* * Skip if the nonce matches the received one. * +2 in the length is required because of type and * length fields are included in a header. */ if (n != NULL && n->nd_opt_nonce_len == (ND_OPT_NONCE_LEN + 2) / 8 && memcmp(&n->nd_opt_nonce[0], &dp->dad_nonce[0], ND_OPT_NONCE_LEN) == 0) { dp->dad_ns_lcount++; continue; } refcount_acquire(&dp->dad_refcnt); break; } DADQ_RUNLOCK(); return (dp); } static void nd6_dad_starttimer(struct dadq *dp, int ticks, int send_ns) { if (send_ns != 0) nd6_dad_ns_output(dp); callout_reset(&dp->dad_timer_ch, ticks, (void (*)(void *))nd6_dad_timer, (void *)dp); } static void nd6_dad_stoptimer(struct dadq *dp) { callout_drain(&dp->dad_timer_ch); } static void nd6_dad_rele(struct dadq *dp) { if (refcount_release(&dp->dad_refcnt)) { ifa_free(dp->dad_ifa); free(dp, M_IP6NDP); } } void nd6_dad_init(void) { rw_init(&V_dad_rwlock, "nd6 DAD queue"); TAILQ_INIT(&V_dadq); } /* * Start Duplicate Address Detection (DAD) for specified interface address. */ void nd6_dad_start(struct ifaddr *ifa, int delay) { struct in6_ifaddr *ia = (struct in6_ifaddr *)ifa; struct dadq *dp; char ip6buf[INET6_ADDRSTRLEN]; int send_ns; /* * If we don't need DAD, don't do it. 
* There are several cases: * - DAD is disabled (ip6_dad_count == 0) * - the interface address is anycast */ if (!(ia->ia6_flags & IN6_IFF_TENTATIVE)) { log(LOG_DEBUG, "nd6_dad_start: called with non-tentative address " "%s(%s)\n", ip6_sprintf(ip6buf, &ia->ia_addr.sin6_addr), ifa->ifa_ifp ? if_name(ifa->ifa_ifp) : "???"); return; } if (ia->ia6_flags & IN6_IFF_ANYCAST) { ia->ia6_flags &= ~IN6_IFF_TENTATIVE; return; } if (!V_ip6_dad_count) { ia->ia6_flags &= ~IN6_IFF_TENTATIVE; return; } if (ifa->ifa_ifp == NULL) panic("nd6_dad_start: ifa->ifa_ifp == NULL"); if (ND_IFINFO(ifa->ifa_ifp)->flags & ND6_IFF_NO_DAD) { ia->ia6_flags &= ~IN6_IFF_TENTATIVE; return; } if (!(ifa->ifa_ifp->if_flags & IFF_UP) || !(ifa->ifa_ifp->if_drv_flags & IFF_DRV_RUNNING) || (ND_IFINFO(ifa->ifa_ifp)->flags & ND6_IFF_IFDISABLED)) { ia->ia6_flags |= IN6_IFF_TENTATIVE; return; } if ((dp = nd6_dad_find(ifa, NULL)) != NULL) { /* * DAD is already in progress. Let the existing entry * finish it. */ nd6_dad_rele(dp); return; } dp = malloc(sizeof(*dp), M_IP6NDP, M_NOWAIT | M_ZERO); if (dp == NULL) { log(LOG_ERR, "nd6_dad_start: memory allocation failed for " "%s(%s)\n", ip6_sprintf(ip6buf, &ia->ia_addr.sin6_addr), ifa->ifa_ifp ? if_name(ifa->ifa_ifp) : "???"); return; } callout_init(&dp->dad_timer_ch, 0); #ifdef VIMAGE dp->dad_vnet = curvnet; #endif nd6log((LOG_DEBUG, "%s: starting DAD for %s\n", if_name(ifa->ifa_ifp), ip6_sprintf(ip6buf, &ia->ia_addr.sin6_addr))); /* * Send NS packet for DAD, ip6_dad_count times. * Note that we must delay the first transmission, if this is the * first packet to be sent from the interface after interface * (re)initialization. 
*/ dp->dad_ifa = ifa; ifa_ref(dp->dad_ifa); dp->dad_count = V_ip6_dad_count; dp->dad_ns_icount = dp->dad_na_icount = 0; dp->dad_ns_ocount = dp->dad_ns_tcount = 0; dp->dad_ns_lcount = dp->dad_loopbackprobe = 0; refcount_init(&dp->dad_refcnt, 1); nd6_dad_add(dp); send_ns = 0; if (delay == 0) { send_ns = 1; delay = (long)ND_IFINFO(ifa->ifa_ifp)->retrans * hz / 1000; } nd6_dad_starttimer(dp, delay, send_ns); } /* * terminate DAD unconditionally. used for address removals. */ void nd6_dad_stop(struct ifaddr *ifa) { struct dadq *dp; dp = nd6_dad_find(ifa, NULL); if (!dp) { /* DAD wasn't started yet */ return; } nd6_dad_stoptimer(dp); /* * The DAD queue entry may have been removed by nd6_dad_timer() while * we were waiting for it to stop, so re-do the lookup. */ nd6_dad_rele(dp); if (nd6_dad_find(ifa, NULL) == NULL) return; nd6_dad_del(dp); nd6_dad_rele(dp); } static void nd6_dad_timer(struct dadq *dp) { CURVNET_SET(dp->dad_vnet); struct ifaddr *ifa = dp->dad_ifa; struct ifnet *ifp = dp->dad_ifa->ifa_ifp; struct in6_ifaddr *ia = (struct in6_ifaddr *)ifa; char ip6buf[INET6_ADDRSTRLEN]; /* Sanity check */ if (ia == NULL) { log(LOG_ERR, "nd6_dad_timer: called with null parameter\n"); goto err; } if (ND_IFINFO(ifp)->flags & ND6_IFF_IFDISABLED) { /* Do not need DAD for ifdisabled interface. */ log(LOG_ERR, "nd6_dad_timer: cancel DAD on %s because of " "ND6_IFF_IFDISABLED.\n", ifp->if_xname); goto err; } if (ia->ia6_flags & IN6_IFF_DUPLICATED) { log(LOG_ERR, "nd6_dad_timer: called with duplicated address " "%s(%s)\n", ip6_sprintf(ip6buf, &ia->ia_addr.sin6_addr), ifa->ifa_ifp ? if_name(ifa->ifa_ifp) : "???"); goto err; } if ((ia->ia6_flags & IN6_IFF_TENTATIVE) == 0) { log(LOG_ERR, "nd6_dad_timer: called with non-tentative address " "%s(%s)\n", ip6_sprintf(ip6buf, &ia->ia_addr.sin6_addr), ifa->ifa_ifp ? if_name(ifa->ifa_ifp) : "???"); goto err; } /* Stop DAD if the interface is down even after dad_maxtry attempts. 
*/ if ((dp->dad_ns_tcount > V_dad_maxtry) && (((ifp->if_flags & IFF_UP) == 0) || ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0))) { nd6log((LOG_INFO, "%s: could not run DAD " "because the interface was down or not running.\n", if_name(ifa->ifa_ifp))); goto err; } /* Need more checks? */ if (dp->dad_ns_ocount < dp->dad_count) { /* * We have more NS to go. Send NS packet for DAD. */ nd6_dad_starttimer(dp, (long)ND_IFINFO(ifa->ifa_ifp)->retrans * hz / 1000, 1); goto done; } else { /* * We have transmitted sufficient number of DAD packets. * See what we've got. */ if (dp->dad_ns_icount > 0 || dp->dad_na_icount > 0) /* We've seen NS or NA, means DAD has failed. */ nd6_dad_duplicated(ifa, dp); else if (V_dad_enhanced != 0 && dp->dad_ns_lcount > 0 && dp->dad_ns_lcount > dp->dad_loopbackprobe) { /* * Sec. 4.1 in RFC 7527 requires transmission of * additional probes until the loopback condition * becomes clear when a looped back probe is detected. */ log(LOG_ERR, "%s: a looped back NS message is " "detected during DAD for %s. " "Another DAD probes are being sent.\n", if_name(ifa->ifa_ifp), ip6_sprintf(ip6buf, IFA_IN6(ifa))); dp->dad_loopbackprobe = dp->dad_ns_lcount; /* * Send an NS immediately and increase dad_count by * V_nd6_mmaxtries - 1. */ dp->dad_count = dp->dad_ns_ocount + V_nd6_mmaxtries - 1; nd6_dad_starttimer(dp, (long)ND_IFINFO(ifa->ifa_ifp)->retrans * hz / 1000, 1); goto done; } else { /* * We are done with DAD. No NA came, no NS came. * No duplicate address found. Check IFDISABLED flag * again in case that it is changed between the * beginning of this function and here. 
*/ if ((ND_IFINFO(ifp)->flags & ND6_IFF_IFDISABLED) == 0) ia->ia6_flags &= ~IN6_IFF_TENTATIVE; nd6log((LOG_DEBUG, "%s: DAD complete for %s - no duplicates found\n", if_name(ifa->ifa_ifp), ip6_sprintf(ip6buf, &ia->ia_addr.sin6_addr))); if (dp->dad_ns_lcount > 0) log(LOG_ERR, "%s: DAD completed while " "a looped back NS message is detected " "during DAD for %s.\n", if_name(ifa->ifa_ifp), ip6_sprintf(ip6buf, IFA_IN6(ifa))); } } err: nd6_dad_del(dp); done: CURVNET_RESTORE(); } static void nd6_dad_duplicated(struct ifaddr *ifa, struct dadq *dp) { struct in6_ifaddr *ia = (struct in6_ifaddr *)ifa; struct ifnet *ifp; char ip6buf[INET6_ADDRSTRLEN]; log(LOG_ERR, "%s: DAD detected duplicate IPv6 address %s: " "NS in/out/loopback=%d/%d/%d, NA in=%d\n", if_name(ifa->ifa_ifp), ip6_sprintf(ip6buf, &ia->ia_addr.sin6_addr), dp->dad_ns_icount, dp->dad_ns_ocount, dp->dad_ns_lcount, dp->dad_na_icount); ia->ia6_flags &= ~IN6_IFF_TENTATIVE; ia->ia6_flags |= IN6_IFF_DUPLICATED; ifp = ifa->ifa_ifp; log(LOG_ERR, "%s: DAD complete for %s - duplicate found\n", if_name(ifp), ip6_sprintf(ip6buf, &ia->ia_addr.sin6_addr)); log(LOG_ERR, "%s: manual intervention required\n", if_name(ifp)); /* * If the address is a link-local address formed from an interface * identifier based on the hardware address which is supposed to be * uniquely assigned (e.g., EUI-64 for an Ethernet interface), IP * operation on the interface SHOULD be disabled. * [RFC 4862, Section 5.4.5] */ if (IN6_IS_ADDR_LINKLOCAL(&ia->ia_addr.sin6_addr)) { struct in6_addr in6; /* * To avoid over-reaction, we only apply this logic when we are * very sure that hardware addresses are supposed to be unique. 
*/ switch (ifp->if_type) { case IFT_ETHER: case IFT_FDDI: case IFT_ATM: case IFT_IEEE1394: case IFT_IEEE80211: case IFT_INFINIBAND: in6 = ia->ia_addr.sin6_addr; if (in6_get_hw_ifid(ifp, &in6) == 0 && IN6_ARE_ADDR_EQUAL(&ia->ia_addr.sin6_addr, &in6)) { ND_IFINFO(ifp)->flags |= ND6_IFF_IFDISABLED; log(LOG_ERR, "%s: possible hardware address " "duplication detected, disable IPv6\n", if_name(ifp)); } break; } } } static void nd6_dad_ns_output(struct dadq *dp) { struct in6_ifaddr *ia = (struct in6_ifaddr *)dp->dad_ifa; struct ifnet *ifp = dp->dad_ifa->ifa_ifp; int i; dp->dad_ns_tcount++; if ((ifp->if_flags & IFF_UP) == 0) { return; } if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) { return; } dp->dad_ns_ocount++; if (V_dad_enhanced != 0) { for (i = 0; i < ND_OPT_NONCE_LEN32; i++) dp->dad_nonce[i] = arc4random(); /* * XXXHRS: Note that in the case that * DupAddrDetectTransmits > 1, multiple NS messages with * different nonces can be looped back in an unexpected * order. The current implementation recognizes only * the latest nonce on the sender side. Practically it * should work well in almost all cases. */ } nd6_ns_output(ifp, NULL, NULL, &ia->ia_addr.sin6_addr, (uint8_t *)&dp->dad_nonce[0]); } static void nd6_dad_ns_input(struct ifaddr *ifa, struct nd_opt_nonce *ndopt_nonce) { struct in6_ifaddr *ia; struct ifnet *ifp; const struct in6_addr *taddr6; struct dadq *dp; if (ifa == NULL) panic("ifa == NULL in nd6_dad_ns_input"); ia = (struct in6_ifaddr *)ifa; ifp = ifa->ifa_ifp; taddr6 = &ia->ia_addr.sin6_addr; /* Ignore Nonce option when Enhanced DAD is disabled. 
*/ if (V_dad_enhanced == 0) ndopt_nonce = NULL; dp = nd6_dad_find(ifa, ndopt_nonce); if (dp == NULL) return; dp->dad_ns_icount++; nd6_dad_rele(dp); } static void nd6_dad_na_input(struct ifaddr *ifa) { struct dadq *dp; if (ifa == NULL) panic("ifa == NULL in nd6_dad_na_input"); dp = nd6_dad_find(ifa, NULL); if (dp != NULL) { dp->dad_na_icount++; nd6_dad_rele(dp); } } Index: head/sys/netinet6/nd6_rtr.c =================================================================== --- head/sys/netinet6/nd6_rtr.c (revision 296062) +++ head/sys/netinet6/nd6_rtr.c (revision 296063) @@ -1,2127 +1,2256 @@ /*- * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the project nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $KAME: nd6_rtr.c,v 1.111 2001/04/27 01:37:15 jinmei Exp $ */ #include __FBSDID("$FreeBSD$"); #include "opt_inet.h" #include "opt_inet6.h" #include #include #include #include +#include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include static int rtpref(struct nd_defrouter *); static struct nd_defrouter *defrtrlist_update(struct nd_defrouter *); static int prelist_update(struct nd_prefixctl *, struct nd_defrouter *, struct mbuf *, int); static struct in6_ifaddr *in6_ifadd(struct nd_prefixctl *, int); static struct nd_pfxrouter *pfxrtr_lookup(struct nd_prefix *, struct nd_defrouter *); static void pfxrtr_add(struct nd_prefix *, struct nd_defrouter *); static void pfxrtr_del(struct nd_pfxrouter *); static struct nd_pfxrouter *find_pfxlist_reachable_router (struct nd_prefix *); static void defrouter_delreq(struct nd_defrouter *); static void nd6_rtmsg(int, struct rtentry *); static int in6_init_prefix_ltimes(struct nd_prefix *); static void in6_init_address_ltimes(struct nd_prefix *, struct in6_addrlifetime *); static int nd6_prefix_onlink(struct nd_prefix *); static int nd6_prefix_offlink(struct nd_prefix *); static int rt6_deleteroute(const struct rtentry *, void *); VNET_DECLARE(int, nd6_recalc_reachtm_interval); #define V_nd6_recalc_reachtm_interval 
VNET(nd6_recalc_reachtm_interval) static VNET_DEFINE(struct ifnet *, nd6_defifp); VNET_DEFINE(int, nd6_defifindex); #define V_nd6_defifp VNET(nd6_defifp) VNET_DEFINE(int, ip6_use_tempaddr) = 0; VNET_DEFINE(int, ip6_desync_factor); VNET_DEFINE(u_int32_t, ip6_temp_preferred_lifetime) = DEF_TEMP_PREFERRED_LIFETIME; VNET_DEFINE(u_int32_t, ip6_temp_valid_lifetime) = DEF_TEMP_VALID_LIFETIME; VNET_DEFINE(int, ip6_temp_regen_advance) = TEMPADDR_REGEN_ADVANCE; /* RTPREF_MEDIUM has to be 0! */ #define RTPREF_HIGH 1 #define RTPREF_MEDIUM 0 #define RTPREF_LOW (-1) #define RTPREF_RESERVED (-2) #define RTPREF_INVALID (-3) /* internal */ /* * Receive Router Solicitation Message - just for routers. * Router solicitation/advertisement is mostly managed by userland program * (rtadvd) so here we have no function like nd6_ra_output(). * * Based on RFC 2461 */ void nd6_rs_input(struct mbuf *m, int off, int icmp6len) { struct ifnet *ifp = m->m_pkthdr.rcvif; struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *); struct nd_router_solicit *nd_rs; struct in6_addr saddr6 = ip6->ip6_src; char *lladdr = NULL; int lladdrlen = 0; union nd_opts ndopts; char ip6bufs[INET6_ADDRSTRLEN], ip6bufd[INET6_ADDRSTRLEN]; /* * Accept RS only when V_ip6_forwarding=1 and the interface has * no ND6_IFF_ACCEPT_RTADV. */ if (!V_ip6_forwarding || ND_IFINFO(ifp)->flags & ND6_IFF_ACCEPT_RTADV) goto freeit; /* Sanity checks */ if (ip6->ip6_hlim != 255) { nd6log((LOG_ERR, "nd6_rs_input: invalid hlim (%d) from %s to %s on %s\n", ip6->ip6_hlim, ip6_sprintf(ip6bufs, &ip6->ip6_src), ip6_sprintf(ip6bufd, &ip6->ip6_dst), if_name(ifp))); goto bad; } /* * Don't update the neighbor cache, if src = ::. * This indicates that the src has no IP address assigned yet. 
*/ if (IN6_IS_ADDR_UNSPECIFIED(&saddr6)) goto freeit; #ifndef PULLDOWN_TEST IP6_EXTHDR_CHECK(m, off, icmp6len,); nd_rs = (struct nd_router_solicit *)((caddr_t)ip6 + off); #else IP6_EXTHDR_GET(nd_rs, struct nd_router_solicit *, m, off, icmp6len); if (nd_rs == NULL) { ICMP6STAT_INC(icp6s_tooshort); return; } #endif icmp6len -= sizeof(*nd_rs); nd6_option_init(nd_rs + 1, icmp6len, &ndopts); if (nd6_options(&ndopts) < 0) { nd6log((LOG_INFO, "nd6_rs_input: invalid ND option, ignored\n")); /* nd6_options have incremented stats */ goto freeit; } if (ndopts.nd_opts_src_lladdr) { lladdr = (char *)(ndopts.nd_opts_src_lladdr + 1); lladdrlen = ndopts.nd_opts_src_lladdr->nd_opt_len << 3; } if (lladdr && ((ifp->if_addrlen + 2 + 7) & ~7) != lladdrlen) { nd6log((LOG_INFO, "nd6_rs_input: lladdrlen mismatch for %s " "(if %d, RS packet %d)\n", ip6_sprintf(ip6bufs, &saddr6), ifp->if_addrlen, lladdrlen - 2)); goto bad; } nd6_cache_lladdr(ifp, &saddr6, lladdr, lladdrlen, ND_ROUTER_SOLICIT, 0); freeit: m_freem(m); return; bad: ICMP6STAT_INC(icp6s_badrs); m_freem(m); } /* * Receive Router Advertisement Message. * * Based on RFC 2461 * TODO: on-link bit on prefix information * TODO: ND_RA_FLAG_{OTHER,MANAGED} processing */ void nd6_ra_input(struct mbuf *m, int off, int icmp6len) { struct ifnet *ifp = m->m_pkthdr.rcvif; struct nd_ifinfo *ndi = ND_IFINFO(ifp); struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *); struct nd_router_advert *nd_ra; struct in6_addr saddr6 = ip6->ip6_src; int mcast = 0; union nd_opts ndopts; struct nd_defrouter *dr; char ip6bufs[INET6_ADDRSTRLEN], ip6bufd[INET6_ADDRSTRLEN]; + dr = NULL; + /* * We only accept RAs only when the per-interface flag * ND6_IFF_ACCEPT_RTADV is on the receiving interface. 
*/ if (!(ndi->flags & ND6_IFF_ACCEPT_RTADV)) goto freeit; if (ip6->ip6_hlim != 255) { nd6log((LOG_ERR, "nd6_ra_input: invalid hlim (%d) from %s to %s on %s\n", ip6->ip6_hlim, ip6_sprintf(ip6bufs, &ip6->ip6_src), ip6_sprintf(ip6bufd, &ip6->ip6_dst), if_name(ifp))); goto bad; } if (!IN6_IS_ADDR_LINKLOCAL(&saddr6)) { nd6log((LOG_ERR, "nd6_ra_input: src %s is not link-local\n", ip6_sprintf(ip6bufs, &saddr6))); goto bad; } #ifndef PULLDOWN_TEST IP6_EXTHDR_CHECK(m, off, icmp6len,); nd_ra = (struct nd_router_advert *)((caddr_t)ip6 + off); #else IP6_EXTHDR_GET(nd_ra, struct nd_router_advert *, m, off, icmp6len); if (nd_ra == NULL) { ICMP6STAT_INC(icp6s_tooshort); return; } #endif icmp6len -= sizeof(*nd_ra); nd6_option_init(nd_ra + 1, icmp6len, &ndopts); if (nd6_options(&ndopts) < 0) { nd6log((LOG_INFO, "nd6_ra_input: invalid ND option, ignored\n")); /* nd6_options have incremented stats */ goto freeit; } { struct nd_defrouter dr0; u_int32_t advreachable = nd_ra->nd_ra_reachable; /* remember if this is a multicasted advertisement */ if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst)) mcast = 1; bzero(&dr0, sizeof(dr0)); dr0.rtaddr = saddr6; dr0.raflags = nd_ra->nd_ra_flags_reserved; /* * Effectively-disable routes from RA messages when * ND6_IFF_NO_RADR enabled on the receiving interface or * (ip6.forwarding == 1 && ip6.rfc6204w3 != 1). */ if (ndi->flags & ND6_IFF_NO_RADR) dr0.rtlifetime = 0; else if (V_ip6_forwarding && !V_ip6_rfc6204w3) dr0.rtlifetime = 0; else dr0.rtlifetime = ntohs(nd_ra->nd_ra_router_lifetime); dr0.expire = time_uptime + dr0.rtlifetime; dr0.ifp = ifp; /* unspecified or not? 
(RFC 2461 6.3.4) */ if (advreachable) { advreachable = ntohl(advreachable); if (advreachable <= MAX_REACHABLE_TIME && ndi->basereachable != advreachable) { ndi->basereachable = advreachable; ndi->reachable = ND_COMPUTE_RTIME(ndi->basereachable); ndi->recalctm = V_nd6_recalc_reachtm_interval; /* reset */ } } if (nd_ra->nd_ra_retransmit) ndi->retrans = ntohl(nd_ra->nd_ra_retransmit); if (nd_ra->nd_ra_curhoplimit) { if (ndi->chlim < nd_ra->nd_ra_curhoplimit) ndi->chlim = nd_ra->nd_ra_curhoplimit; else if (ndi->chlim != nd_ra->nd_ra_curhoplimit) { log(LOG_ERR, "RA with a lower CurHopLimit sent from " "%s on %s (current = %d, received = %d). " "Ignored.\n", ip6_sprintf(ip6bufs, &ip6->ip6_src), if_name(ifp), ndi->chlim, nd_ra->nd_ra_curhoplimit); } } dr = defrtrlist_update(&dr0); } /* * prefix */ if (ndopts.nd_opts_pi) { struct nd_opt_hdr *pt; struct nd_opt_prefix_info *pi = NULL; struct nd_prefixctl pr; for (pt = (struct nd_opt_hdr *)ndopts.nd_opts_pi; pt <= (struct nd_opt_hdr *)ndopts.nd_opts_pi_end; pt = (struct nd_opt_hdr *)((caddr_t)pt + (pt->nd_opt_len << 3))) { if (pt->nd_opt_type != ND_OPT_PREFIX_INFORMATION) continue; pi = (struct nd_opt_prefix_info *)pt; if (pi->nd_opt_pi_len != 4) { nd6log((LOG_INFO, "nd6_ra_input: invalid option " "len %d for prefix information option, " "ignored\n", pi->nd_opt_pi_len)); continue; } if (128 < pi->nd_opt_pi_prefix_len) { nd6log((LOG_INFO, "nd6_ra_input: invalid prefix " "len %d for prefix information option, " "ignored\n", pi->nd_opt_pi_prefix_len)); continue; } if (IN6_IS_ADDR_MULTICAST(&pi->nd_opt_pi_prefix) || IN6_IS_ADDR_LINKLOCAL(&pi->nd_opt_pi_prefix)) { nd6log((LOG_INFO, "nd6_ra_input: invalid prefix " "%s, ignored\n", ip6_sprintf(ip6bufs, &pi->nd_opt_pi_prefix))); continue; } bzero(&pr, sizeof(pr)); pr.ndpr_prefix.sin6_family = AF_INET6; pr.ndpr_prefix.sin6_len = sizeof(pr.ndpr_prefix); pr.ndpr_prefix.sin6_addr = pi->nd_opt_pi_prefix; pr.ndpr_ifp = (struct ifnet *)m->m_pkthdr.rcvif; pr.ndpr_raf_onlink = 
(pi->nd_opt_pi_flags_reserved & ND_OPT_PI_FLAG_ONLINK) ? 1 : 0; pr.ndpr_raf_auto = (pi->nd_opt_pi_flags_reserved & ND_OPT_PI_FLAG_AUTO) ? 1 : 0; pr.ndpr_plen = pi->nd_opt_pi_prefix_len; pr.ndpr_vltime = ntohl(pi->nd_opt_pi_valid_time); pr.ndpr_pltime = ntohl(pi->nd_opt_pi_preferred_time); (void)prelist_update(&pr, dr, m, mcast); } } + if (dr != NULL) { + defrouter_rele(dr); + dr = NULL; + } /* * MTU */ if (ndopts.nd_opts_mtu && ndopts.nd_opts_mtu->nd_opt_mtu_len == 1) { u_long mtu; u_long maxmtu; mtu = (u_long)ntohl(ndopts.nd_opts_mtu->nd_opt_mtu_mtu); /* lower bound */ if (mtu < IPV6_MMTU) { nd6log((LOG_INFO, "nd6_ra_input: bogus mtu option " "mtu=%lu sent from %s, ignoring\n", mtu, ip6_sprintf(ip6bufs, &ip6->ip6_src))); goto skip; } /* upper bound */ maxmtu = (ndi->maxmtu && ndi->maxmtu < ifp->if_mtu) ? ndi->maxmtu : ifp->if_mtu; if (mtu <= maxmtu) { int change = (ndi->linkmtu != mtu); ndi->linkmtu = mtu; if (change) /* in6_maxmtu may change */ in6_setmaxmtu(); } else { nd6log((LOG_INFO, "nd6_ra_input: bogus mtu " "mtu=%lu sent from %s; " "exceeds maxmtu %lu, ignoring\n", mtu, ip6_sprintf(ip6bufs, &ip6->ip6_src), maxmtu)); } } skip: /* * Source link layer address */ { char *lladdr = NULL; int lladdrlen = 0; if (ndopts.nd_opts_src_lladdr) { lladdr = (char *)(ndopts.nd_opts_src_lladdr + 1); lladdrlen = ndopts.nd_opts_src_lladdr->nd_opt_len << 3; } if (lladdr && ((ifp->if_addrlen + 2 + 7) & ~7) != lladdrlen) { nd6log((LOG_INFO, "nd6_ra_input: lladdrlen mismatch for %s " "(if %d, RA packet %d)\n", ip6_sprintf(ip6bufs, &saddr6), ifp->if_addrlen, lladdrlen - 2)); goto bad; } nd6_cache_lladdr(ifp, &saddr6, lladdr, lladdrlen, ND_ROUTER_ADVERT, 0); /* * Installing a link-layer address might change the state of the * router's neighbor cache, which might also affect our on-link * detection of adveritsed prefixes. 
*/ pfxlist_onlink_check(); } freeit: m_freem(m); return; bad: ICMP6STAT_INC(icp6s_badra); m_freem(m); } -/* - * default router list proccessing sub routines - */ - /* * Tell the change to user processes watching the routing socket. * The message carries the route's key, gateway and netmask; when the route * has an interface it also carries the first address on that interface's * address list (RTAX_IFP) and the route's own ifa address (RTAX_IFA). * A reference on that first address is held until rt_missmsg_fib() returns. */ static void nd6_rtmsg(int cmd, struct rtentry *rt) { struct rt_addrinfo info; struct ifnet *ifp; struct ifaddr *ifa; bzero((caddr_t)&info, sizeof(info)); info.rti_info[RTAX_DST] = rt_key(rt); info.rti_info[RTAX_GATEWAY] = rt->rt_gateway; info.rti_info[RTAX_NETMASK] = rt_mask(rt); ifp = rt->rt_ifp; if (ifp != NULL) { IF_ADDR_RLOCK(ifp); ifa = TAILQ_FIRST(&ifp->if_addrhead); /* NOTE(review): assumes the address list is never empty here - confirm */ info.rti_info[RTAX_IFP] = ifa->ifa_addr; ifa_ref(ifa); IF_ADDR_RUNLOCK(ifp); info.rti_info[RTAX_IFA] = rt->rt_ifa->ifa_addr; } else ifa = NULL; rt_missmsg_fib(cmd, &info, rt->rt_flags, 0, rt->rt_fibnum); if (ifa != NULL) ifa_free(ifa); } +/* + * default router list processing sub routines + */ + /* * Install a default route (zeroed destination and mask) through the gateway * new->rtaddr in RT_DEFAULT_FIB. A route handed back by in6_rtrequest() is * reported with nd6_rtmsg() and then released; the router is marked as * installed only when in6_rtrequest() returned 0. */ static void defrouter_addreq(struct nd_defrouter *new) { struct sockaddr_in6 def, mask, gate; struct rtentry *newrt = NULL; int error; bzero(&def, sizeof(def)); bzero(&mask, sizeof(mask)); bzero(&gate, sizeof(gate)); def.sin6_len = mask.sin6_len = gate.sin6_len = sizeof(struct sockaddr_in6); def.sin6_family = gate.sin6_family = AF_INET6; gate.sin6_addr = new->rtaddr; error = in6_rtrequest(RTM_ADD, (struct sockaddr *)&def, (struct sockaddr *)&gate, (struct sockaddr *)&mask, RTF_GATEWAY, &newrt, RT_DEFAULT_FIB); if (newrt) { nd6_rtmsg(RTM_ADD, newrt); /* tell user process */ RTFREE(newrt); } if (error == 0) new->installed = 1; } struct nd_defrouter * -defrouter_lookup(struct in6_addr *addr, struct ifnet *ifp) +defrouter_lookup_locked(struct in6_addr *addr, struct ifnet *ifp) { struct nd_defrouter *dr; - TAILQ_FOREACH(dr, &V_nd_defrouter, dr_entry) { - if (dr->ifp == ifp && IN6_ARE_ADDR_EQUAL(addr, &dr->rtaddr)) + ND6_LOCK_ASSERT(); + TAILQ_FOREACH(dr, &V_nd_defrouter, dr_entry) + if (dr->ifp == ifp && IN6_ARE_ADDR_EQUAL(addr, &dr->rtaddr)) { + defrouter_ref(dr); return (dr); - 
} + } + return (NULL); +} - return (NULL); /* search failed */ +struct nd_defrouter * +defrouter_lookup(struct in6_addr *addr, struct ifnet *ifp) +{ + struct nd_defrouter *dr; + + ND6_RLOCK(); + dr = defrouter_lookup_locked(addr, ifp); + ND6_RUNLOCK(); + return (dr); } +void +defrouter_ref(struct nd_defrouter *dr) +{ + + refcount_acquire(&dr->refcnt); +} + +void +defrouter_rele(struct nd_defrouter *dr) +{ + + if (refcount_release(&dr->refcnt)) + free(dr, M_IP6NDP); +} + /* * Remove the default route for a given router. * This is just a subroutine function for defrouter_select(), and should * not be called from anywhere else. */ static void defrouter_delreq(struct nd_defrouter *dr) { struct sockaddr_in6 def, mask, gate; struct rtentry *oldrt = NULL; bzero(&def, sizeof(def)); bzero(&mask, sizeof(mask)); bzero(&gate, sizeof(gate)); def.sin6_len = mask.sin6_len = gate.sin6_len = sizeof(struct sockaddr_in6); def.sin6_family = gate.sin6_family = AF_INET6; gate.sin6_addr = dr->rtaddr; in6_rtrequest(RTM_DELETE, (struct sockaddr *)&def, (struct sockaddr *)&gate, (struct sockaddr *)&mask, RTF_GATEWAY, &oldrt, RT_DEFAULT_FIB); if (oldrt) { nd6_rtmsg(RTM_DELETE, oldrt); RTFREE(oldrt); } dr->installed = 0; } /* - * remove all default routes from default router list + * Remove all default routes from default router list. */ void defrouter_reset(void) { - struct nd_defrouter *dr; + struct nd_defrouter *dr, **dra; + int count, i; + count = i = 0; + + /* + * We can't delete routes with the ND lock held, so make a copy of the + * current default router list and use that when deleting routes. 
+ */ + ND6_RLOCK(); TAILQ_FOREACH(dr, &V_nd_defrouter, dr_entry) - defrouter_delreq(dr); + count++; + ND6_RUNLOCK(); + dra = malloc(count * sizeof(*dra), M_TEMP, M_WAITOK | M_ZERO); + + ND6_RLOCK(); + TAILQ_FOREACH(dr, &V_nd_defrouter, dr_entry) { + if (i == count) + break; + defrouter_ref(dr); + dra[i++] = dr; + } + ND6_RUNLOCK(); + + for (i = 0; i < count && dra[i] != NULL; i++) { + defrouter_delreq(dra[i]); + defrouter_rele(dra[i]); + } + free(dra, M_TEMP); + /* * XXX should we also nuke any default routers in the kernel, by * going through them by rtalloc1()? */ } +/* + * Remove a router from the global list and free it. + * + * The ND lock must be held and is released before returning. The caller must + * hold a reference on the router object. + */ void -defrtrlist_del(struct nd_defrouter *dr) +defrouter_remove(struct nd_defrouter *dr) { + + ND6_WLOCK_ASSERT(); + KASSERT(dr->refcnt >= 2, ("unexpected refcount 0x%x", dr->refcnt)); + + defrouter_unlink(dr, NULL); + ND6_WUNLOCK(); + defrouter_del(dr); + defrouter_rele(dr); +} + +/* + * Remove a router from the global list and optionally stash it in a + * caller-supplied queue. + * + * The ND lock must be held. + */ +void +defrouter_unlink(struct nd_defrouter *dr, struct nd_drhead *drq) +{ + + ND6_WLOCK_ASSERT(); + TAILQ_REMOVE(&V_nd_defrouter, dr, dr_entry); + if (drq != NULL) + TAILQ_INSERT_TAIL(drq, dr, dr_entry); +} + +void +defrouter_del(struct nd_defrouter *dr) +{ struct nd_defrouter *deldr = NULL; struct nd_prefix *pr; + ND6_UNLOCK_ASSERT(); + /* * Flush all the routing table entries that use the router * as a next hop. */ if (ND_IFINFO(dr->ifp)->flags & ND6_IFF_ACCEPT_RTADV) rt6_flush(&dr->rtaddr, dr->ifp); if (dr->installed) { deldr = dr; defrouter_delreq(dr); } - TAILQ_REMOVE(&V_nd_defrouter, dr, dr_entry); /* * Also delete all the pointers to the router in each prefix lists. 
*/ LIST_FOREACH(pr, &V_nd_prefix, ndpr_entry) { struct nd_pfxrouter *pfxrtr; if ((pfxrtr = pfxrtr_lookup(pr, dr)) != NULL) pfxrtr_del(pfxrtr); } pfxlist_onlink_check(); /* * If the router is the primary one, choose a new one. * Note that defrouter_select() will remove the current gateway * from the routing table. */ if (deldr) defrouter_select(); - free(dr, M_IP6NDP); + /* + * Release the list reference. + */ + defrouter_rele(dr); } /* * Default Router Selection according to Section 6.3.6 of RFC 2461 and * draft-ietf-ipngwg-router-selection: * 1) Routers that are reachable or probably reachable should be preferred. * If we have more than one (probably) reachable router, prefer ones * with the highest router preference. * 2) When no routers on the list are known to be reachable or * probably reachable, routers SHOULD be selected in a round-robin * fashion, regardless of router preference values. * 3) If the Default Router List is empty, assume that all * destinations are on-link. * * We assume nd_defrouter is sorted by router preference value. * Since the code below covers both with and without router preference cases, * we do not need to classify the cases by ifdef. * * At this moment, we do not try to install more than one default router, * even when the multipath routing is available, because we're not sure about * the benefits for stub hosts comparing to the risk of making the code * complicated and the possibility of introducing bugs. */ void defrouter_select(void) { - struct nd_defrouter *dr, *selected_dr = NULL, *installed_dr = NULL; + struct nd_defrouter *dr, *selected_dr, *installed_dr; struct llentry *ln = NULL; + ND6_RLOCK(); /* * Let's handle easy case (3) first: * If default router list is empty, there's nothing to be done. */ - if (TAILQ_EMPTY(&V_nd_defrouter)) + if (TAILQ_EMPTY(&V_nd_defrouter)) { + ND6_RUNLOCK(); return; + } /* * Search for a (probably) reachable router from the list. 
* We just pick up the first reachable one (if any), assuming that * the ordering rule of the list described in defrtrlist_update(). */ + selected_dr = installed_dr = NULL; TAILQ_FOREACH(dr, &V_nd_defrouter, dr_entry) { IF_AFDATA_RLOCK(dr->ifp); if (selected_dr == NULL && (ln = nd6_lookup(&dr->rtaddr, 0, dr->ifp)) && ND6_IS_LLINFO_PROBREACH(ln)) { selected_dr = dr; + defrouter_ref(selected_dr); } IF_AFDATA_RUNLOCK(dr->ifp); if (ln != NULL) { LLE_RUNLOCK(ln); ln = NULL; } - if (dr->installed && installed_dr == NULL) - installed_dr = dr; - else if (dr->installed && installed_dr) { - /* this should not happen. warn for diagnosis. */ - log(LOG_ERR, "defrouter_select: more than one router" - " is installed\n"); + if (dr->installed) { + if (installed_dr == NULL) { + installed_dr = dr; + defrouter_ref(installed_dr); + } else { + /* this should not happen. warn for diagnosis. */ + log(LOG_ERR, + "defrouter_select: more than one router is installed\n"); + } } } /* * If none of the default routers was found to be reachable, * round-robin the list regardless of preference. * Otherwise, if we have an installed router, check if the selected * (reachable) router should really be preferred to the installed one. * We only prefer the new router when the old one is not reachable * or when the new one has a really higher preference value. 
*/ if (selected_dr == NULL) { - if (installed_dr == NULL || !TAILQ_NEXT(installed_dr, dr_entry)) + if (installed_dr == NULL || + TAILQ_NEXT(installed_dr, dr_entry) == NULL) selected_dr = TAILQ_FIRST(&V_nd_defrouter); else selected_dr = TAILQ_NEXT(installed_dr, dr_entry); - } else if (installed_dr) { + defrouter_ref(selected_dr); + } else if (installed_dr != NULL) { IF_AFDATA_RLOCK(installed_dr->ifp); if ((ln = nd6_lookup(&installed_dr->rtaddr, 0, installed_dr->ifp)) && ND6_IS_LLINFO_PROBREACH(ln) && rtpref(selected_dr) <= rtpref(installed_dr)) { + /* + * Keep the installed router. selected_dr and installed_dr each + * own a separate reference, so trade the reference held through + * selected_dr for a new one on the installed router instead of + * aliasing installed_dr's reference. The list's own reference + * keeps the count above zero while the ND lock is held. + */ + defrouter_rele(selected_dr); selected_dr = installed_dr; + defrouter_ref(selected_dr); } IF_AFDATA_RUNLOCK(installed_dr->ifp); if (ln != NULL) LLE_RUNLOCK(ln); } + ND6_RUNLOCK(); /* * If the selected router is different than the installed one, * remove the installed router and install the selected one. * Note that the selected router is never NULL here. */ if (installed_dr != selected_dr) { - if (installed_dr) + if (installed_dr != NULL) defrouter_delreq(installed_dr); defrouter_addreq(selected_dr); } + /* + * Drop both references taken under the ND lock. They are released + * unconditionally: when the round-robin choice lands on the router + * that is already installed, both pointers name the same object and + * each of them still holds its own reference. + */ + if (installed_dr != NULL) + defrouter_rele(installed_dr); + defrouter_rele(selected_dr); } /* * for default router selection * regards router-preference field as a 2-bit signed integer */ static int rtpref(struct nd_defrouter *dr) { switch (dr->raflags & ND_RA_FLAG_RTPREF_MASK) { case ND_RA_FLAG_RTPREF_HIGH: return (RTPREF_HIGH); case ND_RA_FLAG_RTPREF_MEDIUM: case ND_RA_FLAG_RTPREF_RSV: return (RTPREF_MEDIUM); case ND_RA_FLAG_RTPREF_LOW: return (RTPREF_LOW); default: /* * This case should never happen. If it did, it would mean a * serious bug of kernel internal. We thus always bark here. * Or, can we even panic? 
*/ log(LOG_ERR, "rtpref: impossible RA flag %x\n", dr->raflags); return (RTPREF_INVALID); } /* NOTREACHED */ } static struct nd_defrouter * defrtrlist_update(struct nd_defrouter *new) { struct nd_defrouter *dr, *n; int oldpref; - if ((dr = defrouter_lookup(&new->rtaddr, new->ifp)) != NULL) { - /* entry exists */ + ND6_WLOCK(); + if ((dr = defrouter_lookup_locked(&new->rtaddr, new->ifp)) != NULL) { if (new->rtlifetime == 0) { - defrtrlist_del(dr); + /* releases the ND lock */ + defrouter_remove(dr); return (NULL); } oldpref = rtpref(dr); /* override */ dr->raflags = new->raflags; /* XXX flag check */ dr->rtlifetime = new->rtlifetime; dr->expire = new->expire; /* * If the preference does not change, there's no need * to sort the entries. Also make sure the selected * router is still installed in the kernel. */ - if (dr->installed && rtpref(new) == oldpref) + if (dr->installed && rtpref(new) == oldpref) { + ND6_WUNLOCK(); return (dr); + } /* * The preferred router may have changed, so relocate this * router. */ TAILQ_REMOVE(&V_nd_defrouter, dr, dr_entry); n = dr; goto insert; } /* entry does not exist */ - if (new->rtlifetime == 0) + if (new->rtlifetime == 0) { + ND6_WUNLOCK(); return (NULL); + } n = malloc(sizeof(*n), M_IP6NDP, M_NOWAIT | M_ZERO); - if (n == NULL) + if (n == NULL) { + ND6_WUNLOCK(); return (NULL); + } memcpy(n, new, sizeof(*n)); + /* Initialize with an extra reference for the caller. */ + refcount_init(&n->refcnt, 2); insert: /* * Insert the new router in the Default Router List; * The Default Router List should be in the descending order * of router-preferece. Routers with the same preference are * sorted in the arriving time order. 
*/ /* insert at the end of the group */ TAILQ_FOREACH(dr, &V_nd_defrouter, dr_entry) { if (rtpref(n) > rtpref(dr)) break; } - if (dr) + if (dr != NULL) TAILQ_INSERT_BEFORE(dr, n, dr_entry); else TAILQ_INSERT_TAIL(&V_nd_defrouter, n, dr_entry); + ND6_WUNLOCK(); defrouter_select(); return (n); } static struct nd_pfxrouter * pfxrtr_lookup(struct nd_prefix *pr, struct nd_defrouter *dr) { struct nd_pfxrouter *search; LIST_FOREACH(search, &pr->ndpr_advrtrs, pfr_entry) { if (search->router == dr) break; } return (search); } static void pfxrtr_add(struct nd_prefix *pr, struct nd_defrouter *dr) { struct nd_pfxrouter *new; new = malloc(sizeof(*new), M_IP6NDP, M_NOWAIT | M_ZERO); if (new == NULL) return; new->router = dr; + defrouter_ref(dr); LIST_INSERT_HEAD(&pr->ndpr_advrtrs, new, pfr_entry); pfxlist_onlink_check(); } static void pfxrtr_del(struct nd_pfxrouter *pfr) { + LIST_REMOVE(pfr, pfr_entry); + defrouter_rele(pfr->router); free(pfr, M_IP6NDP); } struct nd_prefix * nd6_prefix_lookup(struct nd_prefixctl *key) { struct nd_prefix *search; LIST_FOREACH(search, &V_nd_prefix, ndpr_entry) { if (key->ndpr_ifp == search->ndpr_ifp && key->ndpr_plen == search->ndpr_plen && in6_are_prefix_equal(&key->ndpr_prefix.sin6_addr, &search->ndpr_prefix.sin6_addr, key->ndpr_plen)) { break; } } return (search); } int nd6_prelist_add(struct nd_prefixctl *pr, struct nd_defrouter *dr, struct nd_prefix **newp) { struct nd_prefix *new = NULL; int error = 0; char ip6buf[INET6_ADDRSTRLEN]; new = malloc(sizeof(*new), M_IP6NDP, M_NOWAIT | M_ZERO); if (new == NULL) return (ENOMEM); new->ndpr_ifp = pr->ndpr_ifp; new->ndpr_prefix = pr->ndpr_prefix; new->ndpr_plen = pr->ndpr_plen; new->ndpr_vltime = pr->ndpr_vltime; new->ndpr_pltime = pr->ndpr_pltime; new->ndpr_flags = pr->ndpr_flags; if ((error = in6_init_prefix_ltimes(new)) != 0) { free(new, M_IP6NDP); return(error); } new->ndpr_lastupdate = time_uptime; if (newp != NULL) *newp = new; /* initialization */ LIST_INIT(&new->ndpr_advrtrs); 
in6_prefixlen2mask(&new->ndpr_mask, new->ndpr_plen); /* make prefix in the canonical form */ IN6_MASK_ADDR(&new->ndpr_prefix.sin6_addr, &new->ndpr_mask); /* link ndpr_entry to nd_prefix list */ LIST_INSERT_HEAD(&V_nd_prefix, new, ndpr_entry); /* ND_OPT_PI_FLAG_ONLINK processing */ if (new->ndpr_raf_onlink) { int e; if ((e = nd6_prefix_onlink(new)) != 0) { nd6log((LOG_ERR, "nd6_prelist_add: failed to make " "the prefix %s/%d on-link on %s (errno=%d)\n", ip6_sprintf(ip6buf, &pr->ndpr_prefix.sin6_addr), pr->ndpr_plen, if_name(pr->ndpr_ifp), e)); /* proceed anyway. XXX: is it correct? */ } } if (dr) pfxrtr_add(new, dr); return 0; } void prelist_remove(struct nd_prefix *pr) { struct nd_pfxrouter *pfr, *next; int e; char ip6buf[INET6_ADDRSTRLEN]; /* make sure to invalidate the prefix until it is really freed. */ pr->ndpr_vltime = 0; pr->ndpr_pltime = 0; /* * Though these flags are now meaningless, we'd rather keep the value * of pr->ndpr_raf_onlink and pr->ndpr_raf_auto not to confuse users * when executing "ndp -p". */ if ((pr->ndpr_stateflags & NDPRF_ONLINK) != 0 && (e = nd6_prefix_offlink(pr)) != 0) { nd6log((LOG_ERR, "prelist_remove: failed to make %s/%d offlink " "on %s, errno=%d\n", ip6_sprintf(ip6buf, &pr->ndpr_prefix.sin6_addr), pr->ndpr_plen, if_name(pr->ndpr_ifp), e)); /* what should we do? */ } if (pr->ndpr_refcnt > 0) return; /* notice here? 
*/ /* unlink ndpr_entry from nd_prefix list */ LIST_REMOVE(pr, ndpr_entry); /* free list of routers that advertised the prefix */ LIST_FOREACH_SAFE(pfr, &pr->ndpr_advrtrs, pfr_entry, next) { pfxrtr_del(pfr); } free(pr, M_IP6NDP); pfxlist_onlink_check(); } /* * dr - may be NULL */ static int prelist_update(struct nd_prefixctl *new, struct nd_defrouter *dr, struct mbuf *m, int mcast) { struct in6_ifaddr *ia6 = NULL, *ia6_match = NULL; struct ifaddr *ifa; struct ifnet *ifp = new->ndpr_ifp; struct nd_prefix *pr; int error = 0; int newprefix = 0; int auth; struct in6_addrlifetime lt6_tmp; char ip6buf[INET6_ADDRSTRLEN]; auth = 0; if (m) { /* * Authenticity for NA consists authentication for * both IP header and IP datagrams, doesn't it ? */ #if defined(M_AUTHIPHDR) && defined(M_AUTHIPDGM) auth = ((m->m_flags & M_AUTHIPHDR) && (m->m_flags & M_AUTHIPDGM)); #endif } if ((pr = nd6_prefix_lookup(new)) != NULL) { /* * nd6_prefix_lookup() ensures that pr and new have the same * prefix on a same interface. */ /* * Update prefix information. Note that the on-link (L) bit * and the autonomous (A) bit should NOT be changed from 1 * to 0. */ if (new->ndpr_raf_onlink == 1) pr->ndpr_raf_onlink = 1; if (new->ndpr_raf_auto == 1) pr->ndpr_raf_auto = 1; if (new->ndpr_raf_onlink) { pr->ndpr_vltime = new->ndpr_vltime; pr->ndpr_pltime = new->ndpr_pltime; (void)in6_init_prefix_ltimes(pr); /* XXX error case? */ pr->ndpr_lastupdate = time_uptime; } if (new->ndpr_raf_onlink && (pr->ndpr_stateflags & NDPRF_ONLINK) == 0) { int e; if ((e = nd6_prefix_onlink(pr)) != 0) { nd6log((LOG_ERR, "prelist_update: failed to make " "the prefix %s/%d on-link on %s " "(errno=%d)\n", ip6_sprintf(ip6buf, &pr->ndpr_prefix.sin6_addr), pr->ndpr_plen, if_name(pr->ndpr_ifp), e)); /* proceed anyway. XXX: is it correct? 
*/ } } if (dr && pfxrtr_lookup(pr, dr) == NULL) pfxrtr_add(pr, dr); } else { struct nd_prefix *newpr = NULL; newprefix = 1; if (new->ndpr_vltime == 0) goto end; if (new->ndpr_raf_onlink == 0 && new->ndpr_raf_auto == 0) goto end; error = nd6_prelist_add(new, dr, &newpr); if (error != 0 || newpr == NULL) { nd6log((LOG_NOTICE, "prelist_update: " "nd6_prelist_add failed for %s/%d on %s " "errno=%d, returnpr=%p\n", ip6_sprintf(ip6buf, &new->ndpr_prefix.sin6_addr), new->ndpr_plen, if_name(new->ndpr_ifp), error, newpr)); goto end; /* we should just give up in this case. */ } /* * XXX: from the ND point of view, we can ignore a prefix * with the on-link bit being zero. However, we need a * prefix structure for references from autoconfigured * addresses. Thus, we explicitly make sure that the prefix * itself expires now. */ if (newpr->ndpr_raf_onlink == 0) { newpr->ndpr_vltime = 0; newpr->ndpr_pltime = 0; in6_init_prefix_ltimes(newpr); } pr = newpr; } /* * Address autoconfiguration based on Section 5.5.3 of RFC 2462. * Note that pr must be non NULL at this point. */ /* 5.5.3 (a). Ignore the prefix without the A bit set. */ if (!new->ndpr_raf_auto) goto end; /* * 5.5.3 (b). the link-local prefix should have been ignored in * nd6_ra_input. */ /* 5.5.3 (c). Consistency check on lifetimes: pltime <= vltime. */ if (new->ndpr_pltime > new->ndpr_vltime) { error = EINVAL; /* XXX: won't be used */ goto end; } /* * 5.5.3 (d). If the prefix advertised is not equal to the prefix of * an address configured by stateless autoconfiguration already in the * list of addresses associated with the interface, and the Valid * Lifetime is not 0, form an address. We first check if we have * a matching prefix. * Note: we apply a clarification in rfc2462bis-02 here. We only * consider autoconfigured addresses while RFC2462 simply said * "address". 
*/ IF_ADDR_RLOCK(ifp); TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { struct in6_ifaddr *ifa6; u_int32_t remaininglifetime; if (ifa->ifa_addr->sa_family != AF_INET6) continue; ifa6 = (struct in6_ifaddr *)ifa; /* * We only consider autoconfigured addresses as per rfc2462bis. */ if (!(ifa6->ia6_flags & IN6_IFF_AUTOCONF)) continue; /* * Spec is not clear here, but I believe we should concentrate * on unicast (i.e. not anycast) addresses. * XXX: other ia6_flags? detached or duplicated? */ if ((ifa6->ia6_flags & IN6_IFF_ANYCAST) != 0) continue; /* * Ignore the address if it is not associated with a prefix * or is associated with a prefix that is different from this * one. (pr is never NULL here) */ if (ifa6->ia6_ndpr != pr) continue; if (ia6_match == NULL) /* remember the first one */ ia6_match = ifa6; /* * An already autoconfigured address matched. Now that we * are sure there is at least one matched address, we can * proceed to 5.5.3. (e): update the lifetimes according to the * "two hours" rule and the privacy extension. * We apply some clarifications in rfc2462bis: * - use remaininglifetime instead of storedlifetime as a * variable name * - remove the dead code in the "two-hour" rule */ #define TWOHOUR (120*60) lt6_tmp = ifa6->ia6_lifetime; if (lt6_tmp.ia6t_vltime == ND6_INFINITE_LIFETIME) remaininglifetime = ND6_INFINITE_LIFETIME; else if (time_uptime - ifa6->ia6_updatetime > lt6_tmp.ia6t_vltime) { /* * The case of "invalid" address. We should usually * not see this case. */ remaininglifetime = 0; } else remaininglifetime = lt6_tmp.ia6t_vltime - (time_uptime - ifa6->ia6_updatetime); /* when not updating, keep the current stored lifetime. 
*/ lt6_tmp.ia6t_vltime = remaininglifetime; if (TWOHOUR < new->ndpr_vltime || remaininglifetime < new->ndpr_vltime) { lt6_tmp.ia6t_vltime = new->ndpr_vltime; } else if (remaininglifetime <= TWOHOUR) { if (auth) { lt6_tmp.ia6t_vltime = new->ndpr_vltime; } } else { /* * new->ndpr_vltime <= TWOHOUR && * TWOHOUR < remaininglifetime */ lt6_tmp.ia6t_vltime = TWOHOUR; } /* The 2 hour rule is not imposed for preferred lifetime. */ lt6_tmp.ia6t_pltime = new->ndpr_pltime; /* lt6_tmp is the scratch copy of the address lifetime adjusted above; it is passed by address. */ in6_init_address_ltimes(pr, &lt6_tmp); /* * We need to treat lifetimes for temporary addresses * differently, according to * draft-ietf-ipv6-privacy-addrs-v2-01.txt 3.3 (1); * we only update the lifetimes when they are in the maximum * intervals. */ if ((ifa6->ia6_flags & IN6_IFF_TEMPORARY) != 0) { u_int32_t maxvltime, maxpltime; if (V_ip6_temp_valid_lifetime > (u_int32_t)((time_uptime - ifa6->ia6_createtime) + V_ip6_desync_factor)) { maxvltime = V_ip6_temp_valid_lifetime - (time_uptime - ifa6->ia6_createtime) - V_ip6_desync_factor; } else maxvltime = 0; if (V_ip6_temp_preferred_lifetime > (u_int32_t)((time_uptime - ifa6->ia6_createtime) + V_ip6_desync_factor)) { maxpltime = V_ip6_temp_preferred_lifetime - (time_uptime - ifa6->ia6_createtime) - V_ip6_desync_factor; } else maxpltime = 0; if (lt6_tmp.ia6t_vltime == ND6_INFINITE_LIFETIME || lt6_tmp.ia6t_vltime > maxvltime) { lt6_tmp.ia6t_vltime = maxvltime; } if (lt6_tmp.ia6t_pltime == ND6_INFINITE_LIFETIME || lt6_tmp.ia6t_pltime > maxpltime) { lt6_tmp.ia6t_pltime = maxpltime; } } ifa6->ia6_lifetime = lt6_tmp; ifa6->ia6_updatetime = time_uptime; } IF_ADDR_RUNLOCK(ifp); if (ia6_match == NULL && new->ndpr_vltime) { int ifidlen; /* * 5.5.3 (d) (continued) * No address matched and the valid lifetime is non-zero. * Create a new address. */ /* * Prefix Length check: * If the sum of the prefix length and interface identifier * length does not equal 128 bits, the Prefix Information * option MUST be ignored. 
The length of the interface * identifier is defined in a separate link-type specific * document. */ ifidlen = in6_if2idlen(ifp); if (ifidlen < 0) { /* this should not happen, so we always log it. */ log(LOG_ERR, "prelist_update: IFID undefined (%s)\n", if_name(ifp)); goto end; } if (ifidlen + pr->ndpr_plen != 128) { nd6log((LOG_INFO, "prelist_update: invalid prefixlen " "%d for %s, ignored\n", pr->ndpr_plen, if_name(ifp))); goto end; } if ((ia6 = in6_ifadd(new, mcast)) != NULL) { /* * note that we should use pr (not new) for reference. */ pr->ndpr_refcnt++; ia6->ia6_ndpr = pr; /* * RFC 3041 3.3 (2). * When a new public address is created as described * in RFC2462, also create a new temporary address. * * RFC 3041 3.5. * When an interface connects to a new link, a new * randomized interface identifier should be generated * immediately together with a new set of temporary * addresses. Thus, we specify 1 as the 2nd arg of * in6_tmpifadd(). */ if (V_ip6_use_tempaddr) { int e; if ((e = in6_tmpifadd(ia6, 1, 1)) != 0) { nd6log((LOG_NOTICE, "prelist_update: " "failed to create a temporary " "address, errno=%d\n", e)); } } ifa_free(&ia6->ia_ifa); /* * A newly added address might affect the status * of other addresses, so we check and update it. * XXX: what if address duplication happens? */ pfxlist_onlink_check(); } else { /* just set an error. do not bark here. */ error = EADDRNOTAVAIL; /* XXX: might be unused. */ } } end: return error; } /* * A supplement function used in the on-link detection below; * detect if a given prefix has a (probably) reachable advertising router. * XXX: lengthy function name... 
*/ static struct nd_pfxrouter * find_pfxlist_reachable_router(struct nd_prefix *pr) { struct nd_pfxrouter *pfxrtr; struct llentry *ln; int canreach; /* * Walk the routers that advertised this prefix and stop at the first * one whose neighbor cache entry is (probably) reachable. Routers * with no neighbor cache entry are skipped. */ LIST_FOREACH(pfxrtr, &pr->ndpr_advrtrs, pfr_entry) { IF_AFDATA_RLOCK(pfxrtr->router->ifp); ln = nd6_lookup(&pfxrtr->router->rtaddr, 0, pfxrtr->router->ifp); IF_AFDATA_RUNLOCK(pfxrtr->router->ifp); if (ln == NULL) continue; /* sample the state, then drop the entry lock before deciding */ canreach = ND6_IS_LLINFO_PROBREACH(ln); LLE_RUNLOCK(ln); if (canreach) break; } /* the matching entry, or NULL when the loop ran off the end of the list */ return (pfxrtr); } /* * Check if each prefix in the prefix list has at least one available router * that advertised the prefix (a router is "available" if its neighbor cache * entry is reachable or probably reachable). * If the check fails, the prefix may be off-link, because, for example, * we have moved from the network but the lifetime of the prefix has not * expired yet. So we should not use the prefix if there is another prefix * that has an available router. * But, if there is no prefix that has an available router, we still regard * all the prefixes as on-link. This is because we can't tell if all the * routers are simply dead or if we really moved from the network and there * is no router around us. */ void pfxlist_onlink_check() { struct nd_prefix *pr; struct in6_ifaddr *ifa; struct nd_defrouter *dr; struct nd_pfxrouter *pfxrtr = NULL; /* * Check if there is a prefix that has a reachable advertising * router. */ LIST_FOREACH(pr, &V_nd_prefix, ndpr_entry) { if (pr->ndpr_raf_onlink && find_pfxlist_reachable_router(pr)) break; } /* * If we have no such prefix, check whether we still have a router * that does not advertise any prefixes. 
*/ if (pr == NULL) { + ND6_RLOCK(); TAILQ_FOREACH(dr, &V_nd_defrouter, dr_entry) { struct nd_prefix *pr0; LIST_FOREACH(pr0, &V_nd_prefix, ndpr_entry) { if ((pfxrtr = pfxrtr_lookup(pr0, dr)) != NULL) break; } if (pfxrtr != NULL) break; } + ND6_RUNLOCK(); } if (pr != NULL || (!TAILQ_EMPTY(&V_nd_defrouter) && pfxrtr == NULL)) { /* * There is at least one prefix that has a reachable router, * or at least a router which probably does not advertise * any prefixes. The latter would be the case when we move * to a new link where we have a router that does not provide * prefixes and we configure an address by hand. * Detach prefixes which have no reachable advertising * router, and attach other prefixes. */ LIST_FOREACH(pr, &V_nd_prefix, ndpr_entry) { /* XXX: a link-local prefix should never be detached */ if (IN6_IS_ADDR_LINKLOCAL(&pr->ndpr_prefix.sin6_addr)) continue; /* * we aren't interested in prefixes without the L bit * set. */ if (pr->ndpr_raf_onlink == 0) continue; if (pr->ndpr_raf_auto == 0) continue; if ((pr->ndpr_stateflags & NDPRF_DETACHED) == 0 && find_pfxlist_reachable_router(pr) == NULL) pr->ndpr_stateflags |= NDPRF_DETACHED; if ((pr->ndpr_stateflags & NDPRF_DETACHED) != 0 && find_pfxlist_reachable_router(pr) != 0) pr->ndpr_stateflags &= ~NDPRF_DETACHED; } } else { /* there is no prefix that has a reachable router */ LIST_FOREACH(pr, &V_nd_prefix, ndpr_entry) { if (IN6_IS_ADDR_LINKLOCAL(&pr->ndpr_prefix.sin6_addr)) continue; if (pr->ndpr_raf_onlink == 0) continue; if (pr->ndpr_raf_auto == 0) continue; if ((pr->ndpr_stateflags & NDPRF_DETACHED) != 0) pr->ndpr_stateflags &= ~NDPRF_DETACHED; } } /* * Remove each interface route associated with a (just) detached * prefix, and reinstall the interface route for a (just) attached * prefix. Note that all attempt of reinstallation does not * necessarily success, when a same prefix is shared among multiple * interfaces. Such cases will be handled in nd6_prefix_onlink, * so we don't have to care about them. 
*/
	LIST_FOREACH(pr, &V_nd_prefix, ndpr_entry) {
		int e;
		char ip6buf[INET6_ADDRSTRLEN];

		if (IN6_IS_ADDR_LINKLOCAL(&pr->ndpr_prefix.sin6_addr))
			continue;

		if (pr->ndpr_raf_onlink == 0)
			continue;

		if (pr->ndpr_raf_auto == 0)
			continue;

		if ((pr->ndpr_stateflags & NDPRF_DETACHED) != 0 &&
		    (pr->ndpr_stateflags & NDPRF_ONLINK) != 0) {
			if ((e = nd6_prefix_offlink(pr)) != 0) {
				nd6log((LOG_ERR,
				    "pfxlist_onlink_check: failed to "
				    "make %s/%d offlink, errno=%d\n",
				    ip6_sprintf(ip6buf,
					&pr->ndpr_prefix.sin6_addr),
					pr->ndpr_plen, e));
			}
		}
		if ((pr->ndpr_stateflags & NDPRF_DETACHED) == 0 &&
		    (pr->ndpr_stateflags & NDPRF_ONLINK) == 0 &&
		    pr->ndpr_raf_onlink) {
			if ((e = nd6_prefix_onlink(pr)) != 0) {
				nd6log((LOG_ERR,
				    "pfxlist_onlink_check: failed to "
				    "make %s/%d onlink, errno=%d\n",
				    ip6_sprintf(ip6buf,
					&pr->ndpr_prefix.sin6_addr),
					pr->ndpr_plen, e));
			}
		}
	}

	/*
	 * Changes on the prefix status might affect address status as well.
	 * Make sure that all addresses derived from an attached prefix are
	 * attached, and that all addresses derived from a detached prefix are
	 * detached.  Note, however, that a manually configured address should
	 * always be attached.
	 * The precise detection logic is same as the one for prefixes.
	 *
	 * XXXRW: in6_ifaddrhead locking.
	 */
	TAILQ_FOREACH(ifa, &V_in6_ifaddrhead, ia_link) {
		if (!(ifa->ia6_flags & IN6_IFF_AUTOCONF))
			continue;

		if (ifa->ia6_ndpr == NULL) {
			/*
			 * This can happen when we first configure the address
			 * (i.e. the address exists, but the prefix does not).
			 * XXX: complicated relationships...
			 */
			continue;
		}

		if (find_pfxlist_reachable_router(ifa->ia6_ndpr))
			break;
	}
	/*
	 * ifa is non-NULL here only when the scan above stopped early, i.e.
	 * some autoconfigured address has a prefix for which
	 * find_pfxlist_reachable_router() returned non-NULL.
	 */
	if (ifa) {
		TAILQ_FOREACH(ifa, &V_in6_ifaddrhead, ia_link) {
			if ((ifa->ia6_flags & IN6_IFF_AUTOCONF) == 0)
				continue;

			if (ifa->ia6_ndpr == NULL) /* XXX: see above. */
				continue;

			if (find_pfxlist_reachable_router(ifa->ia6_ndpr)) {
				if (ifa->ia6_flags & IN6_IFF_DETACHED) {
					ifa->ia6_flags &= ~IN6_IFF_DETACHED;
					ifa->ia6_flags |= IN6_IFF_TENTATIVE;
					nd6_dad_start((struct ifaddr *)ifa, 0);
				}
			} else {
				ifa->ia6_flags |= IN6_IFF_DETACHED;
			}
		}
	} else {
		TAILQ_FOREACH(ifa, &V_in6_ifaddrhead, ia_link) {
			if ((ifa->ia6_flags & IN6_IFF_AUTOCONF) == 0)
				continue;

			if (ifa->ia6_flags & IN6_IFF_DETACHED) {
				ifa->ia6_flags &= ~IN6_IFF_DETACHED;
				ifa->ia6_flags |= IN6_IFF_TENTATIVE;
				/* Do we need a delay in this case? */
				nd6_dad_start((struct ifaddr *)ifa, 0);
			}
		}
	}
}

/*
 * Add the interface route for prefix `pr' through address `ifa' to every
 * FIB (0 .. rt_numfibs - 1).
 *
 * For each FIB an RTM_ADD is issued with in6_rtrequest().  On success the
 * route's gateway is replaced with a link-layer sockaddr filled in from the
 * route's interface (type and index), the addition is reported with
 * nd6_rtmsg(), and NDPRF_ONLINK is set in pr->ndpr_stateflags.  On failure
 * the error is logged and remembered.
 *
 * Returns 0 if no FIB failed, otherwise the error of the last FIB that
 * failed.  Note that NDPRF_ONLINK is set as soon as any single FIB succeeds,
 * even if the function ends up returning an error for another FIB.
 */
static int
nd6_prefix_onlink_rtrequest(struct nd_prefix *pr, struct ifaddr *ifa)
{
	/* Template gateway: an empty AF_LINK sockaddr, completed below. */
	static struct sockaddr_dl null_sdl = {sizeof(null_sdl), AF_LINK};
	struct rib_head *rnh;
	struct rtentry *rt;
	struct sockaddr_in6 mask6;
	u_long rtflags;
	int error, a_failure, fibnum;

	/*
	 * in6_ifinit() sets nd6_rtrequest to ifa_rtrequest for all ifaddrs.
	 * ifa->ifa_rtrequest = nd6_rtrequest;
	 */
	bzero(&mask6, sizeof(mask6));
	mask6.sin6_len = sizeof(mask6);
	mask6.sin6_addr = pr->ndpr_mask;
	rtflags = (ifa->ifa_flags & ~IFA_RTSELF) | RTF_UP;

	a_failure = 0;
	for (fibnum = 0; fibnum < rt_numfibs; fibnum++) {

		rt = NULL;
		error = in6_rtrequest(RTM_ADD,
		    (struct sockaddr *)&pr->ndpr_prefix, ifa->ifa_addr,
		    (struct sockaddr *)&mask6, rtflags, &rt, fibnum);
		if (error == 0) {
			KASSERT(rt != NULL, ("%s: in6_rtrequest return no "
			    "error(%d) but rt is NULL, pr=%p, ifa=%p", __func__,
			    error, pr, ifa));

			rnh = rt_tables_get_rnh(rt->rt_fibnum, AF_INET6);
			/* XXX what if rnh == NULL? */
			RIB_WLOCK(rnh);
			RT_LOCK(rt);
			if (rt_setgate(rt, rt_key(rt),
			    (struct sockaddr *)&null_sdl) == 0) {
				struct sockaddr_dl *dl;

				/* Complete the link-layer gateway in place. */
				dl = (struct sockaddr_dl *)rt->rt_gateway;
				dl->sdl_type = rt->rt_ifp->if_type;
				dl->sdl_index = rt->rt_ifp->if_index;
			}
			RIB_WUNLOCK(rnh);
			/* The route is still locked while it is announced. */
			nd6_rtmsg(RTM_ADD, rt);
			RT_UNLOCK(rt);
			pr->ndpr_stateflags |= NDPRF_ONLINK;
		} else {
			char ip6buf[INET6_ADDRSTRLEN];
			char ip6bufg[INET6_ADDRSTRLEN];
			char ip6bufm[INET6_ADDRSTRLEN];
			struct sockaddr_in6 *sin6;

			sin6 = (struct sockaddr_in6 *)ifa->ifa_addr;
			nd6log((LOG_ERR, "nd6_prefix_onlink: failed to add "
			    "route for a prefix (%s/%d) on %s, gw=%s, mask=%s, "
			    "flags=%lx errno = %d\n",
			    ip6_sprintf(ip6buf, &pr->ndpr_prefix.sin6_addr),
			    pr->ndpr_plen, if_name(pr->ndpr_ifp),
			    ip6_sprintf(ip6bufg, &sin6->sin6_addr),
			    ip6_sprintf(ip6bufm, &mask6.sin6_addr),
			    rtflags, error));

			/* Save last error to return, see rtinit(). */
			a_failure = error;
		}

		/*
		 * NOTE(review): presumably this drops the reference that
		 * in6_rtrequest() handed back through &rt -- confirm.
		 */
		if (rt != NULL) {
			RT_LOCK(rt);
			RT_REMREF(rt);
			RT_UNLOCK(rt);
		}
	}

	/* Return the last error we got. */
	return (a_failure);
}

/*
 * Make prefix `pr' on-link by installing its interface route.
 *
 * Returns:
 *   EEXIST  if NDPRF_ONLINK is already set on `pr';
 *   0       if another prefix in V_nd_prefix with the same length and bits
 *           is already on-link (nothing is installed for `pr'), or if no
 *           IPv6 address could be found on the interface (logged only);
 *   otherwise the result of nd6_prefix_onlink_rtrequest().
 */
static int
nd6_prefix_onlink(struct nd_prefix *pr)
{
	struct ifaddr *ifa;
	struct ifnet *ifp = pr->ndpr_ifp;
	struct nd_prefix *opr;
	int error = 0;
	char ip6buf[INET6_ADDRSTRLEN];

	/* sanity check */
	if ((pr->ndpr_stateflags & NDPRF_ONLINK) != 0) {
		nd6log((LOG_ERR,
		    "nd6_prefix_onlink: %s/%d is already on-link\n",
		    ip6_sprintf(ip6buf, &pr->ndpr_prefix.sin6_addr),
		    pr->ndpr_plen));
		return (EEXIST);
	}

	/*
	 * Add the interface route associated with the prefix.  Before
	 * installing the route, check if there's the same prefix on another
	 * interface, and the prefix has already installed the interface route.
	 * Although such a configuration is expected to be rare, we explicitly
	 * allow it.
	 */
	LIST_FOREACH(opr, &V_nd_prefix, ndpr_entry) {
		if (opr == pr)
			continue;

		if ((opr->ndpr_stateflags & NDPRF_ONLINK) == 0)
			continue;

		if (opr->ndpr_plen == pr->ndpr_plen &&
		    in6_are_prefix_equal(&pr->ndpr_prefix.sin6_addr,
		    &opr->ndpr_prefix.sin6_addr, pr->ndpr_plen))
			return (0);
	}

	/*
	 * We prefer link-local addresses as the associated interface address.
	 */
	/* search for a link-local addr */
	ifa = (struct ifaddr *)in6ifa_ifpforlinklocal(ifp,
	    IN6_IFF_NOTREADY | IN6_IFF_ANYCAST);
	if (ifa == NULL) {
		/* XXX: freebsd does not have ifa_ifwithaf */
		/* Fall back to the first AF_INET6 address on the interface. */
		IF_ADDR_RLOCK(ifp);
		TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
			if (ifa->ifa_addr->sa_family == AF_INET6)
				break;
		}
		/* Take a reference so that the ifa_free() below is balanced. */
		if (ifa != NULL)
			ifa_ref(ifa);
		IF_ADDR_RUNLOCK(ifp);
		/* should we care about ia6_flags? */
	}
	if (ifa == NULL) {
		/*
		 * This can still happen, when, for example, we receive an RA
		 * containing a prefix with the L bit set and the A bit clear,
		 * after removing all IPv6 addresses on the receiving
		 * interface.  This should, of course, be rare though.
		 */
		nd6log((LOG_NOTICE,
		    "nd6_prefix_onlink: failed to find any ifaddr"
		    " to add route for a prefix(%s/%d) on %s\n",
		    ip6_sprintf(ip6buf, &pr->ndpr_prefix.sin6_addr),
		    pr->ndpr_plen, if_name(ifp)));
		return (0);
	}

	error = nd6_prefix_onlink_rtrequest(pr, ifa);

	/* ifa cannot be NULL here (handled above); the check is redundant. */
	if (ifa != NULL)
		ifa_free(ifa);

	return (error);
}

/*
 * Make prefix `pr' off-link by deleting its interface route from every FIB
 * (0 .. rt_numfibs - 1).
 *
 * Returns EEXIST if NDPRF_ONLINK is not set on `pr'.  Otherwise returns 0
 * if no FIB failed, or the error of the last FIB whose RTM_DELETE failed.
 *
 * When all deletions succeed, NDPRF_ONLINK is cleared and every other
 * non-detached, off-link prefix with the same length and bits is given a
 * chance to become on-link through nd6_prefix_onlink().  Unless one of
 * those attempts succeeds, lltable_prefix_free() is called for the prefix;
 * this also happens when the route deletion itself failed.
 */
static int
nd6_prefix_offlink(struct nd_prefix *pr)
{
	int error = 0;
	struct ifnet *ifp = pr->ndpr_ifp;
	struct nd_prefix *opr;
	struct sockaddr_in6 sa6, mask6;
	struct rtentry *rt;
	char ip6buf[INET6_ADDRSTRLEN];
	int fibnum, a_failure;

	/* sanity check */
	if ((pr->ndpr_stateflags & NDPRF_ONLINK) == 0) {
		nd6log((LOG_ERR,
		    "nd6_prefix_offlink: %s/%d is already off-link\n",
		    ip6_sprintf(ip6buf, &pr->ndpr_prefix.sin6_addr),
		    pr->ndpr_plen));
		return (EEXIST);
	}

	/* Build the destination and netmask sockaddrs for the route. */
	bzero(&sa6, sizeof(sa6));
	sa6.sin6_family = AF_INET6;
	sa6.sin6_len = sizeof(sa6);
	bcopy(&pr->ndpr_prefix.sin6_addr, &sa6.sin6_addr,
	    sizeof(struct in6_addr));
	bzero(&mask6, sizeof(mask6));
	mask6.sin6_family = AF_INET6;
	/* sa6 and mask6 have the same type, so sizeof(sa6) is the right size. */
	mask6.sin6_len = sizeof(sa6);
	bcopy(&pr->ndpr_mask, &mask6.sin6_addr, sizeof(struct in6_addr));

	a_failure = 0;
	for (fibnum = 0; fibnum < rt_numfibs; fibnum++) {
		rt = NULL;
		error = in6_rtrequest(RTM_DELETE, (struct sockaddr *)&sa6, NULL,
		    (struct sockaddr *)&mask6, 0, &rt, fibnum);
		if (error == 0) {
			/* report the route deletion to the routing socket. */
			if (rt != NULL)
				nd6_rtmsg(RTM_DELETE, rt);
		} else {
			/* Save last error to return, see rtinit(). */
			a_failure = error;
		}
		if (rt != NULL) {
			RTFREE(rt);
		}
	}
	error = a_failure;
	/*
	 * From here on a_failure is reused as a flag: it stays non-zero
	 * unless an equal prefix is successfully made on-link below.
	 */
	a_failure = 1;
	if (error == 0) {
		pr->ndpr_stateflags &= ~NDPRF_ONLINK;

		/*
		 * There might be the same prefix on another interface,
		 * the prefix which could not be on-link just because we have
		 * the interface route (see comments in nd6_prefix_onlink).
		 * If there's one, try to make the prefix on-link on the
		 * interface.
		 */
		LIST_FOREACH(opr, &V_nd_prefix, ndpr_entry) {
			if (opr == pr)
				continue;

			if ((opr->ndpr_stateflags & NDPRF_ONLINK) != 0)
				continue;

			/*
			 * KAME specific: detached prefixes should not be
			 * on-link.
			 */
			if ((opr->ndpr_stateflags & NDPRF_DETACHED) != 0)
				continue;

			if (opr->ndpr_plen == pr->ndpr_plen &&
			    in6_are_prefix_equal(&pr->ndpr_prefix.sin6_addr,
			    &opr->ndpr_prefix.sin6_addr, pr->ndpr_plen)) {
				int e;

				if ((e = nd6_prefix_onlink(opr)) != 0) {
					nd6log((LOG_ERR,
					    "nd6_prefix_offlink: failed to "
					    "recover a prefix %s/%d from %s "
					    "to %s (errno = %d)\n",
					    ip6_sprintf(ip6buf,
						&opr->ndpr_prefix.sin6_addr),
					    opr->ndpr_plen, if_name(ifp),
					    if_name(opr->ndpr_ifp), e));
				} else
					a_failure = 0;
			}
		}
	} else {
		/* XXX: can we still set the NDPRF_ONLINK flag? */
		nd6log((LOG_ERR,
		    "nd6_prefix_offlink: failed to delete route: "
		    "%s/%d on %s (errno = %d)\n",
		    ip6_sprintf(ip6buf, &sa6.sin6_addr), pr->ndpr_plen,
		    if_name(ifp), error));
	}

	if (a_failure)
		lltable_prefix_free(AF_INET6, (struct sockaddr *)&sa6,
		    (struct sockaddr *)&mask6, LLE_STATIC);

	return (error);
}

/*
 * Configure an autoconfigured address on pr->ndpr_ifp, formed from the
 * prefix in `pr' and the interface-identifier bits of the interface's
 * link-local address.
 *
 * `mcast' non-zero requests IN6_IFAUPDATE_DADDELAY from in6_update_ifa().
 *
 * Returns NULL when the interface has no link-local address, when the
 * prefix length differs from the link-local address's mask length, when the
 * resulting address is already configured, or when in6_update_ifa() fails.
 * Otherwise returns the result of looking the new address up again with
 * in6ifa_ifpwithaddr().
 */
static struct in6_ifaddr *
in6_ifadd(struct nd_prefixctl *pr, int mcast)
{
	struct ifnet *ifp = pr->ndpr_ifp;
	struct ifaddr *ifa;
	struct in6_aliasreq ifra;
	struct in6_ifaddr *ia, *ib;
	int error, plen0;
	struct in6_addr mask;
	int prefixlen = pr->ndpr_plen;
	int updateflags;
	char ip6buf[INET6_ADDRSTRLEN];

	in6_prefixlen2mask(&mask, prefixlen);

	/*
	 * find a link-local address (will be interface ID).
	 * Is it really mandatory? Theoretically, a global or a site-local
	 * address can be configured without a link-local address, if we
	 * have a unique interface identifier...
	 *
	 * it is not mandatory to have a link-local address, we can generate
	 * interface identifier on the fly.  we do this because:
	 * (1) it should be the easiest way to find interface identifier.
	 * (2) RFC2462 5.4 suggesting the use of the same interface identifier
	 * for multiple addresses on a single interface, and possible shortcut
	 * of DAD.  we omitted DAD for this reason in the past.
	 * (3) a user can prevent autoconfiguration of global address
	 * by removing link-local address by hand (this is partly because we
	 * don't have other way to control the use of IPv6 on an interface.
	 * this has been our design choice - cf. NRL's "ifconfig auto").
	 * (4) it is easier to manage when an interface has addresses
	 * with the same interface identifier, than to have multiple addresses
	 * with different interface identifiers.
	 */
	ifa = (struct ifaddr *)in6ifa_ifpforlinklocal(ifp, 0); /* 0 is OK? */
	if (ifa)
		ib = (struct in6_ifaddr *)ifa;
	else
		return NULL;

	/* prefixlen + ifidlen must be equal to 128 */
	plen0 = in6_mask2len(&ib->ia_prefixmask.sin6_addr, NULL);
	if (prefixlen != plen0) {
		ifa_free(ifa);
		nd6log((LOG_INFO, "in6_ifadd: wrong prefixlen for %s "
		    "(prefix=%d ifid=%d)\n",
		    if_name(ifp), prefixlen, 128 - plen0));
		return NULL;
	}

	/* make ifaddr */
	in6_prepare_ifra(&ifra, &pr->ndpr_prefix.sin6_addr, &mask);

	IN6_MASK_ADDR(&ifra.ifra_addr.sin6_addr, &mask);
	/* interface ID: OR in the link-local bits outside the prefix mask */
	ifra.ifra_addr.sin6_addr.s6_addr32[0] |=
	    (ib->ia_addr.sin6_addr.s6_addr32[0] & ~mask.s6_addr32[0]);
	ifra.ifra_addr.sin6_addr.s6_addr32[1] |=
	    (ib->ia_addr.sin6_addr.s6_addr32[1] & ~mask.s6_addr32[1]);
	ifra.ifra_addr.sin6_addr.s6_addr32[2] |=
	    (ib->ia_addr.sin6_addr.s6_addr32[2] & ~mask.s6_addr32[2]);
	ifra.ifra_addr.sin6_addr.s6_addr32[3] |=
	    (ib->ia_addr.sin6_addr.s6_addr32[3] & ~mask.s6_addr32[3]);
	/* The link-local address (ib) is not used past this point. */
	ifa_free(ifa);

	/* lifetimes. */
	ifra.ifra_lifetime.ia6t_vltime = pr->ndpr_vltime;
	ifra.ifra_lifetime.ia6t_pltime = pr->ndpr_pltime;

	/* XXX: scope zone ID? */

	ifra.ifra_flags |= IN6_IFF_AUTOCONF; /* obey autoconf */

	/*
	 * Make sure that we do not have this address already.  This should
	 * usually not happen, but we can still see this case, e.g., if we
	 * have manually configured the exact address to be configured.
	 */
	ifa = (struct ifaddr *)in6ifa_ifpwithaddr(ifp,
	    &ifra.ifra_addr.sin6_addr);
	if (ifa != NULL) {
		ifa_free(ifa);
		/* this should be rare enough to make an explicit log */
		log(LOG_INFO, "in6_ifadd: %s is already configured\n",
		    ip6_sprintf(ip6buf, &ifra.ifra_addr.sin6_addr));
		return (NULL);
	}

	/*
	 * Allocate ifaddr structure, link into chain, etc.
	 * If we are going to create a new address upon receiving a multicasted
	 * RA, we need to impose a random delay before starting DAD.
	 * [draft-ietf-ipv6-rfc2462bis-02.txt, Section 5.4.2]
	 */
	updateflags = 0;
	if (mcast)
		updateflags |= IN6_IFAUPDATE_DADDELAY;
	if ((error = in6_update_ifa(ifp, &ifra, NULL, updateflags)) != 0) {
		nd6log((LOG_ERR,
		    "in6_ifadd: failed to make ifaddr %s on %s (errno=%d)\n",
		    ip6_sprintf(ip6buf, &ifra.ifra_addr.sin6_addr),
		    if_name(ifp), error));
		return (NULL);	/* ifaddr must not have been allocated. */
	}

	ia = in6ifa_ifpwithaddr(ifp, &ifra.ifra_addr.sin6_addr);
	/*
	 * XXXRW: Assumption of non-NULLness here might not be true with
	 * fine-grained locking -- should we validate it?  Or just return
	 * earlier ifa rather than looking it up again?
	 */
	return (ia);		/* this is always non-NULL and referenced. */
}

/*
 * Create a temporary address derived from the public address `ia0'.
 *
 * ia0      - corresponding public address
 * forcegen - passed to in6_get_tmpifid(); forced to 1 on a collision retry
 * delay    - non-zero requests IN6_IFAUPDATE_DADDELAY from in6_update_ifa()
 *
 * Returns:
 *   EINVAL  if in6_get_tmpifid() fails, or if the address cannot be found
 *           after a successful in6_update_ifa();
 *   EEXIST  if the generated address still collides with a local address
 *           once the retries are exhausted;
 *   the error from in6_update_ifa() if it fails;
 *   0       on success, and also when the computed preferred lifetime is
 *           not greater than V_ip6_temp_regen_advance (in which case no
 *           address is created).
 */
int
in6_tmpifadd(const struct in6_ifaddr *ia0, int forcegen, int delay)
{
	struct ifnet *ifp = ia0->ia_ifa.ifa_ifp;
	struct in6_ifaddr *newia;
	struct in6_aliasreq ifra;
	int error;
	int trylimit = 3;	/* XXX: adhoc value */
	int updateflags;
	u_int32_t randid[2];
	time_t vltime0, pltime0;

	in6_prepare_ifra(&ifra, &ia0->ia_addr.sin6_addr,
	    &ia0->ia_prefixmask.sin6_addr);

	ifra.ifra_addr = ia0->ia_addr;	/* XXX: do we need this ? */
	/* clear the old IFID */
	IN6_MASK_ADDR(&ifra.ifra_addr.sin6_addr,
	    &ifra.ifra_prefixmask.sin6_addr);

  again:
	if (in6_get_tmpifid(ifp, (u_int8_t *)randid,
	    (const u_int8_t *)&ia0->ia_addr.sin6_addr.s6_addr[8], forcegen)) {
		nd6log((LOG_NOTICE, "in6_tmpifadd: failed to find a good "
		    "random IFID\n"));
		return (EINVAL);
	}
	/* OR the random ID into the low 64 bits not covered by the mask. */
	ifra.ifra_addr.sin6_addr.s6_addr32[2] |=
	    (randid[0] & ~(ifra.ifra_prefixmask.sin6_addr.s6_addr32[2]));
	ifra.ifra_addr.sin6_addr.s6_addr32[3] |=
	    (randid[1] & ~(ifra.ifra_prefixmask.sin6_addr.s6_addr32[3]));

	/*
	 * in6_get_tmpifid() quite likely provided a unique interface ID.
	 * However, we may still have a chance to see collision, because
	 * there may be a time lag between generation of the ID and generation
	 * of the address.  So, we'll do one more sanity check.
	 */
	if (in6_localip(&ifra.ifra_addr.sin6_addr) != 0) {
		/* With trylimit starting at 3 this allows up to 3 retries. */
		if (trylimit-- > 0) {
			forcegen = 1;
			goto again;
		}

		/* Give up.  Something strange should have happened.  */
		nd6log((LOG_NOTICE, "in6_tmpifadd: failed to "
		    "find a unique random IFID\n"));
		return (EEXIST);
	}

	/*
	 * The Valid Lifetime is the lower of the Valid Lifetime of the
	 * public address or TEMP_VALID_LIFETIME.
	 * The Preferred Lifetime is the lower of the Preferred Lifetime
	 * of the public address or TEMP_PREFERRED_LIFETIME -
	 * DESYNC_FACTOR.
	 */
	if (ia0->ia6_lifetime.ia6t_vltime != ND6_INFINITE_LIFETIME) {
		/* Remaining valid lifetime of the public address. */
		vltime0 = IFA6_IS_INVALID(ia0) ? 0 :
		    (ia0->ia6_lifetime.ia6t_vltime -
		    (time_uptime - ia0->ia6_updatetime));
		if (vltime0 > V_ip6_temp_valid_lifetime)
			vltime0 = V_ip6_temp_valid_lifetime;
	} else
		vltime0 = V_ip6_temp_valid_lifetime;
	if (ia0->ia6_lifetime.ia6t_pltime != ND6_INFINITE_LIFETIME) {
		/* Remaining preferred lifetime of the public address. */
		pltime0 = IFA6_IS_DEPRECATED(ia0) ? 0 :
		    (ia0->ia6_lifetime.ia6t_pltime -
		    (time_uptime - ia0->ia6_updatetime));
		if (pltime0 > V_ip6_temp_preferred_lifetime - V_ip6_desync_factor){
			pltime0 = V_ip6_temp_preferred_lifetime -
			    V_ip6_desync_factor;
		}
	} else
		pltime0 = V_ip6_temp_preferred_lifetime - V_ip6_desync_factor;
	ifra.ifra_lifetime.ia6t_vltime = vltime0;
	ifra.ifra_lifetime.ia6t_pltime = pltime0;

	/*
	 * A temporary address is created only if this calculated Preferred
	 * Lifetime is greater than REGEN_ADVANCE time units.
	 */
	if (ifra.ifra_lifetime.ia6t_pltime <= V_ip6_temp_regen_advance)
		return (0);

	/* XXX: scope zone ID? */

	ifra.ifra_flags |= (IN6_IFF_AUTOCONF|IN6_IFF_TEMPORARY);

	/* allocate ifaddr structure, link into chain, etc. */
	updateflags = 0;
	if (delay)
		updateflags |= IN6_IFAUPDATE_DADDELAY;
	if ((error = in6_update_ifa(ifp, &ifra, NULL, updateflags)) != 0)
		return (error);

	newia = in6ifa_ifpwithaddr(ifp, &ifra.ifra_addr.sin6_addr);
	if (newia == NULL) {	/* XXX: can it happen? */
		nd6log((LOG_ERR,
		    "in6_tmpifadd: ifa update succeeded, but we got "
		    "no ifaddr\n"));
		return (EINVAL); /* XXX */
	}
	/*
	 * Share the public address's prefix and count the new user.
	 * NOTE(review): ia0->ia6_ndpr is dereferenced without a NULL check;
	 * confirm that callers only pass addresses with a prefix attached.
	 */
	newia->ia6_ndpr = ia0->ia6_ndpr;
	newia->ia6_ndpr->ndpr_refcnt++;
	ifa_free(&newia->ia_ifa);

	/*
	 * A newly added address might affect the status of other addresses.
	 * XXX: when the temporary address is generated with a new public
	 * address, the onlink check is redundant.  However, it would be safe
	 * to do the check explicitly everywhere a new address is generated,
	 * and, in fact, we surely need the check when we create a new
	 * temporary address due to deprecation of an old temporary address.
	 */
	pfxlist_onlink_check();

	return (0);
}

/*
 * Derive the absolute preferred/expire times of prefix `ndpr' from its
 * relative lifetimes: time_uptime + lifetime, or 0 when the lifetime is
 * ND6_INFINITE_LIFETIME.  Always returns 0.
 */
static int
in6_init_prefix_ltimes(struct nd_prefix *ndpr)
{
	if (ndpr->ndpr_pltime == ND6_INFINITE_LIFETIME)
		ndpr->ndpr_preferred = 0;
	else
		ndpr->ndpr_preferred = time_uptime + ndpr->ndpr_pltime;
	if (ndpr->ndpr_vltime == ND6_INFINITE_LIFETIME)
		ndpr->ndpr_expire = 0;
	else
		ndpr->ndpr_expire = time_uptime + ndpr->ndpr_vltime;

	return 0;
}

/*
 * Derive the absolute expire/preferred times in `lt6' from its relative
 * lifetimes: time_uptime + lifetime, or 0 when the lifetime is
 * ND6_INFINITE_LIFETIME.  The `new' argument is not used by this body.
 */
static void
in6_init_address_ltimes(struct nd_prefix *new, struct in6_addrlifetime *lt6)
{
	/* init ia6t_expire */
	if (lt6->ia6t_vltime == ND6_INFINITE_LIFETIME)
		lt6->ia6t_expire = 0;
	else {
		lt6->ia6t_expire = time_uptime;
		lt6->ia6t_expire += lt6->ia6t_vltime;
	}

	/* init ia6t_preferred */
	if (lt6->ia6t_pltime == ND6_INFINITE_LIFETIME)
		lt6->ia6t_preferred = 0;
	else {
		lt6->ia6t_preferred = time_uptime;
		lt6->ia6t_preferred += lt6->ia6t_pltime;
	}
}

/*
 * Delete all the routing table entries that use the specified gateway.
 * XXX: this function causes search through all entries of routing table, so
 * it shouldn't be called when acting as a router.
 *
 * Only link-local gateways are handled; for any other address this is a
 * no-op.  The `ifp' argument is not used by this body.
 */
void
rt6_flush(struct in6_addr *gateway, struct ifnet *ifp)
{

	/* We'll care only link-local addresses */
	if (!IN6_IS_ADDR_LINKLOCAL(gateway))
		return;

	/* XXX Do we really need to walk any but the default FIB? */
	rt_foreach_fib_walk_del(AF_INET6, rt6_deleteroute, (void *)gateway);
}

/*
 * Filter callback handed to rt_foreach_fib_walk_del() by rt6_flush().
 * `arg' is the gateway address (struct in6_addr *) being flushed.
 *
 * Returns 1 only for a non-static host route whose gateway is an AF_INET6
 * address equal to `arg'; returns 0 for everything else.
 * NOTE(review): presumably a return of 1 tells the walker to delete the
 * route -- confirm against rt_foreach_fib_walk_del().
 */
static int
rt6_deleteroute(const struct rtentry *rt, void *arg)
{
#define SIN6(s)	((struct sockaddr_in6 *)s)
	struct in6_addr *gate = (struct in6_addr *)arg;

	if (rt->rt_gateway == NULL || rt->rt_gateway->sa_family != AF_INET6)
		return (0);

	if (!IN6_ARE_ADDR_EQUAL(gate, &SIN6(rt->rt_gateway)->sin6_addr)) {
		return (0);
	}

	/*
	 * Do not delete a static route.
	 * XXX: this seems to be a bit ad-hoc. Should we consider the
	 * 'cloned' bit instead?
	 */
	if ((rt->rt_flags & RTF_STATIC) != 0)
		return (0);

	/*
	 * We delete only host route. This means, in particular, we don't
	 * delete default route.
	 */
	if ((rt->rt_flags & RTF_HOST) == 0)
		return (0);

	return (1);
#undef SIN6
}

/*
 * Select the default interface by index.  An index of 0 clears the default.
 *
 * Returns EINVAL when `ifindex' is negative, greater than V_if_index, or
 * non-zero with no interface found by ifnet_byindex(); otherwise returns 0.
 * When the index differs from the current one, V_nd6_defifindex and
 * V_nd6_defifp are updated and scope6_setdefault() is called.
 */
int
nd6_setdefaultiface(int ifindex)
{
	int error = 0;	/* never modified below; the success return value */

	if (ifindex < 0 || V_if_index < ifindex)
		return (EINVAL);
	if (ifindex != 0 && !ifnet_byindex(ifindex))
		return (EINVAL);

	if (V_nd6_defifindex != ifindex) {
		V_nd6_defifindex = ifindex;
		if (V_nd6_defifindex > 0)
			V_nd6_defifp = ifnet_byindex(V_nd6_defifindex);
		else
			V_nd6_defifp = NULL;

		/*
		 * Our current implementation assumes one-to-one mapping between
		 * interfaces and links, so it would be natural to use the
		 * default interface as the default link.
		 */
		scope6_setdefault(V_nd6_defifp);
	}

	return (error);
}