Index: head/sys/net/if_llatbl.h =================================================================== --- head/sys/net/if_llatbl.h +++ head/sys/net/if_llatbl.h @@ -79,6 +79,8 @@ int16_t ln_state; /* IPv6 has ND6_LLINFO_NOSTATE == -2 */ uint16_t ln_router; time_t ln_ntick; + time_t lle_remtime; /* Real time remaining (scaled by hz) */ + time_t lle_hittime; /* time_uptime when r_skip_req was unset */ int lle_refcnt; LIST_ENTRY(llentry) lle_chain; /* chain of deleted items */ @@ -222,6 +224,8 @@ size_t lltable_drop_entry_queue(struct llentry *); void lltable_set_entry_addr(struct ifnet *ifp, struct llentry *lle, const char *lladdr); +int lltable_try_set_entry_addr(struct ifnet *ifp, struct llentry *lle, + const char *lladdr); struct llentry *lltable_alloc_entry(struct lltable *llt, u_int flags, const struct sockaddr *l4addr); Index: head/sys/net/if_llatbl.c =================================================================== --- head/sys/net/if_llatbl.c +++ head/sys/net/if_llatbl.c @@ -288,6 +288,47 @@ } /* + * Tries to update @lle link-level address. + * Since update requires AFDATA WLOCK, function + * drops @lle lock, acquires AFDATA lock and then acquires + * @lle lock to maintain lock order. + * + * Returns 1 on success, 0 if @lle is marked LLE_DELETED after relocking. + */ +int +lltable_try_set_entry_addr(struct ifnet *ifp, struct llentry *lle, + const char *lladdr) +{ + + /* Perform real LLE update */ + /* use afdata WLOCK to update fields */ + LLE_WLOCK_ASSERT(lle); + LLE_ADDREF(lle); + LLE_WUNLOCK(lle); + IF_AFDATA_WLOCK(ifp); + LLE_WLOCK(lle); + + /* + * Since we dropped LLE lock, other thread might have deleted + * this lle. Check and return 0 in that case. + */ + if ((lle->la_flags & LLE_DELETED) != 0) { + IF_AFDATA_WUNLOCK(ifp); + LLE_FREE_LOCKED(lle); + return (0); + } + + /* Update data */ + lltable_set_entry_addr(ifp, lle, lladdr); + + IF_AFDATA_WUNLOCK(ifp); + + LLE_REMREF(lle); + + return (1); +} + +/* * * Performes generic cleanup routines and frees lle. 
* Index: head/sys/netinet6/in6.c =================================================================== --- head/sys/netinet6/in6.c +++ head/sys/netinet6/in6.c @@ -2064,6 +2064,7 @@ LLE_WUNLOCK(lle); LLE_LOCK_DESTROY(lle); + LLE_REQ_DESTROY(lle); free(lle, M_LLTABLE); } @@ -2080,6 +2081,7 @@ lle->base.lle_refcnt = 1; lle->base.lle_free = in6_lltable_destroy_lle; LLE_LOCK_INIT(&lle->base); + LLE_REQ_INIT(&lle->base); callout_init(&lle->base.lle_timer, 1); return (&lle->base); @@ -2288,6 +2290,13 @@ if (lle == NULL) return (NULL); + KASSERT((flags & (LLE_UNLOCKED|LLE_EXCLUSIVE)) != + (LLE_UNLOCKED|LLE_EXCLUSIVE),("wrong lle request flags: 0x%X", + flags)); + + if (flags & LLE_UNLOCKED) + return (lle); + if (flags & LLE_EXCLUSIVE) LLE_WLOCK(lle); else @@ -2350,8 +2359,8 @@ sdl->sdl_index = ifp->if_index; sdl->sdl_type = ifp->if_type; bcopy(&lle->ll_addr, LLADDR(sdl), ifp->if_addrlen); - ndpc.rtm.rtm_rmx.rmx_expire = - lle->la_flags & LLE_STATIC ? 0 : lle->la_expire; + ndpc.rtm.rtm_rmx.rmx_expire = lle->la_expire + + lle->lle_remtime / hz; ndpc.rtm.rtm_flags |= (RTF_HOST | RTF_LLDATA); if (lle->la_flags & LLE_STATIC) ndpc.rtm.rtm_flags |= RTF_STATIC; Index: head/sys/netinet6/nd6.c =================================================================== --- head/sys/netinet6/nd6.c +++ head/sys/netinet6/nd6.c @@ -542,6 +542,107 @@ } /* + * Checks if we need to switch from STALE state. + * + * RFC 4861 requires switching from STALE to DELAY state + * on first packet matching entry, waiting V_nd6_delay and + * transition to PROBE state (if upper layer confirmation was + * not received). + * + * This code performs a bit differently: + * On packet hit we don't change state (but desired state + * can be guessed by control plane). However, after V_nd6_delay + * seconds code will transition to PROBE state (so DELAY state + * is kinda skipped in most situations). 
+ * + * Typically, V_nd6_gctimer is bigger than V_nd6_delay, so + * we perform the following upon entering STALE state: + * + * 1) Arm timer to run each V_nd6_delay seconds to make sure that + * if packet was transmitted at the start of given interval, we + * would be able to switch to PROBE state in V_nd6_delay seconds + * as user expects. + * + * 2) Reschedule timer until original V_nd6_gctimer expires keeping + * lle in STALE state (remaining timer value stored in lle_remtime). + * + * 3) Reschedule timer if packet was transmitted less than V_nd6_delay + * seconds ago. + * + * Returns non-zero value if the entry is still STALE (storing + * the next timer interval, scaled by hz, in @pdelay). + * + * Returns zero value if original timer expired or we need to switch to + * PROBE (store that in @do_switch variable). + */ +static int +nd6_is_stale(struct llentry *lle, long *pdelay, int *do_switch) +{ + int nd_delay, nd_gctimer, r_skip_req; + time_t lle_hittime; + long delay; + + *do_switch = 0; + nd_gctimer = V_nd6_gctimer; + nd_delay = V_nd6_delay; + + LLE_REQ_LOCK(lle); + r_skip_req = lle->r_skip_req; + lle_hittime = lle->lle_hittime; + LLE_REQ_UNLOCK(lle); + + if (r_skip_req > 0) { + + /* + * Nonzero r_skip_req value was set upon entering + * STALE state. Since value was not changed, no + * packets were passed using this lle. Ask for + * timer reschedule and keep STALE state. + */ + delay = (long)(MIN(nd_gctimer, nd_delay)); + delay *= hz; + if (lle->lle_remtime > delay) + lle->lle_remtime -= delay; + else { + delay = lle->lle_remtime; + lle->lle_remtime = 0; + } + + if (delay == 0) { + + /* + * The original nd6_gctimer timeout ended, + * no more rescheduling. + */ + return (0); + } + + *pdelay = delay; + return (1); + } + + /* + * r_skip_req was cleared, so the entry was used. Verify timestamp. + */ + delay = (long)(time_uptime - lle_hittime); + if (delay < nd_delay) { + + /* + * V_nd6_delay still not passed since the first + * hit in STALE state. + * Reschedule timer and return. 
+ */ + *pdelay = (long)(nd_delay - delay) * hz; + return (1); + } + + /* Request switching to probe */ + *do_switch = 1; + return (0); +} + + +/* * Switch @lle state to new state optionally arming timers. * * Set noinline to be dtrace-friendly @@ -550,9 +651,11 @@ nd6_llinfo_setstate(struct llentry *lle, int newstate) { struct ifnet *ifp; - long delay; + int nd_gctimer, nd_delay; + long delay, remtime; delay = 0; + remtime = 0; switch (newstate) { case ND6_LLINFO_INCOMPLETE: @@ -566,7 +669,19 @@ } break; case ND6_LLINFO_STALE: - delay = (long)V_nd6_gctimer * hz; + + /* + * Notify fast path that we want to know if any packet + * is transmitted by setting r_skip_req. + */ + LLE_REQ_LOCK(lle); + lle->r_skip_req = 1; + LLE_REQ_UNLOCK(lle); + nd_delay = V_nd6_delay; + nd_gctimer = V_nd6_gctimer; + + delay = (long)(MIN(nd_gctimer, nd_delay)) * hz; + remtime = (long)nd_gctimer * hz - delay; break; case ND6_LLINFO_DELAY: lle->la_asked = 0; @@ -577,6 +692,7 @@ if (delay > 0) nd6_llinfo_settimer_locked(lle, delay); + lle->lle_remtime = remtime; lle->ln_state = newstate; } @@ -592,7 +708,8 @@ struct in6_addr *dst, *pdst, *psrc, src; struct ifnet *ifp; struct nd_ifinfo *ndi = NULL; - int send_ns; + int do_switch, send_ns; + long delay; KASSERT(arg != NULL, ("%s: arg NULL", __func__)); ln = (struct llentry *)arg; @@ -680,13 +797,35 @@ break; case ND6_LLINFO_STALE: - /* Garbage Collection(RFC 2461 5.3) */ - if (!ND6_LLINFO_PERMANENT(ln)) { - EVENTHANDLER_INVOKE(lle_event, ln, LLENTRY_EXPIRED); - nd6_free(ln, 1); - ln = NULL; + if (nd6_is_stale(ln, &delay, &do_switch) != 0) { + + /* + * Entry stays STALE: either unused with GC timeout + * pending, or used less than V_nd6_delay ago. + * Reschedule timer and return. + */ + nd6_llinfo_settimer_locked(ln, delay); + break; } - break; + + if (do_switch == 0) { + + /* + * GC timer has ended and entry hasn't been used. 
+ * Run Garbage collector (RFC 4861, 5.3) + */ + if (!ND6_LLINFO_PERMANENT(ln)) { + EVENTHANDLER_INVOKE(lle_event, ln, + LLENTRY_EXPIRED); + nd6_free(ln, 1); + ln = NULL; + } + break; + } + + /* Entry has been used AND delay timer has ended. */ + + /* FALLTHROUGH */ case ND6_LLINFO_DELAY: if (ndi && (ndi->flags & ND6_IFF_PERFORMNUD) != 0) { @@ -1796,7 +1935,11 @@ * Record source link-layer address * XXX is it dependent to ifp->if_type? */ - lltable_set_entry_addr(ifp, ln, lladdr); + if (lltable_try_set_entry_addr(ifp, ln, lladdr) == 0) { + /* Entry was deleted */ + return; + } + nd6_llinfo_setstate(ln, ND6_LLINFO_STALE); EVENTHANDLER_INVOKE(lle_event, ln, LLENTRY_RESOLVED); @@ -1996,31 +2139,25 @@ } IF_AFDATA_RLOCK(ifp); - ln = nd6_lookup(&dst6->sin6_addr, 0, ifp); - IF_AFDATA_RUNLOCK(ifp); - - /* - * Perform fast path for the following cases: - * 1) lle state is REACHABLE - * 2) lle state is DELAY (NS message sent) - * - * Every other case involves lle modification, so we handle - * them separately. 
- */ - if (ln == NULL || (ln->ln_state != ND6_LLINFO_REACHABLE && - ln->ln_state != ND6_LLINFO_DELAY)) { - /* Fall back to slow processing path */ - if (ln != NULL) - LLE_RUNLOCK(ln); - return (nd6_resolve_slow(ifp, m, dst6, desten, pflags)); + ln = nd6_lookup(&dst6->sin6_addr, LLE_UNLOCKED, ifp); + if (ln != NULL && (ln->r_flags & RLLE_VALID) != 0) { + /* Entry found, let's copy lle info */ + bcopy(&ln->ll_addr, desten, ifp->if_addrlen); + if (pflags != NULL) + *pflags = LLE_VALID | (ln->r_flags & RLLE_IFADDR); + /* Check if we have feedback request from nd6 timer */ + if (ln->r_skip_req != 0) { + LLE_REQ_LOCK(ln); + ln->r_skip_req = 0; /* Notify that entry was used */ + ln->lle_hittime = time_uptime; + LLE_REQ_UNLOCK(ln); + } + IF_AFDATA_RUNLOCK(ifp); + return (0); } + IF_AFDATA_RUNLOCK(ifp); - - bcopy(&ln->ll_addr, desten, ifp->if_addrlen); - if (pflags != NULL) - *pflags = ln->la_flags; - LLE_RUNLOCK(ln); - return (0); + return (nd6_resolve_slow(ifp, m, dst6, desten, pflags)); } Index: head/sys/netinet6/nd6_nbr.c =================================================================== --- head/sys/netinet6/nd6_nbr.c +++ head/sys/netinet6/nd6_nbr.c @@ -765,7 +765,10 @@ /* * Record link-layer address, and update the state. */ - lltable_set_entry_addr(ifp, ln, lladdr); + if (lltable_try_set_entry_addr(ifp, ln, lladdr) == 0) { + ln = NULL; + goto freeit; + } EVENTHANDLER_INVOKE(lle_event, ln, LLENTRY_RESOLVED); if (is_solicited) nd6_llinfo_setstate(ln, ND6_LLINFO_REACHABLE); @@ -831,7 +834,12 @@ * Update link-local address, if any. */ if (lladdr != NULL) { - lltable_set_entry_addr(ifp, ln, lladdr); + int ret; + ret = lltable_try_set_entry_addr(ifp, ln,lladdr); + if (ret == 0) { + ln = NULL; + goto freeit; + } EVENTHANDLER_INVOKE(lle_event, ln, LLENTRY_RESOLVED); }