Index: sys/conf/files =================================================================== --- sys/conf/files +++ sys/conf/files @@ -3649,6 +3649,7 @@ netinet/in_gif.c optional gif inet | netgraph_gif inet netinet/ip_gre.c optional gre inet netinet/ip_id.c optional inet +netinet/in_lle.c optional inet netinet/in_mcast.c optional inet netinet/in_pcb.c optional inet | inet6 netinet/in_pcbgroup.c optional inet pcbgroup | inet6 pcbgroup @@ -3716,6 +3717,7 @@ netinet6/in6_fib.c optional inet6 netinet6/in6_gif.c optional gif inet6 | netgraph_gif inet6 netinet6/in6_ifattach.c optional inet6 +netinet6/in6_lle.c optional inet6 netinet6/in6_mcast.c optional inet6 netinet6/in6_pcb.c optional inet6 netinet6/in6_pcbgroup.c optional inet6 pcbgroup Index: sys/net/if_llatbl.h =================================================================== --- sys/net/if_llatbl.h +++ sys/net/if_llatbl.h @@ -38,16 +38,10 @@ struct rt_msghdr; struct rt_addrinfo; +struct lltable; struct llentry; LIST_HEAD(llentries, llentry); -extern struct rwlock lltable_rwlock; -#define LLTABLE_RLOCK() rw_rlock(&lltable_rwlock) -#define LLTABLE_RUNLOCK() rw_runlock(&lltable_rwlock) -#define LLTABLE_WLOCK() rw_wlock(&lltable_rwlock) -#define LLTABLE_WUNLOCK() rw_wunlock(&lltable_rwlock) -#define LLTABLE_LOCK_ASSERT() rw_assert(&lltable_rwlock, RA_LOCKED) - #define LLE_MAX_LINKHDR 24 /* Full IB header */ /* * Code referencing llentry must at least hold @@ -98,12 +92,6 @@ #define LLE_LOCK_DESTROY(lle) rw_destroy(&(lle)->lle_lock) #define LLE_WLOCK_ASSERT(lle) rw_assert(&(lle)->lle_lock, RA_WLOCKED) -#define LLE_REQ_INIT(lle) mtx_init(&(lle)->req_mtx, "lle req", \ - NULL, MTX_DEF) -#define LLE_REQ_DESTROY(lle) mtx_destroy(&(lle)->req_mtx) -#define LLE_REQ_LOCK(lle) mtx_lock(&(lle)->req_mtx) -#define LLE_REQ_UNLOCK(lle) mtx_unlock(&(lle)->req_mtx) - #define LLE_IS_VALID(lle) (((lle) != NULL) && ((lle) != (void *)-1)) #define LLE_ADDREF(lle) do { \ @@ -139,51 +127,6 @@ } while (0) -typedef struct llentry 
*(llt_lookup_t)(struct lltable *, u_int flags, - const struct sockaddr *l3addr); -typedef struct llentry *(llt_alloc_t)(struct lltable *, u_int flags, - const struct sockaddr *l3addr); -typedef void (llt_delete_t)(struct lltable *, struct llentry *); -typedef void (llt_prefix_free_t)(struct lltable *, - const struct sockaddr *addr, const struct sockaddr *mask, u_int flags); -typedef int (llt_dump_entry_t)(struct lltable *, struct llentry *, - struct sysctl_req *); -typedef uint32_t (llt_hash_t)(const struct llentry *, uint32_t); -typedef int (llt_match_prefix_t)(const struct sockaddr *, - const struct sockaddr *, u_int, struct llentry *); -typedef void (llt_free_entry_t)(struct lltable *, struct llentry *); -typedef void (llt_fill_sa_entry_t)(const struct llentry *, struct sockaddr *); -typedef void (llt_free_tbl_t)(struct lltable *); -typedef void (llt_link_entry_t)(struct lltable *, struct llentry *); -typedef void (llt_unlink_entry_t)(struct llentry *); - -typedef int (llt_foreach_cb_t)(struct lltable *, struct llentry *, void *); -typedef int (llt_foreach_entry_t)(struct lltable *, llt_foreach_cb_t *, void *); - -struct lltable { - SLIST_ENTRY(lltable) llt_link; - int llt_af; - int llt_hsize; - struct llentries *lle_head; - struct ifnet *llt_ifp; - - llt_lookup_t *llt_lookup; - llt_alloc_t *llt_alloc_entry; - llt_delete_t *llt_delete_entry; - llt_prefix_free_t *llt_prefix_free; - llt_dump_entry_t *llt_dump_entry; - llt_hash_t *llt_hash; - llt_match_prefix_t *llt_match_prefix; - llt_free_entry_t *llt_free_entry; - llt_foreach_entry_t *llt_foreach_entry; - llt_link_entry_t *llt_link_entry; - llt_unlink_entry_t *llt_unlink_entry; - llt_fill_sa_entry_t *llt_fill_sa_entry; - llt_free_tbl_t *llt_free_tbl; -}; - -MALLOC_DECLARE(M_LLTABLE); - /* * LLentry flags */ @@ -204,14 +147,9 @@ #define RLLE_VALID 0x0001 /* entry is valid */ #define RLLE_IFADDR LLE_IFADDR /* entry is ifaddr */ -#define LLATBL_HASH(key, mask) \ - (((((((key >> 8) ^ key) >> 8) ^ key) >> 8) ^ key) & 
mask) - -struct lltable *lltable_allocate_htbl(uint32_t hsize); -void lltable_free(struct lltable *); -void lltable_link(struct lltable *llt); void lltable_prefix_free(int, struct sockaddr *, struct sockaddr *, u_int); +void lltable_free(struct lltable *); #if 0 void lltable_drain(int); #endif @@ -222,7 +160,6 @@ struct sockaddr_storage *); /* helper functions */ -size_t lltable_drop_entry_queue(struct llentry *); void lltable_set_entry_addr(struct ifnet *ifp, struct llentry *lle, const char *linkhdr, size_t linkhdrsize, int lladdr_off); int lltable_try_set_entry_addr(struct ifnet *ifp, struct llentry *lle, @@ -242,18 +179,6 @@ struct ifnet *lltable_get_ifp(const struct lltable *llt); int lltable_get_af(const struct lltable *llt); -int lltable_foreach_lle(struct lltable *llt, llt_foreach_cb_t *f, - void *farg); -/* - * Generic link layer address lookup function. - */ -static __inline struct llentry * -lla_lookup(struct lltable *llt, u_int flags, const struct sockaddr *l3addr) -{ - - return (llt->llt_lookup(llt, flags, l3addr)); -} - int lla_rt_output(struct rt_msghdr *, struct rt_addrinfo *); #include Index: sys/net/if_llatbl.c =================================================================== --- sys/net/if_llatbl.c +++ sys/net/if_llatbl.c @@ -51,6 +51,7 @@ #include #include +#include #include #include #include Index: sys/net/if_llatbl_var.h =================================================================== --- /dev/null +++ sys/net/if_llatbl_var.h @@ -0,0 +1,116 @@ +/* + * Copyright (c) 2004 Luigi Rizzo, Alessandro Cerri. All rights reserved. + * Copyright (c) 2004-2008 Qing Li. All rights reserved. + * Copyright (c) 2008 Kip Macy. All rights reserved. + * Copyright (c) 2015-2016 Alexander V. Chernikov. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. 
Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ */ +#include +__FBSDID("$FreeBSD$"); + +#ifndef _NET_IF_LLATBL_VAR_H_ +#define _NET_IF_LLATBL_VAR_H_ + +/* LLE request mutex macro */ +#define LLE_REQ_INIT(lle) mtx_init(&(lle)->req_mtx, "lle req", \ + NULL, MTX_DEF) +#define LLE_REQ_DESTROY(lle) mtx_destroy(&(lle)->req_mtx) +#define LLE_REQ_LOCK(lle) mtx_lock(&(lle)->req_mtx) +#define LLE_REQ_UNLOCK(lle) mtx_unlock(&(lle)->req_mtx) + +extern struct rwlock lltable_rwlock; +#define LLTABLE_RLOCK() rw_rlock(&lltable_rwlock) +#define LLTABLE_RUNLOCK() rw_runlock(&lltable_rwlock) +#define LLTABLE_WLOCK() rw_wlock(&lltable_rwlock) +#define LLTABLE_WUNLOCK() rw_wunlock(&lltable_rwlock) +#define LLTABLE_LOCK_ASSERT() rw_assert(&lltable_rwlock, RA_LOCKED) + +typedef struct llentry *(llt_lookup_t)(struct lltable *, u_int flags, + const struct sockaddr *l3addr); +typedef struct llentry *(llt_alloc_t)(struct lltable *, u_int flags, + const struct sockaddr *l3addr); +typedef void (llt_delete_t)(struct lltable *, struct llentry *); +typedef void (llt_prefix_free_t)(struct lltable *, + const struct sockaddr *addr, const struct sockaddr *mask, u_int flags); +typedef int (llt_dump_entry_t)(struct lltable *, struct llentry *, + struct sysctl_req *); +typedef uint32_t (llt_hash_t)(const struct llentry *, uint32_t); +typedef int (llt_match_prefix_t)(const struct sockaddr *, + const struct sockaddr *, u_int, struct llentry *); +typedef void (llt_free_entry_t)(struct lltable *, struct llentry *); +typedef void (llt_fill_sa_entry_t)(const struct llentry *, struct sockaddr *); +typedef void (llt_free_tbl_t)(struct lltable *); +typedef void (llt_link_entry_t)(struct lltable *, struct llentry *); +typedef void (llt_unlink_entry_t)(struct llentry *); + +typedef int (llt_foreach_cb_t)(struct lltable *, struct llentry *, void *); +typedef int (llt_foreach_entry_t)(struct lltable *, llt_foreach_cb_t *, void *); + +struct lltable { + SLIST_ENTRY(lltable) llt_link; + int llt_af; + int llt_hsize; + struct llentries *lle_head; + struct ifnet 
*llt_ifp; + + llt_lookup_t *llt_lookup; + llt_alloc_t *llt_alloc_entry; + llt_delete_t *llt_delete_entry; + llt_prefix_free_t *llt_prefix_free; + llt_dump_entry_t *llt_dump_entry; + llt_hash_t *llt_hash; + llt_match_prefix_t *llt_match_prefix; + llt_free_entry_t *llt_free_entry; + llt_foreach_entry_t *llt_foreach_entry; + llt_link_entry_t *llt_link_entry; + llt_unlink_entry_t *llt_unlink_entry; + llt_fill_sa_entry_t *llt_fill_sa_entry; + llt_free_tbl_t *llt_free_tbl; +}; + +struct lltable *lltable_allocate_htbl(uint32_t hsize); +void lltable_link(struct lltable *llt); + +/* + * Generic link layer address lookup function. + */ +static __inline struct llentry * +lla_lookup(struct lltable *llt, u_int flags, const struct sockaddr *l3addr) +{ + + return (llt->llt_lookup(llt, flags, l3addr)); +} + +/* helper functions */ +size_t lltable_drop_entry_queue(struct llentry *); +int lltable_foreach_lle(struct lltable *llt, llt_foreach_cb_t *f, void *farg); + +MALLOC_DECLARE(M_LLTABLE); + +#define LLATBL_HASH(key, mask) \ + (((((((key >> 8) ^ key) >> 8) ^ key) >> 8) ^ key) & mask) + + + +#endif + Index: sys/netinet/if_ether.h =================================================================== --- sys/netinet/if_ether.h +++ sys/netinet/if_ether.h @@ -112,6 +112,21 @@ extern u_char ether_ipmulticast_min[ETHER_ADDR_LEN]; extern u_char ether_ipmulticast_max[ETHER_ADDR_LEN]; +VNET_DECLARE(int, arpt_rexmit); +VNET_DECLARE(int, arpt_keep); +VNET_DECLARE(int, arpt_down); +VNET_DECLARE(int, arp_maxtries); +VNET_DECLARE(int, arp_maxhold); +VNET_DECLARE(int, arp_proxyall); + +#define V_arpt_rexmit VNET(arpt_rexmit) +#define V_arpt_keep VNET(arpt_keep) +#define V_arp_maxtries VNET(arp_maxtries) +#define V_arp_maxhold VNET(arp_maxhold) +#define V_arpt_down VNET(arpt_down) +#define V_arp_proxyall VNET(arp_proxyall) + + struct ifaddr; int arpresolve_addr(struct ifnet *ifp, int flags, Index: sys/netinet/if_ether.c =================================================================== --- 
sys/netinet/if_ether.c +++ sys/netinet/if_ether.c @@ -66,6 +66,7 @@ #include #include #include +#include #include #ifdef INET #include @@ -79,26 +80,16 @@ static int arp_curpps; static int arp_maxpps = 1; -/* Simple ARP state machine */ -enum arp_llinfo_state { - ARP_LLINFO_INCOMPLETE = 0, /* No LLE data */ - ARP_LLINFO_REACHABLE, /* LLE is valid */ - ARP_LLINFO_VERIFY, /* LLE is valid, need refresh */ - ARP_LLINFO_DELETED, /* LLE is deleted */ -}; - SYSCTL_DECL(_net_link_ether); static SYSCTL_NODE(_net_link_ether, PF_INET, inet, CTLFLAG_RW, 0, ""); static SYSCTL_NODE(_net_link_ether, PF_ARP, arp, CTLFLAG_RW, 0, ""); /* timer values */ -static VNET_DEFINE(int, arpt_keep) = (20*60); /* once resolved, good for 20 - * minutes */ -static VNET_DEFINE(int, arp_maxtries) = 5; -static VNET_DEFINE(int, arp_proxyall) = 0; -static VNET_DEFINE(int, arpt_down) = 20; /* keep incomplete entries for - * 20 seconds */ -static VNET_DEFINE(int, arpt_rexmit) = 1; /* retransmit arp entries, sec*/ +VNET_DEFINE(int, arpt_keep) = (20*60); /* once resolved, good for 20 minutes */ +VNET_DEFINE(int, arp_maxtries) = 5; +VNET_DEFINE(int, arp_proxyall) = 0; +VNET_DEFINE(int, arpt_down) = 20; /* keep incomplete entries for 20 sec */ +VNET_DEFINE(int, arpt_rexmit) = 1; /* retransmit arp entries, sec*/ VNET_PCPUSTAT_DEFINE(struct arpstat, arpstat); /* ARP statistics, see if_arp.h */ VNET_PCPUSTAT_SYSINIT(arpstat); @@ -106,14 +97,7 @@ VNET_PCPUSTAT_SYSUNINIT(arpstat); #endif /* VIMAGE */ -static VNET_DEFINE(int, arp_maxhold) = 1; - -#define V_arpt_keep VNET(arpt_keep) -#define V_arpt_down VNET(arpt_down) -#define V_arpt_rexmit VNET(arpt_rexmit) -#define V_arp_maxtries VNET(arp_maxtries) -#define V_arp_proxyall VNET(arp_proxyall) -#define V_arp_maxhold VNET(arp_maxhold) +VNET_DEFINE(int, arp_maxhold) = 1; SYSCTL_INT(_net_link_ether_inet, OID_AUTO, max_age, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(arpt_keep), 0, @@ -145,17 +129,12 @@ static void arp_init(void); static void arpintr(struct mbuf *); 
-static void arptimer(void *); #ifdef INET static void in_arpinput(struct mbuf *); #endif static void arp_check_update_lle(struct arphdr *ah, struct in_addr isaddr, struct ifnet *ifp, int bridged, struct llentry *la); -static void arp_mark_lle_reachable(struct llentry *la); -static void arp_iflladdr(void *arg __unused, struct ifnet *ifp); - -static eventhandler_tag iflladdr_tag; static const struct netisr_handler arp_nh = { .nh_name = "arp", @@ -165,123 +144,6 @@ }; /* - * Timeout routine. Age arp_tab entries periodically. - */ -static void -arptimer(void *arg) -{ - struct llentry *lle = (struct llentry *)arg; - struct ifnet *ifp; - int r_skip_req; - - if (lle->la_flags & LLE_STATIC) { - return; - } - LLE_WLOCK(lle); - if (callout_pending(&lle->lle_timer)) { - /* - * Here we are a bit odd here in the treatment of - * active/pending. If the pending bit is set, it got - * rescheduled before I ran. The active - * bit we ignore, since if it was stopped - * in ll_tablefree() and was currently running - * it would have return 0 so the code would - * not have deleted it since the callout could - * not be stopped so we want to go through - * with the delete here now. If the callout - * was restarted, the pending bit will be back on and - * we just want to bail since the callout_reset would - * return 1 and our reference would have been removed - * by arpresolve() below. - */ - LLE_WUNLOCK(lle); - return; - } - ifp = lle->lle_tbl->llt_ifp; - CURVNET_SET(ifp->if_vnet); - - switch (lle->ln_state) { - case ARP_LLINFO_REACHABLE: - - /* - * Expiration time is approaching. - * Let's try to refresh entry if it is still - * in use. - * - * Set r_skip_req to get feedback from - * fast path. Change state and re-schedule - * ourselves. 
- */ - LLE_REQ_LOCK(lle); - lle->r_skip_req = 1; - LLE_REQ_UNLOCK(lle); - lle->ln_state = ARP_LLINFO_VERIFY; - callout_schedule(&lle->lle_timer, hz * V_arpt_rexmit); - LLE_WUNLOCK(lle); - CURVNET_RESTORE(); - return; - case ARP_LLINFO_VERIFY: - LLE_REQ_LOCK(lle); - r_skip_req = lle->r_skip_req; - LLE_REQ_UNLOCK(lle); - - if (r_skip_req == 0 && lle->la_preempt > 0) { - /* Entry was used, issue refresh request */ - struct in_addr dst; - dst = lle->r_l3addr.addr4; - lle->la_preempt--; - callout_schedule(&lle->lle_timer, hz * V_arpt_rexmit); - LLE_WUNLOCK(lle); - arprequest(ifp, NULL, &dst, NULL); - CURVNET_RESTORE(); - return; - } - /* Nothing happened. Reschedule if not too late */ - if (lle->la_expire > time_uptime) { - callout_schedule(&lle->lle_timer, hz * V_arpt_rexmit); - LLE_WUNLOCK(lle); - CURVNET_RESTORE(); - return; - } - break; - case ARP_LLINFO_INCOMPLETE: - case ARP_LLINFO_DELETED: - break; - } - - if ((lle->la_flags & LLE_DELETED) == 0) { - int evt; - - if (lle->la_flags & LLE_VALID) - evt = LLENTRY_EXPIRED; - else - evt = LLENTRY_TIMEDOUT; - EVENTHANDLER_INVOKE(lle_event, lle, evt); - } - - callout_stop(&lle->lle_timer); - - /* XXX: LOR avoidance. We still have ref on lle. */ - LLE_WUNLOCK(lle); - IF_AFDATA_LOCK(ifp); - LLE_WLOCK(lle); - - /* Guard against race with other llentry_free(). */ - if (lle->la_flags & LLE_LINKED) { - LLE_REMREF(lle); - lltable_unlink_entry(lle->lle_tbl, lle); - } - IF_AFDATA_UNLOCK(ifp); - - size_t pkts_dropped = llentry_free(lle); - - ARPSTAT_ADD(dropped, pkts_dropped); - ARPSTAT_INC(timeouts); - - CURVNET_RESTORE(); -} - -/* * Stores link-layer header for @ifp in format suitable for if_output() * into buffer @buf. Resulting header length is stored in @bufsize. * @@ -406,219 +268,6 @@ ARPSTAT_INC(txrequests); } - -/* - * Resolve an IP address into an ethernet address - heavy version. - * Used internally by arpresolve(). 
- * We have already checked than we can't use existing lle without - * modification so we have to acquire LLE_EXCLUSIVE lle lock. - * - * On success, desten and flags are filled in and the function returns 0; - * If the packet must be held pending resolution, we return EWOULDBLOCK - * On other errors, we return the corresponding error code. - * Note that m_freem() handles NULL. - */ -static int -arpresolve_full(struct ifnet *ifp, int is_gw, int flags, struct mbuf *m, - const struct sockaddr *dst, u_char *desten, uint32_t *pflags) -{ - struct llentry *la = NULL, *la_tmp; - struct mbuf *curr = NULL; - struct mbuf *next = NULL; - int error, renew; - char *lladdr; - int ll_len; - - if (pflags != NULL) - *pflags = 0; - - if ((flags & LLE_CREATE) == 0) { - IF_AFDATA_RLOCK(ifp); - la = lla_lookup(LLTABLE(ifp), LLE_EXCLUSIVE, dst); - IF_AFDATA_RUNLOCK(ifp); - } - if (la == NULL && (ifp->if_flags & (IFF_NOARP | IFF_STATICARP)) == 0) { - la = lltable_alloc_entry(LLTABLE(ifp), 0, dst); - if (la == NULL) { - log(LOG_DEBUG, - "arpresolve: can't allocate llinfo for %s on %s\n", - inet_ntoa(SIN(dst)->sin_addr), if_name(ifp)); - m_freem(m); - return (EINVAL); - } - - IF_AFDATA_WLOCK(ifp); - LLE_WLOCK(la); - la_tmp = lla_lookup(LLTABLE(ifp), LLE_EXCLUSIVE, dst); - /* Prefer ANY existing lle over newly-created one */ - if (la_tmp == NULL) - lltable_link_entry(LLTABLE(ifp), la); - IF_AFDATA_WUNLOCK(ifp); - if (la_tmp != NULL) { - lltable_free_entry(LLTABLE(ifp), la); - la = la_tmp; - } - } - if (la == NULL) { - m_freem(m); - return (EINVAL); - } - - if ((la->la_flags & LLE_VALID) && - ((la->la_flags & LLE_STATIC) || la->la_expire > time_uptime)) { - if (flags & LLE_ADDRONLY) { - lladdr = la->ll_addr; - ll_len = ifp->if_addrlen; - } else { - lladdr = la->r_linkdata; - ll_len = la->r_hdrlen; - } - bcopy(lladdr, desten, ll_len); - - /* Check if we have feedback request from arptimer() */ - if (la->r_skip_req != 0) { - LLE_REQ_LOCK(la); - la->r_skip_req = 0; /* Notify that entry was used 
*/ - LLE_REQ_UNLOCK(la); - } - if (pflags != NULL) - *pflags = la->la_flags & (LLE_VALID|LLE_IFADDR); - LLE_WUNLOCK(la); - return (0); - } - - renew = (la->la_asked == 0 || la->la_expire != time_uptime); - /* - * There is an arptab entry, but no ethernet address - * response yet. Add the mbuf to the list, dropping - * the oldest packet if we have exceeded the system - * setting. - */ - if (m != NULL) { - if (la->la_numheld >= V_arp_maxhold) { - if (la->la_hold != NULL) { - next = la->la_hold->m_nextpkt; - m_freem(la->la_hold); - la->la_hold = next; - la->la_numheld--; - ARPSTAT_INC(dropped); - } - } - if (la->la_hold != NULL) { - curr = la->la_hold; - while (curr->m_nextpkt != NULL) - curr = curr->m_nextpkt; - curr->m_nextpkt = m; - } else - la->la_hold = m; - la->la_numheld++; - } - /* - * Return EWOULDBLOCK if we have tried less than arp_maxtries. It - * will be masked by ether_output(). Return EHOSTDOWN/EHOSTUNREACH - * if we have already sent arp_maxtries ARP requests. Retransmit the - * ARP request, but not faster than one request per second. - */ - if (la->la_asked < V_arp_maxtries) - error = EWOULDBLOCK; /* First request. */ - else - error = is_gw != 0 ? EHOSTUNREACH : EHOSTDOWN; - - if (renew) { - int canceled; - - LLE_ADDREF(la); - la->la_expire = time_uptime; - canceled = callout_reset(&la->lle_timer, hz * V_arpt_down, - arptimer, la); - if (canceled) - LLE_REMREF(la); - la->la_asked++; - LLE_WUNLOCK(la); - arprequest(ifp, NULL, &SIN(dst)->sin_addr, NULL); - return (error); - } - - LLE_WUNLOCK(la); - return (error); -} - -/* - * Resolve an IP address into an ethernet address. - */ -int -arpresolve_addr(struct ifnet *ifp, int flags, const struct sockaddr *dst, - char *desten, uint32_t *pflags) -{ - int error; - - flags |= LLE_ADDRONLY; - error = arpresolve_full(ifp, 0, flags, NULL, dst, desten, pflags); - return (error); -} - - -/* - * Lookups link header based on an IP address. 
- * On input: - * ifp is the interface we use - * is_gw != 0 if @dst represents gateway to some destination - * m is the mbuf. May be NULL if we don't have a packet. - * dst is the next hop, - * desten is the storage to put LL header. - * flags returns subset of lle flags: LLE_VALID | LLE_IFADDR - * - * On success, full/partial link header and flags are filled in and - * the function returns 0. - * If the packet must be held pending resolution, we return EWOULDBLOCK - * On other errors, we return the corresponding error code. - * Note that m_freem() handles NULL. - */ -int -arpresolve(struct ifnet *ifp, int is_gw, struct mbuf *m, - const struct sockaddr *dst, u_char *desten, uint32_t *pflags) -{ - struct llentry *la = 0; - - if (pflags != NULL) - *pflags = 0; - - if (m != NULL) { - if (m->m_flags & M_BCAST) { - /* broadcast */ - (void)memcpy(desten, - ifp->if_broadcastaddr, ifp->if_addrlen); - return (0); - } - if (m->m_flags & M_MCAST) { - /* multicast */ - ETHER_MAP_IP_MULTICAST(&SIN(dst)->sin_addr, desten); - return (0); - } - } - - IF_AFDATA_RLOCK(ifp); - la = lla_lookup(LLTABLE(ifp), LLE_UNLOCKED, dst); - if (la != NULL && (la->r_flags & RLLE_VALID) != 0) { - /* Entry found, let's copy lle info */ - bcopy(la->r_linkdata, desten, la->r_hdrlen); - if (pflags != NULL) - *pflags = LLE_VALID | (la->r_flags & RLLE_IFADDR); - /* Check if we have feedback request from arptimer() */ - if (la->r_skip_req != 0) { - LLE_REQ_LOCK(la); - la->r_skip_req = 0; /* Notify that entry was used */ - LLE_REQ_UNLOCK(la); - } - IF_AFDATA_RUNLOCK(ifp); - return (0); - } - IF_AFDATA_RUNLOCK(ifp); - - return (arpresolve_full(ifp, is_gw, la == NULL ? LLE_CREATE : 0, m, dst, - desten, pflags)); -} - /* * Common length and type checks are done here, * then the protocol-specific routine is called. 
@@ -1207,88 +856,6 @@ LLE_WUNLOCK(la); } -static void -arp_mark_lle_reachable(struct llentry *la) -{ - int canceled, wtime; - - LLE_WLOCK_ASSERT(la); - - la->ln_state = ARP_LLINFO_REACHABLE; - EVENTHANDLER_INVOKE(lle_event, la, LLENTRY_RESOLVED); - - if (!(la->la_flags & LLE_STATIC)) { - LLE_ADDREF(la); - la->la_expire = time_uptime + V_arpt_keep; - wtime = V_arpt_keep - V_arp_maxtries * V_arpt_rexmit; - if (wtime < 0) - wtime = V_arpt_keep; - canceled = callout_reset(&la->lle_timer, - hz * wtime, arptimer, la); - if (canceled) - LLE_REMREF(la); - } - la->la_asked = 0; - la->la_preempt = V_arp_maxtries; -} - -/* - * Add pernament link-layer record for given interface address. - */ -static __noinline void -arp_add_ifa_lle(struct ifnet *ifp, const struct sockaddr *dst) -{ - struct llentry *lle, *lle_tmp; - - /* - * Interface address LLE record is considered static - * because kernel code relies on LLE_STATIC flag to check - * if these entries can be rewriten by arp updates. - */ - lle = lltable_alloc_entry(LLTABLE(ifp), LLE_IFADDR | LLE_STATIC, dst); - if (lle == NULL) { - log(LOG_INFO, "arp_ifinit: cannot create arp " - "entry for interface address\n"); - return; - } - - IF_AFDATA_WLOCK(ifp); - LLE_WLOCK(lle); - /* Unlink any entry if exists */ - lle_tmp = lla_lookup(LLTABLE(ifp), LLE_EXCLUSIVE, dst); - if (lle_tmp != NULL) - lltable_unlink_entry(LLTABLE(ifp), lle_tmp); - - lltable_link_entry(LLTABLE(ifp), lle); - IF_AFDATA_WUNLOCK(ifp); - - if (lle_tmp != NULL) - EVENTHANDLER_INVOKE(lle_event, lle_tmp, LLENTRY_EXPIRED); - - EVENTHANDLER_INVOKE(lle_event, lle, LLENTRY_RESOLVED); - LLE_WUNLOCK(lle); - if (lle_tmp != NULL) - lltable_free_entry(LLTABLE(ifp), lle_tmp); -} - -void -arp_ifinit(struct ifnet *ifp, struct ifaddr *ifa) -{ - const struct sockaddr_in *dst_in; - const struct sockaddr *dst; - - if (ifa->ifa_carp != NULL) - return; - - dst = ifa->ifa_addr; - dst_in = (const struct sockaddr_in *)dst; - - if (ntohl(dst_in->sin_addr.s_addr) == INADDR_ANY) - return; - 
arp_announce_ifaddr(ifp, dst_in->sin_addr, IF_LLADDR(ifp)); - - arp_add_ifa_lle(ifp, dst); -} void arp_announce_ifaddr(struct ifnet *ifp, struct in_addr addr, u_char *enaddr) @@ -1298,41 +865,11 @@ arprequest(ifp, &addr, &addr, enaddr); } -/* - * Sends gratuitous ARPs for each ifaddr to notify other - * nodes about the address change. - */ -static __noinline void -arp_handle_ifllchange(struct ifnet *ifp) -{ - struct ifaddr *ifa; - - TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { - if (ifa->ifa_addr->sa_family == AF_INET) - arp_ifinit(ifp, ifa); - } -} - -/* - * A handler for interface link layer address change event. - */ -static void -arp_iflladdr(void *arg __unused, struct ifnet *ifp) -{ - - lltable_update_ifaddr(LLTABLE(ifp)); - - if ((ifp->if_flags & IFF_UP) != 0) - arp_handle_ifllchange(ifp); -} - static void arp_init(void) { netisr_register(&arp_nh); - if (IS_DEFAULT_VNET(curvnet)) - iflladdr_tag = EVENTHANDLER_REGISTER(iflladdr_event, - arp_iflladdr, NULL, EVENTHANDLER_PRI_ANY); + lle4_init(); } SYSINIT(arp, SI_SUB_PROTO_DOMAIN, SI_ORDER_ANY, arp_init, 0); Index: sys/netinet/in.c =================================================================== --- sys/netinet/in.c +++ sys/netinet/in.c @@ -994,409 +994,6 @@ IN_MULTI_UNLOCK(); } -struct in_llentry { - struct llentry base; -}; - -#define IN_LLTBL_DEFAULT_HSIZE 32 -#define IN_LLTBL_HASH(k, h) \ - (((((((k >> 8) ^ k) >> 8) ^ k) >> 8) ^ k) & ((h) - 1)) - -/* - * Do actual deallocation of @lle. - * Called by LLE_FREE_LOCKED when number of references - * drops to zero. 
- */ -static void -in_lltable_destroy_lle(struct llentry *lle) -{ - - LLE_WUNLOCK(lle); - LLE_LOCK_DESTROY(lle); - LLE_REQ_DESTROY(lle); - free(lle, M_LLTABLE); -} - -static struct llentry * -in_lltable_new(struct in_addr addr4, u_int flags) -{ - struct in_llentry *lle; - - lle = malloc(sizeof(struct in_llentry), M_LLTABLE, M_NOWAIT | M_ZERO); - if (lle == NULL) /* NB: caller generates msg */ - return NULL; - - /* - * For IPv4 this will trigger "arpresolve" to generate - * an ARP request. - */ - lle->base.la_expire = time_uptime; /* mark expired */ - lle->base.r_l3addr.addr4 = addr4; - lle->base.lle_refcnt = 1; - lle->base.lle_free = in_lltable_destroy_lle; - LLE_LOCK_INIT(&lle->base); - LLE_REQ_INIT(&lle->base); - callout_init(&lle->base.lle_timer, 1); - - return (&lle->base); -} - -#define IN_ARE_MASKED_ADDR_EQUAL(d, a, m) ( \ - ((((d).s_addr ^ (a).s_addr) & (m).s_addr)) == 0 ) - -static int -in_lltable_match_prefix(const struct sockaddr *saddr, - const struct sockaddr *smask, u_int flags, struct llentry *lle) -{ - struct in_addr addr, mask, lle_addr; - - addr = ((const struct sockaddr_in *)saddr)->sin_addr; - mask = ((const struct sockaddr_in *)smask)->sin_addr; - lle_addr.s_addr = ntohl(lle->r_l3addr.addr4.s_addr); - - if (IN_ARE_MASKED_ADDR_EQUAL(lle_addr, addr, mask) == 0) - return (0); - - if (lle->la_flags & LLE_IFADDR) { - - /* - * Delete LLE_IFADDR records IFF address & flag matches. - * Note that addr is the interface address within prefix - * being matched. - * Note also we should handle 'ifdown' cases without removing - * ifaddr macs. 
- */ - if (addr.s_addr == lle_addr.s_addr && (flags & LLE_STATIC) != 0) - return (1); - return (0); - } - - /* flags & LLE_STATIC means deleting both dynamic and static entries */ - if ((flags & LLE_STATIC) || !(lle->la_flags & LLE_STATIC)) - return (1); - - return (0); -} - -static void -in_lltable_free_entry(struct lltable *llt, struct llentry *lle) -{ - struct ifnet *ifp; - size_t pkts_dropped; - - LLE_WLOCK_ASSERT(lle); - KASSERT(llt != NULL, ("lltable is NULL")); - - /* Unlink entry from table if not already */ - if ((lle->la_flags & LLE_LINKED) != 0) { - ifp = llt->llt_ifp; - IF_AFDATA_WLOCK_ASSERT(ifp); - lltable_unlink_entry(llt, lle); - } - - /* cancel timer */ - if (callout_stop(&lle->lle_timer) > 0) - LLE_REMREF(lle); - - /* Drop hold queue */ - pkts_dropped = llentry_free(lle); - ARPSTAT_ADD(dropped, pkts_dropped); -} - -static int -in_lltable_rtcheck(struct ifnet *ifp, u_int flags, const struct sockaddr *l3addr) -{ - struct rtentry *rt; - - KASSERT(l3addr->sa_family == AF_INET, - ("sin_family %d", l3addr->sa_family)); - - /* XXX rtalloc1_fib should take a const param */ - rt = rtalloc1_fib(__DECONST(struct sockaddr *, l3addr), 0, 0, - ifp->if_fib); - - if (rt == NULL) - return (EINVAL); - - /* - * If the gateway for an existing host route matches the target L3 - * address, which is a special route inserted by some implementation - * such as MANET, and the interface is of the correct type, then - * allow for ARP to proceed. - */ - if (rt->rt_flags & RTF_GATEWAY) { - if (!(rt->rt_flags & RTF_HOST) || !rt->rt_ifp || - rt->rt_ifp->if_type != IFT_ETHER || - (rt->rt_ifp->if_flags & (IFF_NOARP | IFF_STATICARP)) != 0 || - memcmp(rt->rt_gateway->sa_data, l3addr->sa_data, - sizeof(in_addr_t)) != 0) { - RTFREE_LOCKED(rt); - return (EINVAL); - } - } - - /* - * Make sure that at least the destination address is covered - * by the route. This is for handling the case where 2 or more - * interfaces have the same prefix. 
An incoming packet arrives - * on one interface and the corresponding outgoing packet leaves - * another interface. - */ - if (!(rt->rt_flags & RTF_HOST) && rt->rt_ifp != ifp) { - const char *sa, *mask, *addr, *lim; - int len; - - mask = (const char *)rt_mask(rt); - /* - * Just being extra cautious to avoid some custom - * code getting into trouble. - */ - if (mask == NULL) { - RTFREE_LOCKED(rt); - return (EINVAL); - } - - sa = (const char *)rt_key(rt); - addr = (const char *)l3addr; - len = ((const struct sockaddr_in *)l3addr)->sin_len; - lim = addr + len; - - for ( ; addr < lim; sa++, mask++, addr++) { - if ((*sa ^ *addr) & *mask) { -#ifdef DIAGNOSTIC - log(LOG_INFO, "IPv4 address: \"%s\" is not on the network\n", - inet_ntoa(((const struct sockaddr_in *)l3addr)->sin_addr)); -#endif - RTFREE_LOCKED(rt); - return (EINVAL); - } - } - } - - RTFREE_LOCKED(rt); - return (0); -} - -static inline uint32_t -in_lltable_hash_dst(const struct in_addr dst, uint32_t hsize) -{ - - return (IN_LLTBL_HASH(dst.s_addr, hsize)); -} - -static uint32_t -in_lltable_hash(const struct llentry *lle, uint32_t hsize) -{ - - return (in_lltable_hash_dst(lle->r_l3addr.addr4, hsize)); -} - -static void -in_lltable_fill_sa_entry(const struct llentry *lle, struct sockaddr *sa) -{ - struct sockaddr_in *sin; - - sin = (struct sockaddr_in *)sa; - bzero(sin, sizeof(*sin)); - sin->sin_family = AF_INET; - sin->sin_len = sizeof(*sin); - sin->sin_addr = lle->r_l3addr.addr4; -} - -static inline struct llentry * -in_lltable_find_dst(struct lltable *llt, struct in_addr dst) -{ - struct llentry *lle; - struct llentries *lleh; - u_int hashidx; - - hashidx = in_lltable_hash_dst(dst, llt->llt_hsize); - lleh = &llt->lle_head[hashidx]; - LIST_FOREACH(lle, lleh, lle_next) { - if (lle->la_flags & LLE_DELETED) - continue; - if (lle->r_l3addr.addr4.s_addr == dst.s_addr) - break; - } - - return (lle); -} - -static void -in_lltable_delete_entry(struct lltable *llt, struct llentry *lle) -{ - - lle->la_flags |= 
LLE_DELETED; - EVENTHANDLER_INVOKE(lle_event, lle, LLENTRY_DELETED); -#ifdef DIAGNOSTIC - log(LOG_INFO, "ifaddr cache = %p is deleted\n", lle); -#endif - llentry_free(lle); -} - -static struct llentry * -in_lltable_alloc(struct lltable *llt, u_int flags, const struct sockaddr *l3addr) -{ - const struct sockaddr_in *sin = (const struct sockaddr_in *)l3addr; - struct ifnet *ifp = llt->llt_ifp; - struct llentry *lle; - char linkhdr[LLE_MAX_LINKHDR]; - size_t linkhdrsize; - int lladdr_off; - - KASSERT(l3addr->sa_family == AF_INET, - ("sin_family %d", l3addr->sa_family)); - - /* - * A route that covers the given address must have - * been installed 1st because we are doing a resolution, - * verify this. - */ - if (!(flags & LLE_IFADDR) && - in_lltable_rtcheck(ifp, flags, l3addr) != 0) - return (NULL); - - lle = in_lltable_new(sin->sin_addr, flags); - if (lle == NULL) { - log(LOG_INFO, "lla_lookup: new lle malloc failed\n"); - return (NULL); - } - lle->la_flags = flags; - if (flags & LLE_STATIC) - lle->r_flags |= RLLE_VALID; - if ((flags & LLE_IFADDR) == LLE_IFADDR) { - linkhdrsize = LLE_MAX_LINKHDR; - if (lltable_calc_llheader(ifp, AF_INET, IF_LLADDR(ifp), - linkhdr, &linkhdrsize, &lladdr_off) != 0) - return (NULL); - lltable_set_entry_addr(ifp, lle, linkhdr, linkhdrsize, - lladdr_off); - lle->la_flags |= LLE_STATIC; - lle->r_flags |= (RLLE_VALID | RLLE_IFADDR); - } - - return (lle); -} - -/* - * Return NULL if not found or marked for deletion. - * If found return lle read locked. 
- */ -static struct llentry * -in_lltable_lookup(struct lltable *llt, u_int flags, const struct sockaddr *l3addr) -{ - const struct sockaddr_in *sin = (const struct sockaddr_in *)l3addr; - struct llentry *lle; - - IF_AFDATA_LOCK_ASSERT(llt->llt_ifp); - KASSERT(l3addr->sa_family == AF_INET, - ("sin_family %d", l3addr->sa_family)); - lle = in_lltable_find_dst(llt, sin->sin_addr); - - if (lle == NULL) - return (NULL); - - KASSERT((flags & (LLE_UNLOCKED|LLE_EXCLUSIVE)) != - (LLE_UNLOCKED|LLE_EXCLUSIVE),("wrong lle request flags: 0x%X", - flags)); - - if (flags & LLE_UNLOCKED) - return (lle); - - if (flags & LLE_EXCLUSIVE) - LLE_WLOCK(lle); - else - LLE_RLOCK(lle); - - return (lle); -} - -static int -in_lltable_dump_entry(struct lltable *llt, struct llentry *lle, - struct sysctl_req *wr) -{ - struct ifnet *ifp = llt->llt_ifp; - /* XXX stack use */ - struct { - struct rt_msghdr rtm; - struct sockaddr_in sin; - struct sockaddr_dl sdl; - } arpc; - struct sockaddr_dl *sdl; - int error; - - bzero(&arpc, sizeof(arpc)); - /* skip deleted entries */ - if ((lle->la_flags & LLE_DELETED) == LLE_DELETED) - return (0); - /* Skip if jailed and not a valid IP of the prison. 
*/ - lltable_fill_sa_entry(lle,(struct sockaddr *)&arpc.sin); - if (prison_if(wr->td->td_ucred, - (struct sockaddr *)&arpc.sin) != 0) - return (0); - /* - * produce a msg made of: - * struct rt_msghdr; - * struct sockaddr_in; (IPv4) - * struct sockaddr_dl; - */ - arpc.rtm.rtm_msglen = sizeof(arpc); - arpc.rtm.rtm_version = RTM_VERSION; - arpc.rtm.rtm_type = RTM_GET; - arpc.rtm.rtm_flags = RTF_UP; - arpc.rtm.rtm_addrs = RTA_DST | RTA_GATEWAY; - - /* publish */ - if (lle->la_flags & LLE_PUB) - arpc.rtm.rtm_flags |= RTF_ANNOUNCE; - - sdl = &arpc.sdl; - sdl->sdl_family = AF_LINK; - sdl->sdl_len = sizeof(*sdl); - sdl->sdl_index = ifp->if_index; - sdl->sdl_type = ifp->if_type; - if ((lle->la_flags & LLE_VALID) == LLE_VALID) { - sdl->sdl_alen = ifp->if_addrlen; - bcopy(lle->ll_addr, LLADDR(sdl), ifp->if_addrlen); - } else { - sdl->sdl_alen = 0; - bzero(LLADDR(sdl), ifp->if_addrlen); - } - - arpc.rtm.rtm_rmx.rmx_expire = - lle->la_flags & LLE_STATIC ? 0 : lle->la_expire; - arpc.rtm.rtm_flags |= (RTF_HOST | RTF_LLDATA); - if (lle->la_flags & LLE_STATIC) - arpc.rtm.rtm_flags |= RTF_STATIC; - if (lle->la_flags & LLE_IFADDR) - arpc.rtm.rtm_flags |= RTF_PINNED; - arpc.rtm.rtm_index = ifp->if_index; - error = SYSCTL_OUT(wr, &arpc, sizeof(arpc)); - - return (error); -} - -static struct lltable * -in_lltattach(struct ifnet *ifp) -{ - struct lltable *llt; - - llt = lltable_allocate_htbl(IN_LLTBL_DEFAULT_HSIZE); - llt->llt_af = AF_INET; - llt->llt_ifp = ifp; - - llt->llt_lookup = in_lltable_lookup; - llt->llt_alloc_entry = in_lltable_alloc; - llt->llt_delete_entry = in_lltable_delete_entry; - llt->llt_dump_entry = in_lltable_dump_entry; - llt->llt_hash = in_lltable_hash; - llt->llt_fill_sa_entry = in_lltable_fill_sa_entry; - llt->llt_free_entry = in_lltable_free_entry; - llt->llt_match_prefix = in_lltable_match_prefix; - lltable_link(llt); - - return (llt); -} void * in_domifattach(struct ifnet *ifp) Index: sys/netinet/in_lle.c 
=================================================================== --- /dev/null +++ sys/netinet/in_lle.c @@ -0,0 +1,928 @@ +/* + * Copyright (c) 2004 Luigi Rizzo, Alessandro Cerri. All rights reserved. + * Copyright (c) 2004-2008 Qing Li. All rights reserved. + * Copyright (c) 2008 Kip Macy. All rights reserved. + * Copyright (c) 2015-2016 Alexander V. Chernikov. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ */ +#include +__FBSDID("$FreeBSD$"); + +#include "opt_inet.h" + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +#define SIN(s) ((const struct sockaddr_in *)(s)) + +/* Simple ARP state machine */ +enum arp_llinfo_state { + ARP_LLINFO_INCOMPLETE = 0, /* No LLE data */ + ARP_LLINFO_REACHABLE, /* LLE is valid */ + ARP_LLINFO_VERIFY, /* LLE is valid, need refresh */ + ARP_LLINFO_DELETED, /* LLE is deleted */ +}; + +static eventhandler_tag iflladdr_tag; + +static void arptimer(void *); + +/* + * Timeout routine. Age arp_tab entries periodically. + */ +static void +arptimer(void *arg) +{ + struct llentry *lle = (struct llentry *)arg; + struct ifnet *ifp; + int r_skip_req; + + if (lle->la_flags & LLE_STATIC) { + return; + } + LLE_WLOCK(lle); + if (callout_pending(&lle->lle_timer)) { + /* + * Here we are a bit odd here in the treatment of + * active/pending. If the pending bit is set, it got + * rescheduled before I ran. The active + * bit we ignore, since if it was stopped + * in ll_tablefree() and was currently running + * it would have return 0 so the code would + * not have deleted it since the callout could + * not be stopped so we want to go through + * with the delete here now. If the callout + * was restarted, the pending bit will be back on and + * we just want to bail since the callout_reset would + * return 1 and our reference would have been removed + * by arpresolve() below. + */ + LLE_WUNLOCK(lle); + return; + } + ifp = lle->lle_tbl->llt_ifp; + CURVNET_SET(ifp->if_vnet); + + switch (lle->ln_state) { + case ARP_LLINFO_REACHABLE: + + /* + * Expiration time is approaching. + * Let's try to refresh entry if it is still + * in use. + * + * Set r_skip_req to get feedback from + * fast path. Change state and re-schedule + * ourselves. 
+ */ + LLE_REQ_LOCK(lle); + lle->r_skip_req = 1; + LLE_REQ_UNLOCK(lle); + lle->ln_state = ARP_LLINFO_VERIFY; + callout_schedule(&lle->lle_timer, hz * V_arpt_rexmit); + LLE_WUNLOCK(lle); + CURVNET_RESTORE(); + return; + case ARP_LLINFO_VERIFY: + LLE_REQ_LOCK(lle); + r_skip_req = lle->r_skip_req; + LLE_REQ_UNLOCK(lle); + + if (r_skip_req == 0 && lle->la_preempt > 0) { + /* Entry was used, issue refresh request */ + struct in_addr dst; + dst = lle->r_l3addr.addr4; + lle->la_preempt--; + callout_schedule(&lle->lle_timer, hz * V_arpt_rexmit); + LLE_WUNLOCK(lle); + arprequest(ifp, NULL, &dst, NULL); + CURVNET_RESTORE(); + return; + } + /* Nothing happened. Reschedule if not too late */ + if (lle->la_expire > time_uptime) { + callout_schedule(&lle->lle_timer, hz * V_arpt_rexmit); + LLE_WUNLOCK(lle); + CURVNET_RESTORE(); + return; + } + break; + case ARP_LLINFO_INCOMPLETE: + case ARP_LLINFO_DELETED: + break; + } + + if ((lle->la_flags & LLE_DELETED) == 0) { + int evt; + + if (lle->la_flags & LLE_VALID) + evt = LLENTRY_EXPIRED; + else + evt = LLENTRY_TIMEDOUT; + EVENTHANDLER_INVOKE(lle_event, lle, evt); + } + + callout_stop(&lle->lle_timer); + + /* XXX: LOR avoidance. We still have ref on lle. */ + LLE_WUNLOCK(lle); + IF_AFDATA_LOCK(ifp); + LLE_WLOCK(lle); + + /* Guard against race with other llentry_free(). */ + if (lle->la_flags & LLE_LINKED) { + LLE_REMREF(lle); + lltable_unlink_entry(lle->lle_tbl, lle); + } + IF_AFDATA_UNLOCK(ifp); + + size_t pkts_dropped = llentry_free(lle); + + ARPSTAT_ADD(dropped, pkts_dropped); + ARPSTAT_INC(timeouts); + + CURVNET_RESTORE(); +} + +/* + * Resolve an IP address into an ethernet address - heavy version. + * Used internally by arpresolve(). + * We have already checked than we can't use existing lle without + * modification so we have to acquire LLE_EXCLUSIVE lle lock. 
+ * + * On success, desten and flags are filled in and the function returns 0; + * If the packet must be held pending resolution, we return EWOULDBLOCK + * On other errors, we return the corresponding error code. + * Note that m_freem() handles NULL. + */ +static int +arpresolve_full(struct ifnet *ifp, int is_gw, int flags, struct mbuf *m, + const struct sockaddr *dst, u_char *desten, uint32_t *pflags) +{ + struct llentry *la = NULL, *la_tmp; + struct mbuf *curr = NULL; + struct mbuf *next = NULL; + int error, renew; + char *lladdr; + int ll_len; + + if (pflags != NULL) + *pflags = 0; + + if ((flags & LLE_CREATE) == 0) { + IF_AFDATA_RLOCK(ifp); + la = lla_lookup(LLTABLE(ifp), LLE_EXCLUSIVE, dst); + IF_AFDATA_RUNLOCK(ifp); + } + if (la == NULL && (ifp->if_flags & (IFF_NOARP | IFF_STATICARP)) == 0) { + la = lltable_alloc_entry(LLTABLE(ifp), 0, dst); + if (la == NULL) { + log(LOG_DEBUG, + "arpresolve: can't allocate llinfo for %s on %s\n", + inet_ntoa(SIN(dst)->sin_addr), if_name(ifp)); + m_freem(m); + return (EINVAL); + } + + IF_AFDATA_WLOCK(ifp); + LLE_WLOCK(la); + la_tmp = lla_lookup(LLTABLE(ifp), LLE_EXCLUSIVE, dst); + /* Prefer ANY existing lle over newly-created one */ + if (la_tmp == NULL) + lltable_link_entry(LLTABLE(ifp), la); + IF_AFDATA_WUNLOCK(ifp); + if (la_tmp != NULL) { + lltable_free_entry(LLTABLE(ifp), la); + la = la_tmp; + } + } + if (la == NULL) { + m_freem(m); + return (EINVAL); + } + + if ((la->la_flags & LLE_VALID) && + ((la->la_flags & LLE_STATIC) || la->la_expire > time_uptime)) { + if (flags & LLE_ADDRONLY) { + lladdr = la->ll_addr; + ll_len = ifp->if_addrlen; + } else { + lladdr = la->r_linkdata; + ll_len = la->r_hdrlen; + } + bcopy(lladdr, desten, ll_len); + + /* Check if we have feedback request from arptimer() */ + if (la->r_skip_req != 0) { + LLE_REQ_LOCK(la); + la->r_skip_req = 0; /* Notify that entry was used */ + LLE_REQ_UNLOCK(la); + } + if (pflags != NULL) + *pflags = la->la_flags & (LLE_VALID|LLE_IFADDR); + LLE_WUNLOCK(la); + return 
(0); + } + + renew = (la->la_asked == 0 || la->la_expire != time_uptime); + /* + * There is an arptab entry, but no ethernet address + * response yet. Add the mbuf to the list, dropping + * the oldest packet if we have exceeded the system + * setting. + */ + if (m != NULL) { + if (la->la_numheld >= V_arp_maxhold) { + if (la->la_hold != NULL) { + next = la->la_hold->m_nextpkt; + m_freem(la->la_hold); + la->la_hold = next; + la->la_numheld--; + ARPSTAT_INC(dropped); + } + } + if (la->la_hold != NULL) { + curr = la->la_hold; + while (curr->m_nextpkt != NULL) + curr = curr->m_nextpkt; + curr->m_nextpkt = m; + } else + la->la_hold = m; + la->la_numheld++; + } + /* + * Return EWOULDBLOCK if we have tried less than arp_maxtries. It + * will be masked by ether_output(). Return EHOSTDOWN/EHOSTUNREACH + * if we have already sent arp_maxtries ARP requests. Retransmit the + * ARP request, but not faster than one request per second. + */ + if (la->la_asked < V_arp_maxtries) + error = EWOULDBLOCK; /* First request. */ + else + error = is_gw != 0 ? EHOSTUNREACH : EHOSTDOWN; + + if (renew) { + int canceled; + + LLE_ADDREF(la); + la->la_expire = time_uptime; + canceled = callout_reset(&la->lle_timer, hz * V_arpt_down, + arptimer, la); + if (canceled) + LLE_REMREF(la); + la->la_asked++; + LLE_WUNLOCK(la); + arprequest(ifp, NULL, &SIN(dst)->sin_addr, NULL); + return (error); + } + + LLE_WUNLOCK(la); + return (error); +} + +/* + * Resolve an IP address into an ethernet address. + */ +int +arpresolve_addr(struct ifnet *ifp, int flags, const struct sockaddr *dst, + char *desten, uint32_t *pflags) +{ + int error; + + flags |= LLE_ADDRONLY; + error = arpresolve_full(ifp, 0, flags, NULL, dst, desten, pflags); + return (error); +} + + +/* + * Lookups link header based on an IP address. + * On input: + * ifp is the interface we use + * is_gw != 0 if @dst represents gateway to some destination + * m is the mbuf. May be NULL if we don't have a packet. 
+ * dst is the next hop, + * desten is the storage to put LL header. + * flags returns subset of lle flags: LLE_VALID | LLE_IFADDR + * + * On success, full/partial link header and flags are filled in and + * the function returns 0. + * If the packet must be held pending resolution, we return EWOULDBLOCK + * On other errors, we return the corresponding error code. + * Note that m_freem() handles NULL. + */ +int +arpresolve(struct ifnet *ifp, int is_gw, struct mbuf *m, + const struct sockaddr *dst, u_char *desten, uint32_t *pflags) +{ + struct llentry *la = 0; + + if (pflags != NULL) + *pflags = 0; + + if (m != NULL) { + if (m->m_flags & M_BCAST) { + /* broadcast */ + (void)memcpy(desten, + ifp->if_broadcastaddr, ifp->if_addrlen); + return (0); + } + if (m->m_flags & M_MCAST) { + /* multicast */ + ETHER_MAP_IP_MULTICAST(&SIN(dst)->sin_addr, desten); + return (0); + } + } + + IF_AFDATA_RLOCK(ifp); + la = lla_lookup(LLTABLE(ifp), LLE_UNLOCKED, dst); + if (la != NULL && (la->r_flags & RLLE_VALID) != 0) { + /* Entry found, let's copy lle info */ + bcopy(la->r_linkdata, desten, la->r_hdrlen); + if (pflags != NULL) + *pflags = LLE_VALID | (la->r_flags & RLLE_IFADDR); + /* Check if we have feedback request from arptimer() */ + if (la->r_skip_req != 0) { + LLE_REQ_LOCK(la); + la->r_skip_req = 0; /* Notify that entry was used */ + LLE_REQ_UNLOCK(la); + } + IF_AFDATA_RUNLOCK(ifp); + return (0); + } + IF_AFDATA_RUNLOCK(ifp); + + return (arpresolve_full(ifp, is_gw, la == NULL ? 
LLE_CREATE : 0, m, dst,
+	    desten, pflags));
+}
+
+/*
+ * Mark @la as resolved and (re)arm its expiration timer.
+ *
+ * The caller must hold the entry write lock (asserted below).
+ * The state is set to ARP_LLINFO_REACHABLE and an LLENTRY_RESOLVED
+ * event is raised.  Non-static entries additionally get a new
+ * expiration time and a rescheduled arptimer() callout.  The request
+ * counter is reset and the refresh budget (la_preempt) is refilled
+ * with V_arp_maxtries.
+ */
+void
+arp_mark_lle_reachable(struct llentry *la)
+{
+	int canceled, wtime;
+
+	LLE_WLOCK_ASSERT(la);
+
+	la->ln_state = ARP_LLINFO_REACHABLE;
+	EVENTHANDLER_INVOKE(lle_event, la, LLENTRY_RESOLVED);
+
+	if (!(la->la_flags & LLE_STATIC)) {
+		/* Reference held on behalf of the callout armed below. */
+		LLE_ADDREF(la);
+		la->la_expire = time_uptime + V_arpt_keep;
+		/*
+		 * Fire the timer before the entry actually expires;
+		 * presumably this leaves room for up to V_arp_maxtries
+		 * refresh attempts.  Fall back to the full keep time
+		 * if the tunables make the difference negative.
+		 */
+		wtime = V_arpt_keep - V_arp_maxtries * V_arpt_rexmit;
+		if (wtime < 0)
+			wtime = V_arpt_keep;
+		canceled = callout_reset(&la->lle_timer,
+		    hz * wtime, arptimer, la);
+		/*
+		 * A pending callout was replaced, so it already owned
+		 * a reference; drop the extra one taken above.
+		 */
+		if (canceled)
+			LLE_REMREF(la);
+	}
+	la->la_asked = 0;
+	la->la_preempt = V_arp_maxtries;
+}
+
+/*
+ * Add permanent link-layer record for given interface address.
+ *
+ * Any entry already present for @dst is unlinked, reported as
+ * LLENTRY_EXPIRED and freed after the new entry has been linked.
+ */
+static __noinline void
+arp_add_ifa_lle(struct ifnet *ifp, const struct sockaddr *dst)
+{
+	struct llentry *lle, *lle_tmp;
+
+	/*
+	 * Interface address LLE record is considered static
+	 * because kernel code relies on LLE_STATIC flag to check
+	 * if these entries can be rewritten by arp updates.
+	 */
+	lle = lltable_alloc_entry(LLTABLE(ifp), LLE_IFADDR | LLE_STATIC, dst);
+	if (lle == NULL) {
+		log(LOG_INFO, "arp_ifinit: cannot create arp "
+		    "entry for interface address\n");
+		return;
+	}
+
+	/* Table lock first, then the entry lock. */
+	IF_AFDATA_WLOCK(ifp);
+	LLE_WLOCK(lle);
+	/* Unlink the existing entry, if any (returned write-locked) */
+	lle_tmp = lla_lookup(LLTABLE(ifp), LLE_EXCLUSIVE, dst);
+	if (lle_tmp != NULL)
+		lltable_unlink_entry(LLTABLE(ifp), lle_tmp);
+
+	lltable_link_entry(LLTABLE(ifp), lle);
+	IF_AFDATA_WUNLOCK(ifp);
+
+	/* Event handlers are invoked after the table lock is dropped. */
+	if (lle_tmp != NULL)
+		EVENTHANDLER_INVOKE(lle_event, lle_tmp, LLENTRY_EXPIRED);
+
+	EVENTHANDLER_INVOKE(lle_event, lle, LLENTRY_RESOLVED);
+	LLE_WUNLOCK(lle);
+	if (lle_tmp != NULL)
+		lltable_free_entry(LLTABLE(ifp), lle_tmp);
+}
+
+void
+arp_ifinit(struct ifnet *ifp, struct ifaddr *ifa)
+{
+	const struct sockaddr_in *dst_in;
+	const struct sockaddr *dst;
+
+	if (ifa->ifa_carp != NULL)
+		return;
+
+	dst = ifa->ifa_addr;
+	dst_in = (const struct sockaddr_in *)dst;
+
+	if (ntohl(dst_in->sin_addr.s_addr) == INADDR_ANY)
+		return;
+ arp_announce_ifaddr(ifp, dst_in->sin_addr, IF_LLADDR(ifp)); + + arp_add_ifa_lle(ifp, dst); +} + +/* + * Sends gratuitous ARPs for each ifaddr to notify other + * nodes about the address change. + */ +static __noinline void +arp_handle_ifllchange(struct ifnet *ifp) +{ + struct ifaddr *ifa; + + TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { + if (ifa->ifa_addr->sa_family == AF_INET) + arp_ifinit(ifp, ifa); + } +} + +/* + * A handler for interface link layer address change event. + */ +static void +arp_iflladdr(void *arg __unused, struct ifnet *ifp) +{ + + lltable_update_ifaddr(LLTABLE(ifp)); + + if ((ifp->if_flags & IFF_UP) != 0) + arp_handle_ifllchange(ifp); +} + +void +lle4_init(void) +{ + + if (IS_DEFAULT_VNET(curvnet)) + iflladdr_tag = EVENTHANDLER_REGISTER(iflladdr_event, + arp_iflladdr, NULL, EVENTHANDLER_PRI_ANY); +} + + +struct in_llentry { + struct llentry base; +}; + +#define IN_LLTBL_DEFAULT_HSIZE 32 +#define IN_LLTBL_HASH(k, h) \ + (((((((k >> 8) ^ k) >> 8) ^ k) >> 8) ^ k) & ((h) - 1)) + +/* + * Do actual deallocation of @lle. + * Called by LLE_FREE_LOCKED when number of references + * drops to zero. + */ +static void +in_lltable_destroy_lle(struct llentry *lle) +{ + + LLE_WUNLOCK(lle); + LLE_LOCK_DESTROY(lle); + LLE_REQ_DESTROY(lle); + free(lle, M_LLTABLE); +} + +static struct llentry * +in_lltable_new(struct in_addr addr4, u_int flags) +{ + struct in_llentry *lle; + + lle = malloc(sizeof(struct in_llentry), M_LLTABLE, M_NOWAIT | M_ZERO); + if (lle == NULL) /* NB: caller generates msg */ + return NULL; + + /* + * For IPv4 this will trigger "arpresolve" to generate + * an ARP request. 
+ */ + lle->base.la_expire = time_uptime; /* mark expired */ + lle->base.r_l3addr.addr4 = addr4; + lle->base.lle_refcnt = 1; + lle->base.lle_free = in_lltable_destroy_lle; + LLE_LOCK_INIT(&lle->base); + LLE_REQ_INIT(&lle->base); + callout_init(&lle->base.lle_timer, 1); + + return (&lle->base); +} + +#define IN_ARE_MASKED_ADDR_EQUAL(d, a, m) ( \ + ((((d).s_addr ^ (a).s_addr) & (m).s_addr)) == 0 ) + +static int +in_lltable_match_prefix(const struct sockaddr *saddr, + const struct sockaddr *smask, u_int flags, struct llentry *lle) +{ + struct in_addr addr, mask, lle_addr; + + addr = ((const struct sockaddr_in *)saddr)->sin_addr; + mask = ((const struct sockaddr_in *)smask)->sin_addr; + lle_addr.s_addr = ntohl(lle->r_l3addr.addr4.s_addr); + + if (IN_ARE_MASKED_ADDR_EQUAL(lle_addr, addr, mask) == 0) + return (0); + + if (lle->la_flags & LLE_IFADDR) { + + /* + * Delete LLE_IFADDR records IFF address & flag matches. + * Note that addr is the interface address within prefix + * being matched. + * Note also we should handle 'ifdown' cases without removing + * ifaddr macs. 
+ */ + if (addr.s_addr == lle_addr.s_addr && (flags & LLE_STATIC) != 0) + return (1); + return (0); + } + + /* flags & LLE_STATIC means deleting both dynamic and static entries */ + if ((flags & LLE_STATIC) || !(lle->la_flags & LLE_STATIC)) + return (1); + + return (0); +} + +static void +in_lltable_free_entry(struct lltable *llt, struct llentry *lle) +{ + struct ifnet *ifp; + size_t pkts_dropped; + + LLE_WLOCK_ASSERT(lle); + KASSERT(llt != NULL, ("lltable is NULL")); + + /* Unlink entry from table if not already */ + if ((lle->la_flags & LLE_LINKED) != 0) { + ifp = llt->llt_ifp; + IF_AFDATA_WLOCK_ASSERT(ifp); + lltable_unlink_entry(llt, lle); + } + + /* cancel timer */ + if (callout_stop(&lle->lle_timer) > 0) + LLE_REMREF(lle); + + /* Drop hold queue */ + pkts_dropped = llentry_free(lle); + ARPSTAT_ADD(dropped, pkts_dropped); +} + +static int +in_lltable_rtcheck(struct ifnet *ifp, u_int flags, const struct sockaddr *l3addr) +{ + struct rtentry *rt; + + KASSERT(l3addr->sa_family == AF_INET, + ("sin_family %d", l3addr->sa_family)); + + /* XXX rtalloc1_fib should take a const param */ + rt = rtalloc1_fib(__DECONST(struct sockaddr *, l3addr), 0, 0, + ifp->if_fib); + + if (rt == NULL) + return (EINVAL); + + /* + * If the gateway for an existing host route matches the target L3 + * address, which is a special route inserted by some implementation + * such as MANET, and the interface is of the correct type, then + * allow for ARP to proceed. + */ + if (rt->rt_flags & RTF_GATEWAY) { + if (!(rt->rt_flags & RTF_HOST) || !rt->rt_ifp || + rt->rt_ifp->if_type != IFT_ETHER || + (rt->rt_ifp->if_flags & (IFF_NOARP | IFF_STATICARP)) != 0 || + memcmp(rt->rt_gateway->sa_data, l3addr->sa_data, + sizeof(in_addr_t)) != 0) { + RTFREE_LOCKED(rt); + return (EINVAL); + } + } + + /* + * Make sure that at least the destination address is covered + * by the route. This is for handling the case where 2 or more + * interfaces have the same prefix. 
An incoming packet arrives + * on one interface and the corresponding outgoing packet leaves + * another interface. + */ + if (!(rt->rt_flags & RTF_HOST) && rt->rt_ifp != ifp) { + const char *sa, *mask, *addr, *lim; + int len; + + mask = (const char *)rt_mask(rt); + /* + * Just being extra cautious to avoid some custom + * code getting into trouble. + */ + if (mask == NULL) { + RTFREE_LOCKED(rt); + return (EINVAL); + } + + sa = (const char *)rt_key(rt); + addr = (const char *)l3addr; + len = ((const struct sockaddr_in *)l3addr)->sin_len; + lim = addr + len; + + for ( ; addr < lim; sa++, mask++, addr++) { + if ((*sa ^ *addr) & *mask) { +#ifdef DIAGNOSTIC + log(LOG_INFO, "IPv4 address: \"%s\" is not on the network\n", + inet_ntoa(((const struct sockaddr_in *)l3addr)->sin_addr)); +#endif + RTFREE_LOCKED(rt); + return (EINVAL); + } + } + } + + RTFREE_LOCKED(rt); + return (0); +} + +static inline uint32_t +in_lltable_hash_dst(const struct in_addr dst, uint32_t hsize) +{ + + return (IN_LLTBL_HASH(dst.s_addr, hsize)); +} + +static uint32_t +in_lltable_hash(const struct llentry *lle, uint32_t hsize) +{ + + return (in_lltable_hash_dst(lle->r_l3addr.addr4, hsize)); +} + +static void +in_lltable_fill_sa_entry(const struct llentry *lle, struct sockaddr *sa) +{ + struct sockaddr_in *sin; + + sin = (struct sockaddr_in *)sa; + bzero(sin, sizeof(*sin)); + sin->sin_family = AF_INET; + sin->sin_len = sizeof(*sin); + sin->sin_addr = lle->r_l3addr.addr4; +} + +static inline struct llentry * +in_lltable_find_dst(struct lltable *llt, struct in_addr dst) +{ + struct llentry *lle; + struct llentries *lleh; + u_int hashidx; + + hashidx = in_lltable_hash_dst(dst, llt->llt_hsize); + lleh = &llt->lle_head[hashidx]; + LIST_FOREACH(lle, lleh, lle_next) { + if (lle->la_flags & LLE_DELETED) + continue; + if (lle->r_l3addr.addr4.s_addr == dst.s_addr) + break; + } + + return (lle); +} + +static void +in_lltable_delete_entry(struct lltable *llt, struct llentry *lle) +{ + + lle->la_flags |= 
LLE_DELETED;
+	EVENTHANDLER_INVOKE(lle_event, lle, LLENTRY_DELETED);
+#ifdef DIAGNOSTIC
+	log(LOG_INFO, "ifaddr cache = %p is deleted\n", lle);
+#endif
+	llentry_free(lle);
+}
+
+/*
+ * Allocate and initialize a new IPv4 link-layer entry for @l3addr.
+ *
+ * The entry is returned unlocked and is not linked into @llt.
+ * Unless LLE_IFADDR is requested, the address must be covered by
+ * a suitable route (see in_lltable_rtcheck()).  For LLE_IFADDR entries
+ * the link-layer header is precomputed from the interface's own
+ * link-layer address and the entry is marked static and valid.
+ *
+ * Returns NULL if the route check fails, if memory cannot be
+ * allocated, or if the link-layer header cannot be calculated.
+ */
+static struct llentry *
+in_lltable_alloc(struct lltable *llt, u_int flags, const struct sockaddr *l3addr)
+{
+	const struct sockaddr_in *sin = (const struct sockaddr_in *)l3addr;
+	struct ifnet *ifp = llt->llt_ifp;
+	struct llentry *lle;
+	char linkhdr[LLE_MAX_LINKHDR];
+	size_t linkhdrsize;
+	int lladdr_off;
+
+	KASSERT(l3addr->sa_family == AF_INET,
+	    ("sin_family %d", l3addr->sa_family));
+
+	/*
+	 * A route that covers the given address must have
+	 * been installed 1st because we are doing a resolution,
+	 * verify this.
+	 */
+	if (!(flags & LLE_IFADDR) &&
+	    in_lltable_rtcheck(ifp, flags, l3addr) != 0)
+		return (NULL);
+
+	lle = in_lltable_new(sin->sin_addr, flags);
+	if (lle == NULL) {
+		log(LOG_INFO, "lla_lookup: new lle malloc failed\n");
+		return (NULL);
+	}
+	lle->la_flags = flags;
+	if (flags & LLE_STATIC)
+		lle->r_flags |= RLLE_VALID;
+	if ((flags & LLE_IFADDR) == LLE_IFADDR) {
+		linkhdrsize = LLE_MAX_LINKHDR;
+		if (lltable_calc_llheader(ifp, AF_INET, IF_LLADDR(ifp),
+		    linkhdr, &linkhdrsize, &lladdr_off) != 0) {
+			/*
+			 * The entry has not been linked or handed out
+			 * yet, so nobody else will release it: free it
+			 * here instead of leaking it.
+			 * in_lltable_destroy_lle() unlocks the entry
+			 * before destroying its locks, hence the
+			 * write lock is taken first.
+			 */
+			LLE_WLOCK(lle);
+			in_lltable_destroy_lle(lle);
+			return (NULL);
+		}
+		lltable_set_entry_addr(ifp, lle, linkhdr, linkhdrsize,
+		    lladdr_off);
+		lle->la_flags |= LLE_STATIC;
+		lle->r_flags |= (RLLE_VALID | RLLE_IFADDR);
+	}
+
+	return (lle);
+}
+
+/*
+ * Return NULL if not found or marked for deletion.
+ * If found return lle read locked.
+ */ +static struct llentry * +in_lltable_lookup(struct lltable *llt, u_int flags, const struct sockaddr *l3addr) +{ + const struct sockaddr_in *sin = (const struct sockaddr_in *)l3addr; + struct llentry *lle; + + IF_AFDATA_LOCK_ASSERT(llt->llt_ifp); + KASSERT(l3addr->sa_family == AF_INET, + ("sin_family %d", l3addr->sa_family)); + lle = in_lltable_find_dst(llt, sin->sin_addr); + + if (lle == NULL) + return (NULL); + + KASSERT((flags & (LLE_UNLOCKED|LLE_EXCLUSIVE)) != + (LLE_UNLOCKED|LLE_EXCLUSIVE),("wrong lle request flags: 0x%X", + flags)); + + if (flags & LLE_UNLOCKED) + return (lle); + + if (flags & LLE_EXCLUSIVE) + LLE_WLOCK(lle); + else + LLE_RLOCK(lle); + + return (lle); +} + +static int +in_lltable_dump_entry(struct lltable *llt, struct llentry *lle, + struct sysctl_req *wr) +{ + struct ifnet *ifp = llt->llt_ifp; + /* XXX stack use */ + struct { + struct rt_msghdr rtm; + struct sockaddr_in sin; + struct sockaddr_dl sdl; + } arpc; + struct sockaddr_dl *sdl; + int error; + + bzero(&arpc, sizeof(arpc)); + /* skip deleted entries */ + if ((lle->la_flags & LLE_DELETED) == LLE_DELETED) + return (0); + /* Skip if jailed and not a valid IP of the prison. 
*/ + lltable_fill_sa_entry(lle,(struct sockaddr *)&arpc.sin); + if (prison_if(wr->td->td_ucred, + (struct sockaddr *)&arpc.sin) != 0) + return (0); + /* + * produce a msg made of: + * struct rt_msghdr; + * struct sockaddr_in; (IPv4) + * struct sockaddr_dl; + */ + arpc.rtm.rtm_msglen = sizeof(arpc); + arpc.rtm.rtm_version = RTM_VERSION; + arpc.rtm.rtm_type = RTM_GET; + arpc.rtm.rtm_flags = RTF_UP; + arpc.rtm.rtm_addrs = RTA_DST | RTA_GATEWAY; + + /* publish */ + if (lle->la_flags & LLE_PUB) + arpc.rtm.rtm_flags |= RTF_ANNOUNCE; + + sdl = &arpc.sdl; + sdl->sdl_family = AF_LINK; + sdl->sdl_len = sizeof(*sdl); + sdl->sdl_index = ifp->if_index; + sdl->sdl_type = ifp->if_type; + if ((lle->la_flags & LLE_VALID) == LLE_VALID) { + sdl->sdl_alen = ifp->if_addrlen; + bcopy(lle->ll_addr, LLADDR(sdl), ifp->if_addrlen); + } else { + sdl->sdl_alen = 0; + bzero(LLADDR(sdl), ifp->if_addrlen); + } + + arpc.rtm.rtm_rmx.rmx_expire = + lle->la_flags & LLE_STATIC ? 0 : lle->la_expire; + arpc.rtm.rtm_flags |= (RTF_HOST | RTF_LLDATA); + if (lle->la_flags & LLE_STATIC) + arpc.rtm.rtm_flags |= RTF_STATIC; + if (lle->la_flags & LLE_IFADDR) + arpc.rtm.rtm_flags |= RTF_PINNED; + arpc.rtm.rtm_index = ifp->if_index; + error = SYSCTL_OUT(wr, &arpc, sizeof(arpc)); + + return (error); +} + +struct lltable * +in_lltattach(struct ifnet *ifp) +{ + struct lltable *llt; + + llt = lltable_allocate_htbl(IN_LLTBL_DEFAULT_HSIZE); + llt->llt_af = AF_INET; + llt->llt_ifp = ifp; + + llt->llt_lookup = in_lltable_lookup; + llt->llt_alloc_entry = in_lltable_alloc; + llt->llt_delete_entry = in_lltable_delete_entry; + llt->llt_dump_entry = in_lltable_dump_entry; + llt->llt_hash = in_lltable_hash; + llt->llt_fill_sa_entry = in_lltable_fill_sa_entry; + llt->llt_free_entry = in_lltable_free_entry; + llt->llt_match_prefix = in_lltable_match_prefix; + lltable_link(llt); + + return (llt); +} + Index: sys/netinet/in_var.h =================================================================== --- sys/netinet/in_var.h +++ 
sys/netinet/in_var.h @@ -352,6 +352,7 @@ struct route; struct ip_moptions; struct radix_node_head; +struct llentry; struct in_multi *inm_lookup_locked(struct ifnet *, const struct in_addr); struct in_multi *inm_lookup(struct ifnet *, const struct in_addr); @@ -383,6 +384,9 @@ struct mbuf *ip_tryforward(struct mbuf *); void *in_domifattach(struct ifnet *); void in_domifdetach(struct ifnet *, void *); +struct lltable *in_lltattach(struct ifnet *); +void arp_mark_lle_reachable(struct llentry *); +void lle4_init(void); /* XXX */ Index: sys/netinet6/in6.c =================================================================== --- sys/netinet6/in6.c +++ sys/netinet6/in6.c @@ -2046,370 +2046,6 @@ #include -struct in6_llentry { - struct llentry base; -}; - -#define IN6_LLTBL_DEFAULT_HSIZE 32 -#define IN6_LLTBL_HASH(k, h) \ - (((((((k >> 8) ^ k) >> 8) ^ k) >> 8) ^ k) & ((h) - 1)) - -/* - * Do actual deallocation of @lle. - * Called by LLE_FREE_LOCKED when number of references - * drops to zero. - */ -static void -in6_lltable_destroy_lle(struct llentry *lle) -{ - - LLE_WUNLOCK(lle); - LLE_LOCK_DESTROY(lle); - LLE_REQ_DESTROY(lle); - free(lle, M_LLTABLE); -} - -static struct llentry * -in6_lltable_new(const struct in6_addr *addr6, u_int flags) -{ - struct in6_llentry *lle; - - lle = malloc(sizeof(struct in6_llentry), M_LLTABLE, M_NOWAIT | M_ZERO); - if (lle == NULL) /* NB: caller generates msg */ - return NULL; - - lle->base.r_l3addr.addr6 = *addr6; - lle->base.lle_refcnt = 1; - lle->base.lle_free = in6_lltable_destroy_lle; - LLE_LOCK_INIT(&lle->base); - LLE_REQ_INIT(&lle->base); - callout_init(&lle->base.lle_timer, 1); - - return (&lle->base); -} - -static int -in6_lltable_match_prefix(const struct sockaddr *saddr, - const struct sockaddr *smask, u_int flags, struct llentry *lle) -{ - const struct in6_addr *addr, *mask, *lle_addr; - - addr = &((const struct sockaddr_in6 *)saddr)->sin6_addr; - mask = &((const struct sockaddr_in6 *)smask)->sin6_addr; - lle_addr = 
&lle->r_l3addr.addr6; - - if (IN6_ARE_MASKED_ADDR_EQUAL(lle_addr, addr, mask) == 0) - return (0); - - if (lle->la_flags & LLE_IFADDR) { - - /* - * Delete LLE_IFADDR records IFF address & flag matches. - * Note that addr is the interface address within prefix - * being matched. - */ - if (IN6_ARE_ADDR_EQUAL(addr, lle_addr) && - (flags & LLE_STATIC) != 0) - return (1); - return (0); - } - - /* flags & LLE_STATIC means deleting both dynamic and static entries */ - if ((flags & LLE_STATIC) || !(lle->la_flags & LLE_STATIC)) - return (1); - - return (0); -} - -static void -in6_lltable_free_entry(struct lltable *llt, struct llentry *lle) -{ - struct ifnet *ifp; - - LLE_WLOCK_ASSERT(lle); - KASSERT(llt != NULL, ("lltable is NULL")); - - /* Unlink entry from table */ - if ((lle->la_flags & LLE_LINKED) != 0) { - - ifp = llt->llt_ifp; - IF_AFDATA_WLOCK_ASSERT(ifp); - lltable_unlink_entry(llt, lle); - } - - if (callout_stop(&lle->lle_timer) > 0) - LLE_REMREF(lle); - - llentry_free(lle); -} - -static int -in6_lltable_rtcheck(struct ifnet *ifp, - u_int flags, - const struct sockaddr *l3addr) -{ - const struct sockaddr_in6 *sin6; - struct nhop6_basic nh6; - struct in6_addr dst; - uint32_t scopeid; - int error; - char ip6buf[INET6_ADDRSTRLEN]; - - KASSERT(l3addr->sa_family == AF_INET6, - ("sin_family %d", l3addr->sa_family)); - - /* Our local addresses are always only installed on the default FIB. */ - - sin6 = (const struct sockaddr_in6 *)l3addr; - in6_splitscope(&sin6->sin6_addr, &dst, &scopeid); - error = fib6_lookup_nh_basic(RT_DEFAULT_FIB, &dst, scopeid, 0, 0, &nh6); - if (error != 0 || (nh6.nh_flags & NHF_GATEWAY) || nh6.nh_ifp != ifp) { - struct ifaddr *ifa; - /* - * Create an ND6 cache for an IPv6 neighbor - * that is not covered by our own prefix. 
- */ - ifa = ifaof_ifpforaddr(l3addr, ifp); - if (ifa != NULL) { - ifa_free(ifa); - return 0; - } - log(LOG_INFO, "IPv6 address: \"%s\" is not on the network\n", - ip6_sprintf(ip6buf, &sin6->sin6_addr)); - return EINVAL; - } - return 0; -} - -static inline uint32_t -in6_lltable_hash_dst(const struct in6_addr *dst, uint32_t hsize) -{ - - return (IN6_LLTBL_HASH(dst->s6_addr32[3], hsize)); -} - -static uint32_t -in6_lltable_hash(const struct llentry *lle, uint32_t hsize) -{ - - return (in6_lltable_hash_dst(&lle->r_l3addr.addr6, hsize)); -} - -static void -in6_lltable_fill_sa_entry(const struct llentry *lle, struct sockaddr *sa) -{ - struct sockaddr_in6 *sin6; - - sin6 = (struct sockaddr_in6 *)sa; - bzero(sin6, sizeof(*sin6)); - sin6->sin6_family = AF_INET6; - sin6->sin6_len = sizeof(*sin6); - sin6->sin6_addr = lle->r_l3addr.addr6; -} - -static inline struct llentry * -in6_lltable_find_dst(struct lltable *llt, const struct in6_addr *dst) -{ - struct llentry *lle; - struct llentries *lleh; - u_int hashidx; - - hashidx = in6_lltable_hash_dst(dst, llt->llt_hsize); - lleh = &llt->lle_head[hashidx]; - LIST_FOREACH(lle, lleh, lle_next) { - if (lle->la_flags & LLE_DELETED) - continue; - if (IN6_ARE_ADDR_EQUAL(&lle->r_l3addr.addr6, dst)) - break; - } - - return (lle); -} - -static void -in6_lltable_delete_entry(struct lltable *llt, struct llentry *lle) -{ - - lle->la_flags |= LLE_DELETED; - EVENTHANDLER_INVOKE(lle_event, lle, LLENTRY_DELETED); -#ifdef DIAGNOSTIC - log(LOG_INFO, "ifaddr cache = %p is deleted\n", lle); -#endif - llentry_free(lle); -} - -static struct llentry * -in6_lltable_alloc(struct lltable *llt, u_int flags, - const struct sockaddr *l3addr) -{ - const struct sockaddr_in6 *sin6 = (const struct sockaddr_in6 *)l3addr; - struct ifnet *ifp = llt->llt_ifp; - struct llentry *lle; - char linkhdr[LLE_MAX_LINKHDR]; - size_t linkhdrsize; - int lladdr_off; - - KASSERT(l3addr->sa_family == AF_INET6, - ("sin_family %d", l3addr->sa_family)); - - /* - * A route that covers 
the given address must have - * been installed 1st because we are doing a resolution, - * verify this. - */ - if (!(flags & LLE_IFADDR) && - in6_lltable_rtcheck(ifp, flags, l3addr) != 0) - return (NULL); - - lle = in6_lltable_new(&sin6->sin6_addr, flags); - if (lle == NULL) { - log(LOG_INFO, "lla_lookup: new lle malloc failed\n"); - return (NULL); - } - lle->la_flags = flags; - if ((flags & LLE_IFADDR) == LLE_IFADDR) { - linkhdrsize = LLE_MAX_LINKHDR; - if (lltable_calc_llheader(ifp, AF_INET6, IF_LLADDR(ifp), - linkhdr, &linkhdrsize, &lladdr_off) != 0) - return (NULL); - lltable_set_entry_addr(ifp, lle, linkhdr, linkhdrsize, - lladdr_off); - lle->la_flags |= LLE_STATIC; - } - - if ((lle->la_flags & LLE_STATIC) != 0) - lle->ln_state = ND6_LLINFO_REACHABLE; - - return (lle); -} - -static struct llentry * -in6_lltable_lookup(struct lltable *llt, u_int flags, - const struct sockaddr *l3addr) -{ - const struct sockaddr_in6 *sin6 = (const struct sockaddr_in6 *)l3addr; - struct llentry *lle; - - IF_AFDATA_LOCK_ASSERT(llt->llt_ifp); - KASSERT(l3addr->sa_family == AF_INET6, - ("sin_family %d", l3addr->sa_family)); - - lle = in6_lltable_find_dst(llt, &sin6->sin6_addr); - - if (lle == NULL) - return (NULL); - - KASSERT((flags & (LLE_UNLOCKED|LLE_EXCLUSIVE)) != - (LLE_UNLOCKED|LLE_EXCLUSIVE),("wrong lle request flags: 0x%X", - flags)); - - if (flags & LLE_UNLOCKED) - return (lle); - - if (flags & LLE_EXCLUSIVE) - LLE_WLOCK(lle); - else - LLE_RLOCK(lle); - return (lle); -} - -static int -in6_lltable_dump_entry(struct lltable *llt, struct llentry *lle, - struct sysctl_req *wr) -{ - struct ifnet *ifp = llt->llt_ifp; - /* XXX stack use */ - struct { - struct rt_msghdr rtm; - struct sockaddr_in6 sin6; - /* - * ndp.c assumes that sdl is word aligned - */ -#ifdef __LP64__ - uint32_t pad; -#endif - struct sockaddr_dl sdl; - } ndpc; - struct sockaddr_dl *sdl; - int error; - - bzero(&ndpc, sizeof(ndpc)); - /* skip deleted entries */ - if ((lle->la_flags & LLE_DELETED) == LLE_DELETED) - 
return (0); - /* Skip if jailed and not a valid IP of the prison. */ - lltable_fill_sa_entry(lle, - (struct sockaddr *)&ndpc.sin6); - if (prison_if(wr->td->td_ucred, - (struct sockaddr *)&ndpc.sin6) != 0) - return (0); - /* - * produce a msg made of: - * struct rt_msghdr; - * struct sockaddr_in6 (IPv6) - * struct sockaddr_dl; - */ - ndpc.rtm.rtm_msglen = sizeof(ndpc); - ndpc.rtm.rtm_version = RTM_VERSION; - ndpc.rtm.rtm_type = RTM_GET; - ndpc.rtm.rtm_flags = RTF_UP; - ndpc.rtm.rtm_addrs = RTA_DST | RTA_GATEWAY; - if (V_deembed_scopeid) - sa6_recoverscope(&ndpc.sin6); - - /* publish */ - if (lle->la_flags & LLE_PUB) - ndpc.rtm.rtm_flags |= RTF_ANNOUNCE; - - sdl = &ndpc.sdl; - sdl->sdl_family = AF_LINK; - sdl->sdl_len = sizeof(*sdl); - sdl->sdl_alen = ifp->if_addrlen; - sdl->sdl_index = ifp->if_index; - sdl->sdl_type = ifp->if_type; - bcopy(&lle->ll_addr, LLADDR(sdl), ifp->if_addrlen); - if (lle->la_expire != 0) - ndpc.rtm.rtm_rmx.rmx_expire = lle->la_expire + - lle->lle_remtime / hz + - time_second - time_uptime; - ndpc.rtm.rtm_flags |= (RTF_HOST | RTF_LLDATA); - if (lle->la_flags & LLE_STATIC) - ndpc.rtm.rtm_flags |= RTF_STATIC; - if (lle->la_flags & LLE_IFADDR) - ndpc.rtm.rtm_flags |= RTF_PINNED; - if (lle->ln_router != 0) - ndpc.rtm.rtm_flags |= RTF_GATEWAY; - ndpc.rtm.rtm_rmx.rmx_pksent = lle->la_asked; - /* Store state in rmx_weight value */ - ndpc.rtm.rtm_rmx.rmx_state = lle->ln_state; - ndpc.rtm.rtm_index = ifp->if_index; - error = SYSCTL_OUT(wr, &ndpc, sizeof(ndpc)); - - return (error); -} - -static struct lltable * -in6_lltattach(struct ifnet *ifp) -{ - struct lltable *llt; - - llt = lltable_allocate_htbl(IN6_LLTBL_DEFAULT_HSIZE); - llt->llt_af = AF_INET6; - llt->llt_ifp = ifp; - - llt->llt_lookup = in6_lltable_lookup; - llt->llt_alloc_entry = in6_lltable_alloc; - llt->llt_delete_entry = in6_lltable_delete_entry; - llt->llt_dump_entry = in6_lltable_dump_entry; - llt->llt_hash = in6_lltable_hash; - llt->llt_fill_sa_entry = in6_lltable_fill_sa_entry; - 
llt->llt_free_entry = in6_lltable_free_entry; - llt->llt_match_prefix = in6_lltable_match_prefix; - lltable_link(llt); - - return (llt); -} - void * in6_domifattach(struct ifnet *ifp) { Index: sys/netinet6/in6_lle.c =================================================================== --- /dev/null +++ sys/netinet6/in6_lle.c @@ -0,0 +1,1450 @@ +/* + * Copyright (c) 2004 Luigi Rizzo, Alessandro Cerri. All rights reserved. + * Copyright (c) 2004-2008 Qing Li. All rights reserved. + * Copyright (c) 2008 Kip Macy. All rights reserved. + * Copyright (c) 2015-2016 Alexander V. Chernikov. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ */ +#include +__FBSDID("$FreeBSD$"); + +#include "opt_inet.h" +#include "opt_inet6.h" + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#define SIN6(s) ((const struct sockaddr_in6 *)(s)) + +static eventhandler_tag lle_event_eh, iflladdr_event_eh; + +static void nd6_free(struct llentry *, int); +static void nd6_free_redirect(const struct llentry *); +static void nd6_llinfo_timer(void *); +static void nd6_llinfo_settimer_locked(struct llentry *, long); +static void clear_llinfo_pqueue(struct llentry *); +static int nd6_resolve_slow(struct ifnet *, int, struct mbuf *, + const struct sockaddr_in6 *, u_char *, uint32_t *); + +/* + * ND6 timer routine to handle ND6 entries + */ +static void +nd6_llinfo_settimer_locked(struct llentry *ln, long tick) +{ + int canceled; + + LLE_WLOCK_ASSERT(ln); + + if (tick < 0) { + ln->la_expire = 0; + ln->ln_ntick = 0; + canceled = callout_stop(&ln->lle_timer); + } else { + ln->la_expire = time_uptime + tick / hz; + LLE_ADDREF(ln); + if (tick > INT_MAX) { + ln->ln_ntick = tick - INT_MAX; + canceled = callout_reset(&ln->lle_timer, INT_MAX, + nd6_llinfo_timer, ln); + } else { + ln->ln_ntick = 0; + canceled = callout_reset(&ln->lle_timer, tick, + nd6_llinfo_timer, ln); + } + } + if (canceled > 0) + LLE_REMREF(ln); +} + +/* + * Checks if we need to switch from STALE state. + * + * RFC 4861 requires switching from STALE to DELAY state + * on first packet matching entry, waiting V_nd6_delay and + * transition to PROBE state (if upper layer confirmation was + * not received). + * + * This code performs a bit differently: + * On packet hit we don't change state (but desired state + * can be guessed by control plane). 
However, after V_nd6_delay + * seconds code will transition to PROBE state (so DELAY state + * is kinda skipped in most situations). + * + * Typically, V_nd6_gctimer is bigger than V_nd6_delay, so + * we perform the following upon entering STALE state: + * + * 1) Arm timer to run each V_nd6_delay seconds to make sure that + * if packet was transmitted at the start of given interval, we + * would be able to switch to PROBE state in V_nd6_delay seconds + * as user expects. + * + * 2) Reschedule timer until original V_nd6_gctimer expires keeping + * lle in STALE state (remaining timer value stored in lle_remtime). + * + * 3) Reschedule timer if packet was transmitted less that V_nd6_delay + * seconds ago. + * + * Returns non-zero value if the entry is still STALE (storing + * the next timer interval in @pdelay). + * + * Returns zero value if original timer expired or we need to switch to + * PROBE (store that in @do_switch variable). + */ +static int +nd6_is_stale(struct llentry *lle, long *pdelay, int *do_switch) +{ + int nd_delay, nd_gctimer, r_skip_req; + time_t lle_hittime; + long delay; + + *do_switch = 0; + nd_gctimer = V_nd6_gctimer; + nd_delay = V_nd6_delay; + + LLE_REQ_LOCK(lle); + r_skip_req = lle->r_skip_req; + lle_hittime = lle->lle_hittime; + LLE_REQ_UNLOCK(lle); + + if (r_skip_req > 0) { + + /* + * Nonzero r_skip_req value was set upon entering + * STALE state. Since the value was not changed, no + * packets were passed using this lle. Ask for + * timer reschedule and keep STALE state. + */ + delay = (long)(MIN(nd_gctimer, nd_delay)); + delay *= hz; + if (lle->lle_remtime > delay) + lle->lle_remtime -= delay; + else { + delay = lle->lle_remtime; + lle->lle_remtime = 0; + } + + if (delay == 0) { + + /* + * The original nd6_gctimer timeout ended, + * no more rescheduling. + */ + return (0); + } + + *pdelay = delay; + return (1); + } + + /* + * Packet received. 
Verify timestamp + */ + delay = (long)(time_uptime - lle_hittime); + if (delay < nd_delay) { + + /* + * V_nd6_delay still not passed since the first + * hit in STALE state. + * Reshedule timer and return. + */ + *pdelay = (long)(nd_delay - delay) * hz; + return (1); + } + + /* Request switching to probe */ + *do_switch = 1; + return (0); +} + +/* + * Gets source address of the first packet in hold queue + * and stores it in @src. + * Returns pointer to @src (if hold queue is not empty) or NULL. + * + * Set noinline to be dtrace-friendly + */ +static __noinline struct in6_addr * +nd6_llinfo_get_holdsrc(struct llentry *ln, struct in6_addr *src) +{ + struct ip6_hdr hdr; + struct mbuf *m; + + if (ln->la_hold == NULL) + return (NULL); + + /* + * assume every packet in la_hold has the same IP header + */ + m = ln->la_hold; + if (sizeof(hdr) > m->m_len) + return (NULL); + + m_copydata(m, 0, sizeof(hdr), (caddr_t)&hdr); + *src = hdr.ip6_src; + + return (src); +} + +/* + * Switch @lle state to new state optionally arming timers. + * + * Set noinline to be dtrace-friendly + */ +__noinline void +nd6_llinfo_setstate(struct llentry *lle, int newstate) +{ + struct ifnet *ifp; + int nd_gctimer, nd_delay; + long delay, remtime; + + delay = 0; + remtime = 0; + + switch (newstate) { + case ND6_LLINFO_INCOMPLETE: + ifp = lle->lle_tbl->llt_ifp; + delay = (long)ND_IFINFO(ifp)->retrans * hz / 1000; + break; + case ND6_LLINFO_REACHABLE: + if (!ND6_LLINFO_PERMANENT(lle)) { + ifp = lle->lle_tbl->llt_ifp; + delay = (long)ND_IFINFO(ifp)->reachable * hz; + } + break; + case ND6_LLINFO_STALE: + + /* + * Notify fast path that we want to know if any packet + * is transmitted by setting r_skip_req. 
+ */ + LLE_REQ_LOCK(lle); + lle->r_skip_req = 1; + LLE_REQ_UNLOCK(lle); + nd_delay = V_nd6_delay; + nd_gctimer = V_nd6_gctimer; + + delay = (long)(MIN(nd_gctimer, nd_delay)) * hz; + remtime = (long)nd_gctimer * hz - delay; + break; + case ND6_LLINFO_DELAY: + lle->la_asked = 0; + delay = (long)V_nd6_delay * hz; + break; + } + + if (delay > 0) + nd6_llinfo_settimer_locked(lle, delay); + + lle->lle_remtime = remtime; + lle->ln_state = newstate; +} + +/* + * Timer-dependent part of nd state machine. + * + * Set noinline to be dtrace-friendly + */ +static __noinline void +nd6_llinfo_timer(void *arg) +{ + struct llentry *ln; + struct in6_addr *dst, *pdst, *psrc, src; + struct ifnet *ifp; + struct nd_ifinfo *ndi = NULL; + int do_switch, send_ns; + long delay; + + KASSERT(arg != NULL, ("%s: arg NULL", __func__)); + ln = (struct llentry *)arg; + LLE_WLOCK(ln); + if (callout_pending(&ln->lle_timer)) { + /* + * Here we are a bit odd here in the treatment of + * active/pending. If the pending bit is set, it got + * rescheduled before I ran. The active + * bit we ignore, since if it was stopped + * in ll_tablefree() and was currently running + * it would have return 0 so the code would + * not have deleted it since the callout could + * not be stopped so we want to go through + * with the delete here now. If the callout + * was restarted, the pending bit will be back on and + * we just want to bail since the callout_reset would + * return 1 and our reference would have been removed + * by nd6_llinfo_settimer_locked above since canceled + * would have been 1. 
+ */ + LLE_WUNLOCK(ln); + return; + } + ifp = ln->lle_tbl->llt_ifp; + CURVNET_SET(ifp->if_vnet); + ndi = ND_IFINFO(ifp); + send_ns = 0; + dst = &ln->r_l3addr.addr6; + pdst = dst; + + if (ln->ln_ntick > 0) { + if (ln->ln_ntick > INT_MAX) { + ln->ln_ntick -= INT_MAX; + nd6_llinfo_settimer_locked(ln, INT_MAX); + } else { + ln->ln_ntick = 0; + nd6_llinfo_settimer_locked(ln, ln->ln_ntick); + } + goto done; + } + + if (ln->la_flags & LLE_STATIC) { + goto done; + } + + if (ln->la_flags & LLE_DELETED) { + nd6_free(ln, 0); + ln = NULL; + goto done; + } + + switch (ln->ln_state) { + case ND6_LLINFO_INCOMPLETE: + if (ln->la_asked < V_nd6_mmaxtries) { + ln->la_asked++; + send_ns = 1; + /* Send NS to multicast address */ + pdst = NULL; + } else { + struct mbuf *m = ln->la_hold; + if (m) { + struct mbuf *m0; + + /* + * assuming every packet in la_hold has the + * same IP header. Send error after unlock. + */ + m0 = m->m_nextpkt; + m->m_nextpkt = NULL; + ln->la_hold = m0; + clear_llinfo_pqueue(ln); + } + EVENTHANDLER_INVOKE(lle_event, ln, LLENTRY_TIMEDOUT); + nd6_free(ln, 0); + ln = NULL; + if (m != NULL) + icmp6_error2(m, ICMP6_DST_UNREACH, + ICMP6_DST_UNREACH_ADDR, 0, ifp); + } + break; + case ND6_LLINFO_REACHABLE: + if (!ND6_LLINFO_PERMANENT(ln)) + nd6_llinfo_setstate(ln, ND6_LLINFO_STALE); + break; + + case ND6_LLINFO_STALE: + if (nd6_is_stale(ln, &delay, &do_switch) != 0) { + + /* + * No packet has used this entry and GC timeout + * has not been passed. Reshedule timer and + * return. + */ + nd6_llinfo_settimer_locked(ln, delay); + break; + } + + if (do_switch == 0) { + + /* + * GC timer has ended and entry hasn't been used. + * Run Garbage collector (RFC 4861, 5.3) + */ + if (!ND6_LLINFO_PERMANENT(ln)) { + EVENTHANDLER_INVOKE(lle_event, ln, + LLENTRY_EXPIRED); + nd6_free(ln, 1); + ln = NULL; + } + break; + } + + /* Entry has been used AND delay timer has ended. 
*/ + + /* FALLTHROUGH */ + + case ND6_LLINFO_DELAY: + if (ndi && (ndi->flags & ND6_IFF_PERFORMNUD) != 0) { + /* We need NUD */ + ln->la_asked = 1; + nd6_llinfo_setstate(ln, ND6_LLINFO_PROBE); + send_ns = 1; + } else + nd6_llinfo_setstate(ln, ND6_LLINFO_STALE); /* XXX */ + break; + case ND6_LLINFO_PROBE: + if (ln->la_asked < V_nd6_umaxtries) { + ln->la_asked++; + send_ns = 1; + } else { + EVENTHANDLER_INVOKE(lle_event, ln, LLENTRY_EXPIRED); + nd6_free(ln, 0); + ln = NULL; + } + break; + default: + panic("%s: paths in a dark night can be confusing: %d", + __func__, ln->ln_state); + } +done: + if (send_ns != 0) { + nd6_llinfo_settimer_locked(ln, (long)ndi->retrans * hz / 1000); + psrc = nd6_llinfo_get_holdsrc(ln, &src); + LLE_FREE_LOCKED(ln); + ln = NULL; + nd6_ns_output(ifp, psrc, pdst, dst, NULL); + } + + if (ln != NULL) + LLE_FREE_LOCKED(ln); + CURVNET_RESTORE(); +} + +static void +clear_llinfo_pqueue(struct llentry *ln) +{ + struct mbuf *m_hold, *m_hold_next; + + for (m_hold = ln->la_hold; m_hold; m_hold = m_hold_next) { + m_hold_next = m_hold->m_nextpkt; + m_freem(m_hold); + } + + ln->la_hold = NULL; + return; +} + + +/* + * the caller acquires and releases the lock on the lltbls + * Returns the llentry locked + */ +struct llentry * +nd6_lookup(const struct in6_addr *addr6, int flags, struct ifnet *ifp) +{ + struct sockaddr_in6 sin6; + struct llentry *ln; + + bzero(&sin6, sizeof(sin6)); + sin6.sin6_len = sizeof(struct sockaddr_in6); + sin6.sin6_family = AF_INET6; + sin6.sin6_addr = *addr6; + + IF_AFDATA_LOCK_ASSERT(ifp); + + ln = lla_lookup(LLTABLE6(ifp), flags, (struct sockaddr *)&sin6); + + return (ln); +} + +struct llentry * +nd6_alloc(const struct in6_addr *addr6, int flags, struct ifnet *ifp) +{ + struct sockaddr_in6 sin6; + struct llentry *ln; + + bzero(&sin6, sizeof(sin6)); + sin6.sin6_len = sizeof(struct sockaddr_in6); + sin6.sin6_family = AF_INET6; + sin6.sin6_addr = *addr6; + + ln = lltable_alloc_entry(LLTABLE6(ifp), 0, (struct sockaddr *)&sin6); + if 
(ln != NULL) + ln->ln_state = ND6_LLINFO_NOSTATE; + + return (ln); +} + +/* + * Free an nd6 llinfo entry. + * Since the function would cause significant changes in the kernel, DO NOT + * make it global, unless you have a strong reason for the change, and are sure + * that the change is safe. + * + * Set noinline to be dtrace-friendly + */ +static __noinline void +nd6_free(struct llentry *ln, int gc) +{ + struct nd_defrouter *dr; + struct ifnet *ifp; + + LLE_WLOCK_ASSERT(ln); + + /* + * we used to have pfctlinput(PRC_HOSTDEAD) here. + * even though it is not harmful, it was not really necessary. + */ + + /* cancel timer */ + nd6_llinfo_settimer_locked(ln, -1); + + ifp = ln->lle_tbl->llt_ifp; + + if (ND_IFINFO(ifp)->flags & ND6_IFF_ACCEPT_RTADV) { + dr = defrouter_lookup(&ln->r_l3addr.addr6, ifp); + + if (dr != NULL && dr->expire && + ln->ln_state == ND6_LLINFO_STALE && gc) { + /* + * If the reason for the deletion is just garbage + * collection, and the neighbor is an active default + * router, do not delete it. Instead, reset the GC + * timer using the router's lifetime. + * Simply deleting the entry would affect default + * router selection, which is not necessarily a good + * thing, especially when we're using router preference + * values. + * XXX: the check for ln_state would be redundant, + * but we intentionally keep it just in case. + */ + if (dr->expire > time_uptime) + nd6_llinfo_settimer_locked(ln, + (dr->expire - time_uptime) * hz); + else + nd6_llinfo_settimer_locked(ln, + (long)V_nd6_gctimer * hz); + + LLE_REMREF(ln); + LLE_WUNLOCK(ln); + return; + } + + if (dr) { + /* + * Unreachablity of a router might affect the default + * router selection and on-link detection of advertised + * prefixes. + */ + + /* + * Temporarily fake the state to choose a new default + * router and to perform on-link determination of + * prefixes correctly. + * Below the state will be set correctly, + * or the entry itself will be deleted. 
+ */ + ln->ln_state = ND6_LLINFO_INCOMPLETE; + } + + if (ln->ln_router || dr) { + + /* + * We need to unlock to avoid a LOR with rt6_flush() with the + * rnh and for the calls to pfxlist_onlink_check() and + * defrouter_select() in the block further down for calls + * into nd6_lookup(). We still hold a ref. + */ + LLE_WUNLOCK(ln); + + /* + * rt6_flush must be called whether or not the neighbor + * is in the Default Router List. + * See a corresponding comment in nd6_na_input(). + */ + rt6_flush(&ln->r_l3addr.addr6, ifp); + } + + if (dr) { + /* + * Since defrouter_select() does not affect the + * on-link determination and MIP6 needs the check + * before the default router selection, we perform + * the check now. + */ + pfxlist_onlink_check(); + + /* + * Refresh default router list. + */ + defrouter_select(); + } + + /* + * If this entry was added by an on-link redirect, remove the + * corresponding host route. + */ + if (ln->la_flags & LLE_REDIRECT) + nd6_free_redirect(ln); + + if (ln->ln_router || dr) + LLE_WLOCK(ln); + } + + /* + * Safe to unlock. We still hold an extra reference and will not + * free(9) in llentry_free() if someone else holds one as well. + */ + LLE_WUNLOCK(ln); + IF_AFDATA_LOCK(ifp); + LLE_WLOCK(ln); + /* Guard against race with other llentry_free(). */ + if (ln->la_flags & LLE_LINKED) { + /* Remove callout reference */ + LLE_REMREF(ln); + lltable_unlink_entry(ln->lle_tbl, ln); + } + IF_AFDATA_UNLOCK(ifp); + + llentry_free(ln); +} + +static int +nd6_isdynrte(const struct rtentry *rt, void *xap) +{ + + if (rt->rt_flags == (RTF_UP | RTF_HOST | RTF_DYNAMIC)) + return (1); + + return (0); +} +/* + * Remove the rtentry for the given llentry, + * both of which were installed by a redirect. 
+ */ +static void +nd6_free_redirect(const struct llentry *ln) +{ + int fibnum; + struct sockaddr_in6 sin6; + struct rt_addrinfo info; + + lltable_fill_sa_entry(ln, (struct sockaddr *)&sin6); + memset(&info, 0, sizeof(info)); + info.rti_info[RTAX_DST] = (struct sockaddr *)&sin6; + info.rti_filter = nd6_isdynrte; + + for (fibnum = 0; fibnum < rt_numfibs; fibnum++) + rtrequest1_fib(RTM_DELETE, &info, NULL, fibnum); +} + + +/* + * Look up link header for @sa_dst address. Stores found + * data in @desten buffer. Copy of lle ln_flags can also be + * saved in @pflags if @pflags is non-NULL. + * + * If destination LLE does not exist or lle state modification + * is required, call "slow" version. + * + * Return values: + * - 0 on success (address copied to buffer). + * - EWOULDBLOCK (no local error, but address is still unresolved) + * - other errors (alloc failure, etc) + */ +int +nd6_resolve(struct ifnet *ifp, int is_gw, struct mbuf *m, + const struct sockaddr *sa_dst, u_char *desten, uint32_t *pflags) +{ + struct llentry *ln = NULL; + const struct sockaddr_in6 *dst6; + + if (pflags != NULL) + *pflags = 0; + + dst6 = (const struct sockaddr_in6 *)sa_dst; + + /* discard the packet if IPv6 operation is disabled on the interface */ + if ((ND_IFINFO(ifp)->flags & ND6_IFF_IFDISABLED)) { + m_freem(m); + return (ENETDOWN); /* better error? 
*/ + } + + if (m != NULL && m->m_flags & M_MCAST) { + switch (ifp->if_type) { + case IFT_ETHER: + case IFT_FDDI: + case IFT_L2VLAN: + case IFT_IEEE80211: + case IFT_BRIDGE: + case IFT_ISO88025: + ETHER_MAP_IPV6_MULTICAST(&dst6->sin6_addr, + desten); + return (0); + default: + m_freem(m); + return (EAFNOSUPPORT); + } + } + + IF_AFDATA_RLOCK(ifp); + ln = nd6_lookup(&dst6->sin6_addr, LLE_UNLOCKED, ifp); + if (ln != NULL && (ln->r_flags & RLLE_VALID) != 0) { + /* Entry found, let's copy lle info */ + bcopy(ln->r_linkdata, desten, ln->r_hdrlen); + if (pflags != NULL) + *pflags = LLE_VALID | (ln->r_flags & RLLE_IFADDR); + /* Check if we have feedback request from nd6 timer */ + if (ln->r_skip_req != 0) { + LLE_REQ_LOCK(ln); + ln->r_skip_req = 0; /* Notify that entry was used */ + ln->lle_hittime = time_uptime; + LLE_REQ_UNLOCK(ln); + } + IF_AFDATA_RUNLOCK(ifp); + return (0); + } + IF_AFDATA_RUNLOCK(ifp); + + return (nd6_resolve_slow(ifp, 0, m, dst6, desten, pflags)); +} + + +/* + * Do L2 address resolution for @sa_dst address. Stores found + * address in @desten buffer. Copy of lle ln_flags can be also + * saved in @pflags if @pflags is non-NULL. + * + * Heavy version. + * Function assume that destination LLE does not exist, + * is invalid or stale, so LLE_EXCLUSIVE lock needs to be acquired. + * + * Set noinline to be dtrace-friendly + */ +static __noinline int +nd6_resolve_slow(struct ifnet *ifp, int flags, struct mbuf *m, + const struct sockaddr_in6 *dst, u_char *desten, uint32_t *pflags) +{ + struct llentry *lle = NULL, *lle_tmp; + struct in6_addr *psrc, src; + int send_ns, ll_len; + char *lladdr; + + /* + * Address resolution or Neighbor Unreachability Detection + * for the next hop. + * At this point, the destination of the packet must be a unicast + * or an anycast address(i.e. not a multicast). 
+ */ + if (lle == NULL) { + IF_AFDATA_RLOCK(ifp); + lle = nd6_lookup(&dst->sin6_addr, LLE_EXCLUSIVE, ifp); + IF_AFDATA_RUNLOCK(ifp); + if ((lle == NULL) && nd6_is_addr_neighbor(dst, ifp)) { + /* + * Since nd6_is_addr_neighbor() internally calls nd6_lookup(), + * the condition below is not very efficient. But we believe + * it is tolerable, because this should be a rare case. + */ + lle = nd6_alloc(&dst->sin6_addr, 0, ifp); + if (lle == NULL) { + char ip6buf[INET6_ADDRSTRLEN]; + log(LOG_DEBUG, + "nd6_output: can't allocate llinfo for %s " + "(ln=%p)\n", + ip6_sprintf(ip6buf, &dst->sin6_addr), lle); + m_freem(m); + return (ENOBUFS); + } + + IF_AFDATA_WLOCK(ifp); + LLE_WLOCK(lle); + /* Prefer any existing entry over newly-created one */ + lle_tmp = nd6_lookup(&dst->sin6_addr, LLE_EXCLUSIVE, ifp); + if (lle_tmp == NULL) + lltable_link_entry(LLTABLE6(ifp), lle); + IF_AFDATA_WUNLOCK(ifp); + if (lle_tmp != NULL) { + lltable_free_entry(LLTABLE6(ifp), lle); + lle = lle_tmp; + lle_tmp = NULL; + } + } + } + if (lle == NULL) { + if (!(ND_IFINFO(ifp)->flags & ND6_IFF_PERFORMNUD)) { + m_freem(m); + return (ENOBUFS); + } + + if (m != NULL) + m_freem(m); + return (ENOBUFS); + } + + LLE_WLOCK_ASSERT(lle); + + /* + * The first time we send a packet to a neighbor whose entry is + * STALE, we have to change the state to DELAY and a sets a timer to + * expire in DELAY_FIRST_PROBE_TIME seconds to ensure do + * neighbor unreachability detection on expiration. + * (RFC 2461 7.3.3) + */ + if (lle->ln_state == ND6_LLINFO_STALE) + nd6_llinfo_setstate(lle, ND6_LLINFO_DELAY); + + /* + * If the neighbor cache entry has a state other than INCOMPLETE + * (i.e. its link-layer address is already resolved), just + * send the packet. 
+ */ + if (lle->ln_state > ND6_LLINFO_INCOMPLETE) { + if (flags & LLE_ADDRONLY) { + lladdr = lle->ll_addr; + ll_len = ifp->if_addrlen; + } else { + lladdr = lle->r_linkdata; + ll_len = lle->r_hdrlen; + } + bcopy(lladdr, desten, ll_len); + if (pflags != NULL) + *pflags = lle->la_flags; + LLE_WUNLOCK(lle); + return (0); + } + + /* + * There is a neighbor cache entry, but no ethernet address + * response yet. Append this latest packet to the end of the + * packet queue in the mbuf, unless the number of the packet + * does not exceed nd6_maxqueuelen. When it exceeds nd6_maxqueuelen, + * the oldest packet in the queue will be removed. + */ + + if (lle->la_hold != NULL) { + struct mbuf *m_hold; + int i; + + i = 0; + for (m_hold = lle->la_hold; m_hold; m_hold = m_hold->m_nextpkt){ + i++; + if (m_hold->m_nextpkt == NULL) { + m_hold->m_nextpkt = m; + break; + } + } + while (i >= V_nd6_maxqueuelen) { + m_hold = lle->la_hold; + lle->la_hold = lle->la_hold->m_nextpkt; + m_freem(m_hold); + i--; + } + } else { + lle->la_hold = m; + } + + /* + * If there has been no NS for the neighbor after entering the + * INCOMPLETE state, send the first solicitation. + * Note that for newly-created lle la_asked will be 0, + * so we will transition from ND6_LLINFO_NOSTATE to + * ND6_LLINFO_INCOMPLETE state here. + */ + psrc = NULL; + send_ns = 0; + if (lle->la_asked == 0) { + lle->la_asked++; + send_ns = 1; + psrc = nd6_llinfo_get_holdsrc(lle, &src); + + nd6_llinfo_setstate(lle, ND6_LLINFO_INCOMPLETE); + } + LLE_WUNLOCK(lle); + if (send_ns != 0) + nd6_ns_output(ifp, psrc, NULL, &dst->sin6_addr, NULL); + + return (EWOULDBLOCK); +} + +/* + * Do L2 address resolution for @sa_dst address. Stores found + * address in @desten buffer. Copy of lle ln_flags can be also + * saved in @pflags if @pflags is non-NULL. + * + * Return values: + * - 0 on success (address copied to buffer). 
+ * - EWOULDBLOCK (no local error, but address is still unresolved) + * - other errors (alloc failure, etc) + */ +int +nd6_resolve_addr(struct ifnet *ifp, int flags, const struct sockaddr *dst, + char *desten, uint32_t *pflags) +{ + int error; + + flags |= LLE_ADDRONLY; + error = nd6_resolve_slow(ifp, flags, NULL, + (const struct sockaddr_in6 *)dst, desten, pflags); + return (error); +} + +/* + * Add permanent ND6 link-layer record for given + * interface address. + * + * Very similar to IPv4 arp_ifinit(), but: + * 1) IPv6 DAD is performed in a different place + * 2) It is called by IPv6 protocol stack in contrast to + * arp_ifinit() which is typically called in SIOCSIFADDR + * driver ioctl handler. + * + */ +int +nd6_add_ifa_lle(struct in6_ifaddr *ia) +{ + struct ifnet *ifp; + struct llentry *ln, *ln_tmp; + struct sockaddr *dst; + + ifp = ia->ia_ifa.ifa_ifp; + if (nd6_need_cache(ifp) == 0) + return (0); + + ia->ia_ifa.ifa_rtrequest = nd6_rtrequest; + dst = (struct sockaddr *)&ia->ia_addr; + ln = lltable_alloc_entry(LLTABLE6(ifp), LLE_IFADDR, dst); + if (ln == NULL) + return (ENOBUFS); + + IF_AFDATA_WLOCK(ifp); + LLE_WLOCK(ln); + /* Unlink any existing entry */ + ln_tmp = lla_lookup(LLTABLE6(ifp), LLE_EXCLUSIVE, dst); + if (ln_tmp != NULL) + lltable_unlink_entry(LLTABLE6(ifp), ln_tmp); + lltable_link_entry(LLTABLE6(ifp), ln); + IF_AFDATA_WUNLOCK(ifp); + + if (ln_tmp != NULL) + EVENTHANDLER_INVOKE(lle_event, ln_tmp, LLENTRY_EXPIRED); + EVENTHANDLER_INVOKE(lle_event, ln, LLENTRY_RESOLVED); + + LLE_WUNLOCK(ln); + if (ln_tmp != NULL) + llentry_free(ln_tmp); + + return (0); +} + +/* + * Removes either all lle entries for given @ia, or lle + * corresponding to @ia address. 
+ */ +void +nd6_rem_ifa_lle(struct in6_ifaddr *ia, int all) +{ + struct sockaddr_in6 mask, addr; + struct sockaddr *saddr, *smask; + struct ifnet *ifp; + + ifp = ia->ia_ifa.ifa_ifp; + memcpy(&addr, &ia->ia_addr, sizeof(ia->ia_addr)); + memcpy(&mask, &ia->ia_prefixmask, sizeof(ia->ia_prefixmask)); + saddr = (struct sockaddr *)&addr; + smask = (struct sockaddr *)&mask; + + if (all != 0) + lltable_prefix_free(AF_INET6, saddr, smask, LLE_STATIC); + else + lltable_delete_addr(LLTABLE6(ifp), LLE_IFADDR, saddr); +} + +static void +nd6_lle_event(void *arg __unused, struct llentry *lle, int evt) +{ + struct rt_addrinfo rtinfo; + struct sockaddr_in6 dst; + struct sockaddr_dl gw; + struct ifnet *ifp; + int type; + + LLE_WLOCK_ASSERT(lle); + + if (lltable_get_af(lle->lle_tbl) != AF_INET6) + return; + + switch (evt) { + case LLENTRY_RESOLVED: + type = RTM_ADD; + KASSERT(lle->la_flags & LLE_VALID, + ("%s: %p resolved but not valid?", __func__, lle)); + break; + case LLENTRY_EXPIRED: + type = RTM_DELETE; + break; + default: + return; + } + + ifp = lltable_get_ifp(lle->lle_tbl); + + bzero(&dst, sizeof(dst)); + bzero(&gw, sizeof(gw)); + bzero(&rtinfo, sizeof(rtinfo)); + lltable_fill_sa_entry(lle, (struct sockaddr *)&dst); + dst.sin6_scope_id = in6_getscopezone(ifp, + in6_addrscope(&dst.sin6_addr)); + gw.sdl_len = sizeof(struct sockaddr_dl); + gw.sdl_family = AF_LINK; + gw.sdl_alen = ifp->if_addrlen; + gw.sdl_index = ifp->if_index; + gw.sdl_type = ifp->if_type; + if (evt == LLENTRY_RESOLVED) + bcopy(lle->ll_addr, gw.sdl_data, ifp->if_addrlen); + rtinfo.rti_info[RTAX_DST] = (struct sockaddr *)&dst; + rtinfo.rti_info[RTAX_GATEWAY] = (struct sockaddr *)&gw; + rtinfo.rti_addrs = RTA_DST | RTA_GATEWAY; + rt_missmsg_fib(type, &rtinfo, RTF_HOST | RTF_LLDATA | ( + type == RTM_ADD ? RTF_UP: 0), 0, RT_DEFAULT_FIB); +} + +/* + * A handler for interface link layer address change event. 
+ */ +static void +nd6_iflladdr(void *arg __unused, struct ifnet *ifp) +{ + + lltable_update_ifaddr(LLTABLE6(ifp)); +} + +void +lle6_init(void) +{ + + if (IS_DEFAULT_VNET(curvnet)) { + lle_event_eh = EVENTHANDLER_REGISTER(lle_event, nd6_lle_event, + NULL, EVENTHANDLER_PRI_ANY); + iflladdr_event_eh = EVENTHANDLER_REGISTER(iflladdr_event, + nd6_iflladdr, NULL, EVENTHANDLER_PRI_ANY); + } +} + +void +lle6_destroy(void) +{ + + if (IS_DEFAULT_VNET(curvnet)) { + EVENTHANDLER_DEREGISTER(lle_event, lle_event_eh); + EVENTHANDLER_DEREGISTER(iflladdr_event, iflladdr_event_eh); + } +} + +struct in6_llentry { + struct llentry base; +}; + +#define IN6_LLTBL_DEFAULT_HSIZE 32 +#define IN6_LLTBL_HASH(k, h) \ + (((((((k >> 8) ^ k) >> 8) ^ k) >> 8) ^ k) & ((h) - 1)) + +/* + * Do actual deallocation of @lle. + * Called by LLE_FREE_LOCKED when number of references + * drops to zero. + */ +static void +in6_lltable_destroy_lle(struct llentry *lle) +{ + + LLE_WUNLOCK(lle); + LLE_LOCK_DESTROY(lle); + LLE_REQ_DESTROY(lle); + free(lle, M_LLTABLE); +} + +static struct llentry * +in6_lltable_new(const struct in6_addr *addr6, u_int flags) +{ + struct in6_llentry *lle; + + lle = malloc(sizeof(struct in6_llentry), M_LLTABLE, M_NOWAIT | M_ZERO); + if (lle == NULL) /* NB: caller generates msg */ + return NULL; + + lle->base.r_l3addr.addr6 = *addr6; + lle->base.lle_refcnt = 1; + lle->base.lle_free = in6_lltable_destroy_lle; + LLE_LOCK_INIT(&lle->base); + LLE_REQ_INIT(&lle->base); + callout_init(&lle->base.lle_timer, 1); + + return (&lle->base); +} + +static int +in6_lltable_match_prefix(const struct sockaddr *saddr, + const struct sockaddr *smask, u_int flags, struct llentry *lle) +{ + const struct in6_addr *addr, *mask, *lle_addr; + + addr = &((const struct sockaddr_in6 *)saddr)->sin6_addr; + mask = &((const struct sockaddr_in6 *)smask)->sin6_addr; + lle_addr = &lle->r_l3addr.addr6; + + if (IN6_ARE_MASKED_ADDR_EQUAL(lle_addr, addr, mask) == 0) + return (0); + + if (lle->la_flags & LLE_IFADDR) { + + 
/* + * Delete LLE_IFADDR records IFF address & flag matches. + * Note that addr is the interface address within prefix + * being matched. + */ + if (IN6_ARE_ADDR_EQUAL(addr, lle_addr) && + (flags & LLE_STATIC) != 0) + return (1); + return (0); + } + + /* flags & LLE_STATIC means deleting both dynamic and static entries */ + if ((flags & LLE_STATIC) || !(lle->la_flags & LLE_STATIC)) + return (1); + + return (0); +} + +static void +in6_lltable_free_entry(struct lltable *llt, struct llentry *lle) +{ + struct ifnet *ifp; + + LLE_WLOCK_ASSERT(lle); + KASSERT(llt != NULL, ("lltable is NULL")); + + /* Unlink entry from table */ + if ((lle->la_flags & LLE_LINKED) != 0) { + + ifp = llt->llt_ifp; + IF_AFDATA_WLOCK_ASSERT(ifp); + lltable_unlink_entry(llt, lle); + } + + if (callout_stop(&lle->lle_timer) > 0) + LLE_REMREF(lle); + + llentry_free(lle); +} + +static int +in6_lltable_rtcheck(struct ifnet *ifp, + u_int flags, + const struct sockaddr *l3addr) +{ + const struct sockaddr_in6 *sin6; + struct nhop6_basic nh6; + struct in6_addr dst; + uint32_t scopeid; + int error; + char ip6buf[INET6_ADDRSTRLEN]; + + KASSERT(l3addr->sa_family == AF_INET6, + ("sin_family %d", l3addr->sa_family)); + + /* Our local addresses are always only installed on the default FIB. */ + + sin6 = (const struct sockaddr_in6 *)l3addr; + in6_splitscope(&sin6->sin6_addr, &dst, &scopeid); + error = fib6_lookup_nh_basic(RT_DEFAULT_FIB, &dst, scopeid, 0, 0, &nh6); + if (error != 0 || (nh6.nh_flags & NHF_GATEWAY) || nh6.nh_ifp != ifp) { + struct ifaddr *ifa; + /* + * Create an ND6 cache for an IPv6 neighbor + * that is not covered by our own prefix. 
+ */ + ifa = ifaof_ifpforaddr(l3addr, ifp); + if (ifa != NULL) { + ifa_free(ifa); + return 0; + } + log(LOG_INFO, "IPv6 address: \"%s\" is not on the network\n", + ip6_sprintf(ip6buf, &sin6->sin6_addr)); + return EINVAL; + } + return 0; +} + +static inline uint32_t +in6_lltable_hash_dst(const struct in6_addr *dst, uint32_t hsize) +{ + + return (IN6_LLTBL_HASH(dst->s6_addr32[3], hsize)); +} + +static uint32_t +in6_lltable_hash(const struct llentry *lle, uint32_t hsize) +{ + + return (in6_lltable_hash_dst(&lle->r_l3addr.addr6, hsize)); +} + +static void +in6_lltable_fill_sa_entry(const struct llentry *lle, struct sockaddr *sa) +{ + struct sockaddr_in6 *sin6; + + sin6 = (struct sockaddr_in6 *)sa; + bzero(sin6, sizeof(*sin6)); + sin6->sin6_family = AF_INET6; + sin6->sin6_len = sizeof(*sin6); + sin6->sin6_addr = lle->r_l3addr.addr6; +} + +static inline struct llentry * +in6_lltable_find_dst(struct lltable *llt, const struct in6_addr *dst) +{ + struct llentry *lle; + struct llentries *lleh; + u_int hashidx; + + hashidx = in6_lltable_hash_dst(dst, llt->llt_hsize); + lleh = &llt->lle_head[hashidx]; + LIST_FOREACH(lle, lleh, lle_next) { + if (lle->la_flags & LLE_DELETED) + continue; + if (IN6_ARE_ADDR_EQUAL(&lle->r_l3addr.addr6, dst)) + break; + } + + return (lle); +} + +static void +in6_lltable_delete_entry(struct lltable *llt, struct llentry *lle) +{ + + lle->la_flags |= LLE_DELETED; + EVENTHANDLER_INVOKE(lle_event, lle, LLENTRY_DELETED); +#ifdef DIAGNOSTIC + log(LOG_INFO, "ifaddr cache = %p is deleted\n", lle); +#endif + llentry_free(lle); +} + +static struct llentry * +in6_lltable_alloc(struct lltable *llt, u_int flags, + const struct sockaddr *l3addr) +{ + const struct sockaddr_in6 *sin6 = (const struct sockaddr_in6 *)l3addr; + struct ifnet *ifp = llt->llt_ifp; + struct llentry *lle; + char linkhdr[LLE_MAX_LINKHDR]; + size_t linkhdrsize; + int lladdr_off; + + KASSERT(l3addr->sa_family == AF_INET6, + ("sin_family %d", l3addr->sa_family)); + + /* + * A route that covers 
the given address must have + * been installed 1st because we are doing a resolution, + * verify this. + */ + if (!(flags & LLE_IFADDR) && + in6_lltable_rtcheck(ifp, flags, l3addr) != 0) + return (NULL); + + lle = in6_lltable_new(&sin6->sin6_addr, flags); + if (lle == NULL) { + log(LOG_INFO, "lla_lookup: new lle malloc failed\n"); + return (NULL); + } + lle->la_flags = flags; + if ((flags & LLE_IFADDR) == LLE_IFADDR) { + linkhdrsize = LLE_MAX_LINKHDR; + if (lltable_calc_llheader(ifp, AF_INET6, IF_LLADDR(ifp), + linkhdr, &linkhdrsize, &lladdr_off) != 0) + return (NULL); + lltable_set_entry_addr(ifp, lle, linkhdr, linkhdrsize, + lladdr_off); + lle->la_flags |= LLE_STATIC; + } + + if ((lle->la_flags & LLE_STATIC) != 0) + lle->ln_state = ND6_LLINFO_REACHABLE; + + return (lle); +} + +static struct llentry * +in6_lltable_lookup(struct lltable *llt, u_int flags, + const struct sockaddr *l3addr) +{ + const struct sockaddr_in6 *sin6 = (const struct sockaddr_in6 *)l3addr; + struct llentry *lle; + + IF_AFDATA_LOCK_ASSERT(llt->llt_ifp); + KASSERT(l3addr->sa_family == AF_INET6, + ("sin_family %d", l3addr->sa_family)); + + lle = in6_lltable_find_dst(llt, &sin6->sin6_addr); + + if (lle == NULL) + return (NULL); + + KASSERT((flags & (LLE_UNLOCKED|LLE_EXCLUSIVE)) != + (LLE_UNLOCKED|LLE_EXCLUSIVE),("wrong lle request flags: 0x%X", + flags)); + + if (flags & LLE_UNLOCKED) + return (lle); + + if (flags & LLE_EXCLUSIVE) + LLE_WLOCK(lle); + else + LLE_RLOCK(lle); + return (lle); +} + +static int +in6_lltable_dump_entry(struct lltable *llt, struct llentry *lle, + struct sysctl_req *wr) +{ + struct ifnet *ifp = llt->llt_ifp; + /* XXX stack use */ + struct { + struct rt_msghdr rtm; + struct sockaddr_in6 sin6; + /* + * ndp.c assumes that sdl is word aligned + */ +#ifdef __LP64__ + uint32_t pad; +#endif + struct sockaddr_dl sdl; + } ndpc; + struct sockaddr_dl *sdl; + int error; + + bzero(&ndpc, sizeof(ndpc)); + /* skip deleted entries */ + if ((lle->la_flags & LLE_DELETED) == LLE_DELETED) + 
return (0); + /* Skip if jailed and not a valid IP of the prison. */ + lltable_fill_sa_entry(lle, + (struct sockaddr *)&ndpc.sin6); + if (prison_if(wr->td->td_ucred, + (struct sockaddr *)&ndpc.sin6) != 0) + return (0); + /* + * produce a msg made of: + * struct rt_msghdr; + * struct sockaddr_in6 (IPv6) + * struct sockaddr_dl; + */ + ndpc.rtm.rtm_msglen = sizeof(ndpc); + ndpc.rtm.rtm_version = RTM_VERSION; + ndpc.rtm.rtm_type = RTM_GET; + ndpc.rtm.rtm_flags = RTF_UP; + ndpc.rtm.rtm_addrs = RTA_DST | RTA_GATEWAY; + if (V_deembed_scopeid) + sa6_recoverscope(&ndpc.sin6); + + /* publish */ + if (lle->la_flags & LLE_PUB) + ndpc.rtm.rtm_flags |= RTF_ANNOUNCE; + + sdl = &ndpc.sdl; + sdl->sdl_family = AF_LINK; + sdl->sdl_len = sizeof(*sdl); + sdl->sdl_alen = ifp->if_addrlen; + sdl->sdl_index = ifp->if_index; + sdl->sdl_type = ifp->if_type; + bcopy(&lle->ll_addr, LLADDR(sdl), ifp->if_addrlen); + if (lle->la_expire != 0) + ndpc.rtm.rtm_rmx.rmx_expire = lle->la_expire + + lle->lle_remtime / hz + + time_second - time_uptime; + ndpc.rtm.rtm_flags |= (RTF_HOST | RTF_LLDATA); + if (lle->la_flags & LLE_STATIC) + ndpc.rtm.rtm_flags |= RTF_STATIC; + if (lle->la_flags & LLE_IFADDR) + ndpc.rtm.rtm_flags |= RTF_PINNED; + if (lle->ln_router != 0) + ndpc.rtm.rtm_flags |= RTF_GATEWAY; + ndpc.rtm.rtm_rmx.rmx_pksent = lle->la_asked; + /* Store state in rmx_weight value */ + ndpc.rtm.rtm_rmx.rmx_state = lle->ln_state; + ndpc.rtm.rtm_index = ifp->if_index; + error = SYSCTL_OUT(wr, &ndpc, sizeof(ndpc)); + + return (error); +} + +struct lltable * +in6_lltattach(struct ifnet *ifp) +{ + struct lltable *llt; + + llt = lltable_allocate_htbl(IN6_LLTBL_DEFAULT_HSIZE); + llt->llt_af = AF_INET6; + llt->llt_ifp = ifp; + + llt->llt_lookup = in6_lltable_lookup; + llt->llt_alloc_entry = in6_lltable_alloc; + llt->llt_delete_entry = in6_lltable_delete_entry; + llt->llt_dump_entry = in6_lltable_dump_entry; + llt->llt_hash = in6_lltable_hash; + llt->llt_fill_sa_entry = in6_lltable_fill_sa_entry; + 
llt->llt_free_entry = in6_lltable_free_entry; + llt->llt_match_prefix = in6_lltable_match_prefix; + lltable_link(llt); + + return (llt); +} + Index: sys/netinet6/in6_var.h =================================================================== --- sys/netinet6/in6_var.h +++ sys/netinet6/in6_var.h @@ -817,6 +817,12 @@ int in6_src_ioctl(u_long, caddr_t); void in6_newaddrmsg(struct in6_ifaddr *, int); + +struct lltable; +struct lltable *in6_lltattach(struct ifnet *ifp); +void lle6_init(void); +void lle6_destroy(void); + /* * Extended API for IPv6 FIB support. */ Index: sys/netinet6/nd6.h =================================================================== --- sys/netinet6/nd6.h +++ sys/netinet6/nd6.h @@ -327,6 +327,7 @@ VNET_DECLARE(struct nd_prhead, nd_prefix); VNET_DECLARE(int, nd6_debug); VNET_DECLARE(int, nd6_onlink_ns_rfc4861); +VNET_DECLARE(int, nd6_maxqueuelen); #define V_nd6_prune VNET(nd6_prune) #define V_nd6_delay VNET(nd6_delay) #define V_nd6_umaxtries VNET(nd6_umaxtries) @@ -338,6 +339,7 @@ #define V_nd_prefix VNET(nd_prefix) #define V_nd6_debug VNET(nd6_debug) #define V_nd6_onlink_ns_rfc4861 VNET(nd6_onlink_ns_rfc4861) +#define V_nd6_maxqueuelen VNET(nd6_maxqueuelen) #define nd6log(x) do { if (V_nd6_debug) log x; } while (/*CONSTCOND*/ 0) @@ -394,6 +396,9 @@ /* XXX: need nd6_var.h?? 
*/ /* nd6.c */ +struct rtentry; +struct rt_addrinfo; + void nd6_init(void); #ifdef VIMAGE void nd6_destroy(void); @@ -407,13 +412,8 @@ struct llentry *nd6_lookup(const struct in6_addr *, int, struct ifnet *); struct llentry *nd6_alloc(const struct in6_addr *, int, struct ifnet *); void nd6_setmtu(struct ifnet *); -void nd6_llinfo_setstate(struct llentry *lle, int newstate); void nd6_timer(void *); void nd6_purge(struct ifnet *); -int nd6_resolve_addr(struct ifnet *ifp, int flags, const struct sockaddr *dst, - char *desten, uint32_t *pflags); -int nd6_resolve(struct ifnet *, int, struct mbuf *, - const struct sockaddr *, u_char *, uint32_t *); int nd6_ioctl(u_long, caddr_t, struct ifnet *); void nd6_cache_lladdr(struct ifnet *, struct in6_addr *, char *, int, int, int); @@ -421,10 +421,19 @@ struct sockaddr_in6 *); int nd6_flush_holdchain(struct ifnet *, struct ifnet *, struct mbuf *, struct sockaddr_in6 *); -int nd6_add_ifa_lle(struct in6_ifaddr *); -void nd6_rem_ifa_lle(struct in6_ifaddr *, int); int nd6_output_ifp(struct ifnet *, struct ifnet *, struct mbuf *, struct sockaddr_in6 *, struct route *); +void nd6_rtrequest(int, struct rtentry *, struct rt_addrinfo *); +int nd6_need_cache(struct ifnet *); + +/* in6_lle.c */ +int nd6_resolve_addr(struct ifnet *ifp, int flags, const struct sockaddr *dst, + char *desten, uint32_t *pflags); +int nd6_resolve(struct ifnet *, int, struct mbuf *, + const struct sockaddr *, u_char *, uint32_t *); +void nd6_llinfo_setstate(struct llentry *lle, int newstate); +int nd6_add_ifa_lle(struct in6_ifaddr *); +void nd6_rem_ifa_lle(struct in6_ifaddr *, int); /* nd6_nbr.c */ void nd6_na_input(struct mbuf *, int, int); Index: sys/netinet6/nd6.c =================================================================== --- sys/netinet6/nd6.c +++ sys/netinet6/nd6.c @@ -100,10 +100,9 @@ VNET_DEFINE(int, nd6_maxnudhint) = 0; /* max # of subsequent upper * layer hints */ -static VNET_DEFINE(int, nd6_maxqueuelen) = 1; /* max pkts cached in unresolved 
+VNET_DEFINE(int, nd6_maxqueuelen) = 1; /* max pkts cached in unresolved * ND entries */ #define V_nd6_maxndopt VNET(nd6_maxndopt) -#define V_nd6_maxqueuelen VNET(nd6_maxqueuelen) #ifdef ND6_DEBUG VNET_DEFINE(int, nd6_debug) = 1; @@ -111,8 +110,6 @@ VNET_DEFINE(int, nd6_debug) = 0; #endif -static eventhandler_tag lle_event_eh, iflladdr_event_eh; - /* for debugging? */ #if 0 static int nd6_inuse, nd6_allocated; @@ -131,15 +128,6 @@ static void nd6_setmtu0(struct ifnet *, struct nd_ifinfo *); static void nd6_slowtimo(void *); static int regen_tmpaddr(struct in6_ifaddr *); -static void nd6_free(struct llentry *, int); -static void nd6_free_redirect(const struct llentry *); -static void nd6_llinfo_timer(void *); -static void nd6_llinfo_settimer_locked(struct llentry *, long); -static void clear_llinfo_pqueue(struct llentry *); -static void nd6_rtrequest(int, struct rtentry *, struct rt_addrinfo *); -static int nd6_resolve_slow(struct ifnet *, int, struct mbuf *, - const struct sockaddr_in6 *, u_char *, uint32_t *); -static int nd6_need_cache(struct ifnet *); static VNET_DEFINE(struct callout, nd6_slowtimo_ch); @@ -147,65 +135,6 @@ VNET_DEFINE(struct callout, nd6_timer_ch); -static void -nd6_lle_event(void *arg __unused, struct llentry *lle, int evt) -{ - struct rt_addrinfo rtinfo; - struct sockaddr_in6 dst; - struct sockaddr_dl gw; - struct ifnet *ifp; - int type; - - LLE_WLOCK_ASSERT(lle); - - if (lltable_get_af(lle->lle_tbl) != AF_INET6) - return; - - switch (evt) { - case LLENTRY_RESOLVED: - type = RTM_ADD; - KASSERT(lle->la_flags & LLE_VALID, - ("%s: %p resolved but not valid?", __func__, lle)); - break; - case LLENTRY_EXPIRED: - type = RTM_DELETE; - break; - default: - return; - } - - ifp = lltable_get_ifp(lle->lle_tbl); - - bzero(&dst, sizeof(dst)); - bzero(&gw, sizeof(gw)); - bzero(&rtinfo, sizeof(rtinfo)); - lltable_fill_sa_entry(lle, (struct sockaddr *)&dst); - dst.sin6_scope_id = in6_getscopezone(ifp, - in6_addrscope(&dst.sin6_addr)); - gw.sdl_len = 
sizeof(struct sockaddr_dl); - gw.sdl_family = AF_LINK; - gw.sdl_alen = ifp->if_addrlen; - gw.sdl_index = ifp->if_index; - gw.sdl_type = ifp->if_type; - if (evt == LLENTRY_RESOLVED) - bcopy(lle->ll_addr, gw.sdl_data, ifp->if_addrlen); - rtinfo.rti_info[RTAX_DST] = (struct sockaddr *)&dst; - rtinfo.rti_info[RTAX_GATEWAY] = (struct sockaddr *)&gw; - rtinfo.rti_addrs = RTA_DST | RTA_GATEWAY; - rt_missmsg_fib(type, &rtinfo, RTF_HOST | RTF_LLDATA | ( - type == RTM_ADD ? RTF_UP: 0), 0, RT_DEFAULT_FIB); -} - -/* - * A handler for interface link layer address change event. - */ -static void -nd6_iflladdr(void *arg __unused, struct ifnet *ifp) -{ - - lltable_update_ifaddr(LLTABLE6(ifp)); -} - void nd6_init(void) { @@ -221,12 +150,7 @@ nd6_slowtimo, curvnet); nd6_dad_init(); - if (IS_DEFAULT_VNET(curvnet)) { - lle_event_eh = EVENTHANDLER_REGISTER(lle_event, nd6_lle_event, - NULL, EVENTHANDLER_PRI_ANY); - iflladdr_event_eh = EVENTHANDLER_REGISTER(iflladdr_event, - nd6_iflladdr, NULL, EVENTHANDLER_PRI_ANY); - } + lle6_init(); } #ifdef VIMAGE @@ -236,10 +160,7 @@ callout_drain(&V_nd6_slowtimo_ch); callout_drain(&V_nd6_timer_ch); - if (IS_DEFAULT_VNET(curvnet)) { - EVENTHANDLER_DEREGISTER(lle_event, lle_event_eh); - EVENTHANDLER_DEREGISTER(iflladdr_event, iflladdr_event_eh); - } + lle6_destroy(); } #endif @@ -499,390 +420,6 @@ } /* - * ND6 timer routine to handle ND6 entries - */ -static void -nd6_llinfo_settimer_locked(struct llentry *ln, long tick) -{ - int canceled; - - LLE_WLOCK_ASSERT(ln); - - if (tick < 0) { - ln->la_expire = 0; - ln->ln_ntick = 0; - canceled = callout_stop(&ln->lle_timer); - } else { - ln->la_expire = time_uptime + tick / hz; - LLE_ADDREF(ln); - if (tick > INT_MAX) { - ln->ln_ntick = tick - INT_MAX; - canceled = callout_reset(&ln->lle_timer, INT_MAX, - nd6_llinfo_timer, ln); - } else { - ln->ln_ntick = 0; - canceled = callout_reset(&ln->lle_timer, tick, - nd6_llinfo_timer, ln); - } - } - if (canceled > 0) - LLE_REMREF(ln); -} - -/* - * Gets source address 
of the first packet in hold queue - * and stores it in @src. - * Returns pointer to @src (if hold queue is not empty) or NULL. - * - * Set noinline to be dtrace-friendly - */ -static __noinline struct in6_addr * -nd6_llinfo_get_holdsrc(struct llentry *ln, struct in6_addr *src) -{ - struct ip6_hdr hdr; - struct mbuf *m; - - if (ln->la_hold == NULL) - return (NULL); - - /* - * assume every packet in la_hold has the same IP header - */ - m = ln->la_hold; - if (sizeof(hdr) > m->m_len) - return (NULL); - - m_copydata(m, 0, sizeof(hdr), (caddr_t)&hdr); - *src = hdr.ip6_src; - - return (src); -} - -/* - * Checks if we need to switch from STALE state. - * - * RFC 4861 requires switching from STALE to DELAY state - * on first packet matching entry, waiting V_nd6_delay and - * transition to PROBE state (if upper layer confirmation was - * not received). - * - * This code performs a bit differently: - * On packet hit we don't change state (but desired state - * can be guessed by control plane). However, after V_nd6_delay - * seconds code will transition to PROBE state (so DELAY state - * is kinda skipped in most situations). - * - * Typically, V_nd6_gctimer is bigger than V_nd6_delay, so - * we perform the following upon entering STALE state: - * - * 1) Arm timer to run each V_nd6_delay seconds to make sure that - * if packet was transmitted at the start of given interval, we - * would be able to switch to PROBE state in V_nd6_delay seconds - * as user expects. - * - * 2) Reschedule timer until original V_nd6_gctimer expires keeping - * lle in STALE state (remaining timer value stored in lle_remtime). - * - * 3) Reschedule timer if packet was transmitted less that V_nd6_delay - * seconds ago. - * - * Returns non-zero value if the entry is still STALE (storing - * the next timer interval in @pdelay). - * - * Returns zero value if original timer expired or we need to switch to - * PROBE (store that in @do_switch variable). 
- */ -static int -nd6_is_stale(struct llentry *lle, long *pdelay, int *do_switch) -{ - int nd_delay, nd_gctimer, r_skip_req; - time_t lle_hittime; - long delay; - - *do_switch = 0; - nd_gctimer = V_nd6_gctimer; - nd_delay = V_nd6_delay; - - LLE_REQ_LOCK(lle); - r_skip_req = lle->r_skip_req; - lle_hittime = lle->lle_hittime; - LLE_REQ_UNLOCK(lle); - - if (r_skip_req > 0) { - - /* - * Nonzero r_skip_req value was set upon entering - * STALE state. Since value was not changed, no - * packets were passed using this lle. Ask for - * timer reschedule and keep STALE state. - */ - delay = (long)(MIN(nd_gctimer, nd_delay)); - delay *= hz; - if (lle->lle_remtime > delay) - lle->lle_remtime -= delay; - else { - delay = lle->lle_remtime; - lle->lle_remtime = 0; - } - - if (delay == 0) { - - /* - * The original ng6_gctime timeout ended, - * no more rescheduling. - */ - return (0); - } - - *pdelay = delay; - return (1); - } - - /* - * Packet received. Verify timestamp - */ - delay = (long)(time_uptime - lle_hittime); - if (delay < nd_delay) { - - /* - * V_nd6_delay still not passed since the first - * hit in STALE state. - * Reshedule timer and return. - */ - *pdelay = (long)(nd_delay - delay) * hz; - return (1); - } - - /* Request switching to probe */ - *do_switch = 1; - return (0); -} - - -/* - * Switch @lle state to new state optionally arming timers. 
- * - * Set noinline to be dtrace-friendly - */ -__noinline void -nd6_llinfo_setstate(struct llentry *lle, int newstate) -{ - struct ifnet *ifp; - int nd_gctimer, nd_delay; - long delay, remtime; - - delay = 0; - remtime = 0; - - switch (newstate) { - case ND6_LLINFO_INCOMPLETE: - ifp = lle->lle_tbl->llt_ifp; - delay = (long)ND_IFINFO(ifp)->retrans * hz / 1000; - break; - case ND6_LLINFO_REACHABLE: - if (!ND6_LLINFO_PERMANENT(lle)) { - ifp = lle->lle_tbl->llt_ifp; - delay = (long)ND_IFINFO(ifp)->reachable * hz; - } - break; - case ND6_LLINFO_STALE: - - /* - * Notify fast path that we want to know if any packet - * is transmitted by setting r_skip_req. - */ - LLE_REQ_LOCK(lle); - lle->r_skip_req = 1; - LLE_REQ_UNLOCK(lle); - nd_delay = V_nd6_delay; - nd_gctimer = V_nd6_gctimer; - - delay = (long)(MIN(nd_gctimer, nd_delay)) * hz; - remtime = (long)nd_gctimer * hz - delay; - break; - case ND6_LLINFO_DELAY: - lle->la_asked = 0; - delay = (long)V_nd6_delay * hz; - break; - } - - if (delay > 0) - nd6_llinfo_settimer_locked(lle, delay); - - lle->lle_remtime = remtime; - lle->ln_state = newstate; -} - -/* - * Timer-dependent part of nd state machine. - * - * Set noinline to be dtrace-friendly - */ -static __noinline void -nd6_llinfo_timer(void *arg) -{ - struct llentry *ln; - struct in6_addr *dst, *pdst, *psrc, src; - struct ifnet *ifp; - struct nd_ifinfo *ndi = NULL; - int do_switch, send_ns; - long delay; - - KASSERT(arg != NULL, ("%s: arg NULL", __func__)); - ln = (struct llentry *)arg; - LLE_WLOCK(ln); - if (callout_pending(&ln->lle_timer)) { - /* - * Here we are a bit odd here in the treatment of - * active/pending. If the pending bit is set, it got - * rescheduled before I ran. The active - * bit we ignore, since if it was stopped - * in ll_tablefree() and was currently running - * it would have return 0 so the code would - * not have deleted it since the callout could - * not be stopped so we want to go through - * with the delete here now. 
If the callout - * was restarted, the pending bit will be back on and - * we just want to bail since the callout_reset would - * return 1 and our reference would have been removed - * by nd6_llinfo_settimer_locked above since canceled - * would have been 1. - */ - LLE_WUNLOCK(ln); - return; - } - ifp = ln->lle_tbl->llt_ifp; - CURVNET_SET(ifp->if_vnet); - ndi = ND_IFINFO(ifp); - send_ns = 0; - dst = &ln->r_l3addr.addr6; - pdst = dst; - - if (ln->ln_ntick > 0) { - if (ln->ln_ntick > INT_MAX) { - ln->ln_ntick -= INT_MAX; - nd6_llinfo_settimer_locked(ln, INT_MAX); - } else { - ln->ln_ntick = 0; - nd6_llinfo_settimer_locked(ln, ln->ln_ntick); - } - goto done; - } - - if (ln->la_flags & LLE_STATIC) { - goto done; - } - - if (ln->la_flags & LLE_DELETED) { - nd6_free(ln, 0); - ln = NULL; - goto done; - } - - switch (ln->ln_state) { - case ND6_LLINFO_INCOMPLETE: - if (ln->la_asked < V_nd6_mmaxtries) { - ln->la_asked++; - send_ns = 1; - /* Send NS to multicast address */ - pdst = NULL; - } else { - struct mbuf *m = ln->la_hold; - if (m) { - struct mbuf *m0; - - /* - * assuming every packet in la_hold has the - * same IP header. Send error after unlock. - */ - m0 = m->m_nextpkt; - m->m_nextpkt = NULL; - ln->la_hold = m0; - clear_llinfo_pqueue(ln); - } - EVENTHANDLER_INVOKE(lle_event, ln, LLENTRY_TIMEDOUT); - nd6_free(ln, 0); - ln = NULL; - if (m != NULL) - icmp6_error2(m, ICMP6_DST_UNREACH, - ICMP6_DST_UNREACH_ADDR, 0, ifp); - } - break; - case ND6_LLINFO_REACHABLE: - if (!ND6_LLINFO_PERMANENT(ln)) - nd6_llinfo_setstate(ln, ND6_LLINFO_STALE); - break; - - case ND6_LLINFO_STALE: - if (nd6_is_stale(ln, &delay, &do_switch) != 0) { - - /* - * No packet has used this entry and GC timeout - * has not been passed. Reshedule timer and - * return. - */ - nd6_llinfo_settimer_locked(ln, delay); - break; - } - - if (do_switch == 0) { - - /* - * GC timer has ended and entry hasn't been used. 
- * Run Garbage collector (RFC 4861, 5.3) - */ - if (!ND6_LLINFO_PERMANENT(ln)) { - EVENTHANDLER_INVOKE(lle_event, ln, - LLENTRY_EXPIRED); - nd6_free(ln, 1); - ln = NULL; - } - break; - } - - /* Entry has been used AND delay timer has ended. */ - - /* FALLTHROUGH */ - - case ND6_LLINFO_DELAY: - if (ndi && (ndi->flags & ND6_IFF_PERFORMNUD) != 0) { - /* We need NUD */ - ln->la_asked = 1; - nd6_llinfo_setstate(ln, ND6_LLINFO_PROBE); - send_ns = 1; - } else - nd6_llinfo_setstate(ln, ND6_LLINFO_STALE); /* XXX */ - break; - case ND6_LLINFO_PROBE: - if (ln->la_asked < V_nd6_umaxtries) { - ln->la_asked++; - send_ns = 1; - } else { - EVENTHANDLER_INVOKE(lle_event, ln, LLENTRY_EXPIRED); - nd6_free(ln, 0); - ln = NULL; - } - break; - default: - panic("%s: paths in a dark night can be confusing: %d", - __func__, ln->ln_state); - } -done: - if (send_ns != 0) { - nd6_llinfo_settimer_locked(ln, (long)ndi->retrans * hz / 1000); - psrc = nd6_llinfo_get_holdsrc(ln, &src); - LLE_FREE_LOCKED(ln); - ln = NULL; - nd6_ns_output(ifp, psrc, pdst, dst, NULL); - } - - if (ln != NULL) - LLE_FREE_LOCKED(ln); - CURVNET_RESTORE(); -} - - -/* * ND6 timer routine to expire default route list and prefix list */ void @@ -1160,46 +697,6 @@ */ } -/* - * the caller acquires and releases the lock on the lltbls - * Returns the llentry locked - */ -struct llentry * -nd6_lookup(const struct in6_addr *addr6, int flags, struct ifnet *ifp) -{ - struct sockaddr_in6 sin6; - struct llentry *ln; - - bzero(&sin6, sizeof(sin6)); - sin6.sin6_len = sizeof(struct sockaddr_in6); - sin6.sin6_family = AF_INET6; - sin6.sin6_addr = *addr6; - - IF_AFDATA_LOCK_ASSERT(ifp); - - ln = lla_lookup(LLTABLE6(ifp), flags, (struct sockaddr *)&sin6); - - return (ln); -} - -struct llentry * -nd6_alloc(const struct in6_addr *addr6, int flags, struct ifnet *ifp) -{ - struct sockaddr_in6 sin6; - struct llentry *ln; - - bzero(&sin6, sizeof(sin6)); - sin6.sin6_len = sizeof(struct sockaddr_in6); - sin6.sin6_family = AF_INET6; - 
sin6.sin6_addr = *addr6; - - ln = lltable_alloc_entry(LLTABLE6(ifp), 0, (struct sockaddr *)&sin6); - if (ln != NULL) - ln->ln_state = ND6_LLINFO_NOSTATE; - - return (ln); -} - /* * Test whether a given IPv6 address is a neighbor or not, ignoring * the actual neighbor cache. The neighbor cache is ignored in order @@ -1330,169 +827,6 @@ } /* - * Free an nd6 llinfo entry. - * Since the function would cause significant changes in the kernel, DO NOT - * make it global, unless you have a strong reason for the change, and are sure - * that the change is safe. - * - * Set noinline to be dtrace-friendly - */ -static __noinline void -nd6_free(struct llentry *ln, int gc) -{ - struct nd_defrouter *dr; - struct ifnet *ifp; - - LLE_WLOCK_ASSERT(ln); - - /* - * we used to have pfctlinput(PRC_HOSTDEAD) here. - * even though it is not harmful, it was not really necessary. - */ - - /* cancel timer */ - nd6_llinfo_settimer_locked(ln, -1); - - ifp = ln->lle_tbl->llt_ifp; - - if (ND_IFINFO(ifp)->flags & ND6_IFF_ACCEPT_RTADV) { - dr = defrouter_lookup(&ln->r_l3addr.addr6, ifp); - - if (dr != NULL && dr->expire && - ln->ln_state == ND6_LLINFO_STALE && gc) { - /* - * If the reason for the deletion is just garbage - * collection, and the neighbor is an active default - * router, do not delete it. Instead, reset the GC - * timer using the router's lifetime. - * Simply deleting the entry would affect default - * router selection, which is not necessarily a good - * thing, especially when we're using router preference - * values. - * XXX: the check for ln_state would be redundant, - * but we intentionally keep it just in case. - */ - if (dr->expire > time_uptime) - nd6_llinfo_settimer_locked(ln, - (dr->expire - time_uptime) * hz); - else - nd6_llinfo_settimer_locked(ln, - (long)V_nd6_gctimer * hz); - - LLE_REMREF(ln); - LLE_WUNLOCK(ln); - return; - } - - if (dr) { - /* - * Unreachablity of a router might affect the default - * router selection and on-link detection of advertised - * prefixes. 
- */ - - /* - * Temporarily fake the state to choose a new default - * router and to perform on-link determination of - * prefixes correctly. - * Below the state will be set correctly, - * or the entry itself will be deleted. - */ - ln->ln_state = ND6_LLINFO_INCOMPLETE; - } - - if (ln->ln_router || dr) { - - /* - * We need to unlock to avoid a LOR with rt6_flush() with the - * rnh and for the calls to pfxlist_onlink_check() and - * defrouter_select() in the block further down for calls - * into nd6_lookup(). We still hold a ref. - */ - LLE_WUNLOCK(ln); - - /* - * rt6_flush must be called whether or not the neighbor - * is in the Default Router List. - * See a corresponding comment in nd6_na_input(). - */ - rt6_flush(&ln->r_l3addr.addr6, ifp); - } - - if (dr) { - /* - * Since defrouter_select() does not affect the - * on-link determination and MIP6 needs the check - * before the default router selection, we perform - * the check now. - */ - pfxlist_onlink_check(); - - /* - * Refresh default router list. - */ - defrouter_select(); - } - - /* - * If this entry was added by an on-link redirect, remove the - * corresponding host route. - */ - if (ln->la_flags & LLE_REDIRECT) - nd6_free_redirect(ln); - - if (ln->ln_router || dr) - LLE_WLOCK(ln); - } - - /* - * Save to unlock. We still hold an extra reference and will not - * free(9) in llentry_free() if someone else holds one as well. - */ - LLE_WUNLOCK(ln); - IF_AFDATA_LOCK(ifp); - LLE_WLOCK(ln); - /* Guard against race with other llentry_free(). */ - if (ln->la_flags & LLE_LINKED) { - /* Remove callout reference */ - LLE_REMREF(ln); - lltable_unlink_entry(ln->lle_tbl, ln); - } - IF_AFDATA_UNLOCK(ifp); - - llentry_free(ln); -} - -static int -nd6_isdynrte(const struct rtentry *rt, void *xap) -{ - - if (rt->rt_flags == (RTF_UP | RTF_HOST | RTF_DYNAMIC)) - return (1); - - return (0); -} -/* - * Remove the rtentry for the given llentry, - * both of which were installed by a redirect. 
- */ -static void -nd6_free_redirect(const struct llentry *ln) -{ - int fibnum; - struct sockaddr_in6 sin6; - struct rt_addrinfo info; - - lltable_fill_sa_entry(ln, (struct sockaddr *)&sin6); - memset(&info, 0, sizeof(info)); - info.rti_info[RTAX_DST] = (struct sockaddr *)&sin6; - info.rti_filter = nd6_isdynrte; - - for (fibnum = 0; fibnum < rt_numfibs; fibnum++) - rtrequest1_fib(RTM_DELETE, &info, NULL, fibnum); -} - -/* * Rejuvenate this function for routing operations related * processing. */ @@ -2123,256 +1457,6 @@ return (error); } -/* - * Lookup link headerfor @sa_dst address. Stores found - * data in @desten buffer. Copy of lle ln_flags can be also - * saved in @pflags if @pflags is non-NULL. - * - * If destination LLE does not exists or lle state modification - * is required, call "slow" version. - * - * Return values: - * - 0 on success (address copied to buffer). - * - EWOULDBLOCK (no local error, but address is still unresolved) - * - other errors (alloc failure, etc) - */ -int -nd6_resolve(struct ifnet *ifp, int is_gw, struct mbuf *m, - const struct sockaddr *sa_dst, u_char *desten, uint32_t *pflags) -{ - struct llentry *ln = NULL; - const struct sockaddr_in6 *dst6; - - if (pflags != NULL) - *pflags = 0; - - dst6 = (const struct sockaddr_in6 *)sa_dst; - - /* discard the packet if IPv6 operation is disabled on the interface */ - if ((ND_IFINFO(ifp)->flags & ND6_IFF_IFDISABLED)) { - m_freem(m); - return (ENETDOWN); /* better error? 
*/ - } - - if (m != NULL && m->m_flags & M_MCAST) { - switch (ifp->if_type) { - case IFT_ETHER: - case IFT_FDDI: - case IFT_L2VLAN: - case IFT_IEEE80211: - case IFT_BRIDGE: - case IFT_ISO88025: - ETHER_MAP_IPV6_MULTICAST(&dst6->sin6_addr, - desten); - return (0); - default: - m_freem(m); - return (EAFNOSUPPORT); - } - } - - IF_AFDATA_RLOCK(ifp); - ln = nd6_lookup(&dst6->sin6_addr, LLE_UNLOCKED, ifp); - if (ln != NULL && (ln->r_flags & RLLE_VALID) != 0) { - /* Entry found, let's copy lle info */ - bcopy(ln->r_linkdata, desten, ln->r_hdrlen); - if (pflags != NULL) - *pflags = LLE_VALID | (ln->r_flags & RLLE_IFADDR); - /* Check if we have feedback request from nd6 timer */ - if (ln->r_skip_req != 0) { - LLE_REQ_LOCK(ln); - ln->r_skip_req = 0; /* Notify that entry was used */ - ln->lle_hittime = time_uptime; - LLE_REQ_UNLOCK(ln); - } - IF_AFDATA_RUNLOCK(ifp); - return (0); - } - IF_AFDATA_RUNLOCK(ifp); - - return (nd6_resolve_slow(ifp, 0, m, dst6, desten, pflags)); -} - - -/* - * Do L2 address resolution for @sa_dst address. Stores found - * address in @desten buffer. Copy of lle ln_flags can be also - * saved in @pflags if @pflags is non-NULL. - * - * Heavy version. - * Function assume that destination LLE does not exist, - * is invalid or stale, so LLE_EXCLUSIVE lock needs to be acquired. - * - * Set noinline to be dtrace-friendly - */ -static __noinline int -nd6_resolve_slow(struct ifnet *ifp, int flags, struct mbuf *m, - const struct sockaddr_in6 *dst, u_char *desten, uint32_t *pflags) -{ - struct llentry *lle = NULL, *lle_tmp; - struct in6_addr *psrc, src; - int send_ns, ll_len; - char *lladdr; - - /* - * Address resolution or Neighbor Unreachability Detection - * for the next hop. - * At this point, the destination of the packet must be a unicast - * or an anycast address(i.e. not a multicast). 
- */ - if (lle == NULL) { - IF_AFDATA_RLOCK(ifp); - lle = nd6_lookup(&dst->sin6_addr, LLE_EXCLUSIVE, ifp); - IF_AFDATA_RUNLOCK(ifp); - if ((lle == NULL) && nd6_is_addr_neighbor(dst, ifp)) { - /* - * Since nd6_is_addr_neighbor() internally calls nd6_lookup(), - * the condition below is not very efficient. But we believe - * it is tolerable, because this should be a rare case. - */ - lle = nd6_alloc(&dst->sin6_addr, 0, ifp); - if (lle == NULL) { - char ip6buf[INET6_ADDRSTRLEN]; - log(LOG_DEBUG, - "nd6_output: can't allocate llinfo for %s " - "(ln=%p)\n", - ip6_sprintf(ip6buf, &dst->sin6_addr), lle); - m_freem(m); - return (ENOBUFS); - } - - IF_AFDATA_WLOCK(ifp); - LLE_WLOCK(lle); - /* Prefer any existing entry over newly-created one */ - lle_tmp = nd6_lookup(&dst->sin6_addr, LLE_EXCLUSIVE, ifp); - if (lle_tmp == NULL) - lltable_link_entry(LLTABLE6(ifp), lle); - IF_AFDATA_WUNLOCK(ifp); - if (lle_tmp != NULL) { - lltable_free_entry(LLTABLE6(ifp), lle); - lle = lle_tmp; - lle_tmp = NULL; - } - } - } - if (lle == NULL) { - if (!(ND_IFINFO(ifp)->flags & ND6_IFF_PERFORMNUD)) { - m_freem(m); - return (ENOBUFS); - } - - if (m != NULL) - m_freem(m); - return (ENOBUFS); - } - - LLE_WLOCK_ASSERT(lle); - - /* - * The first time we send a packet to a neighbor whose entry is - * STALE, we have to change the state to DELAY and a sets a timer to - * expire in DELAY_FIRST_PROBE_TIME seconds to ensure do - * neighbor unreachability detection on expiration. - * (RFC 2461 7.3.3) - */ - if (lle->ln_state == ND6_LLINFO_STALE) - nd6_llinfo_setstate(lle, ND6_LLINFO_DELAY); - - /* - * If the neighbor cache entry has a state other than INCOMPLETE - * (i.e. its link-layer address is already resolved), just - * send the packet. 
- */ - if (lle->ln_state > ND6_LLINFO_INCOMPLETE) { - if (flags & LLE_ADDRONLY) { - lladdr = lle->ll_addr; - ll_len = ifp->if_addrlen; - } else { - lladdr = lle->r_linkdata; - ll_len = lle->r_hdrlen; - } - bcopy(lladdr, desten, ll_len); - if (pflags != NULL) - *pflags = lle->la_flags; - LLE_WUNLOCK(lle); - return (0); - } - - /* - * There is a neighbor cache entry, but no ethernet address - * response yet. Append this latest packet to the end of the - * packet queue in the mbuf, unless the number of the packet - * does not exceed nd6_maxqueuelen. When it exceeds nd6_maxqueuelen, - * the oldest packet in the queue will be removed. - */ - - if (lle->la_hold != NULL) { - struct mbuf *m_hold; - int i; - - i = 0; - for (m_hold = lle->la_hold; m_hold; m_hold = m_hold->m_nextpkt){ - i++; - if (m_hold->m_nextpkt == NULL) { - m_hold->m_nextpkt = m; - break; - } - } - while (i >= V_nd6_maxqueuelen) { - m_hold = lle->la_hold; - lle->la_hold = lle->la_hold->m_nextpkt; - m_freem(m_hold); - i--; - } - } else { - lle->la_hold = m; - } - - /* - * If there has been no NS for the neighbor after entering the - * INCOMPLETE state, send the first solicitation. - * Note that for newly-created lle la_asked will be 0, - * so we will transition from ND6_LLINFO_NOSTATE to - * ND6_LLINFO_INCOMPLETE state here. - */ - psrc = NULL; - send_ns = 0; - if (lle->la_asked == 0) { - lle->la_asked++; - send_ns = 1; - psrc = nd6_llinfo_get_holdsrc(lle, &src); - - nd6_llinfo_setstate(lle, ND6_LLINFO_INCOMPLETE); - } - LLE_WUNLOCK(lle); - if (send_ns != 0) - nd6_ns_output(ifp, psrc, NULL, &dst->sin6_addr, NULL); - - return (EWOULDBLOCK); -} - -/* - * Do L2 address resolution for @sa_dst address. Stores found - * address in @desten buffer. Copy of lle ln_flags can be also - * saved in @pflags if @pflags is non-NULL. - * - * Return values: - * - 0 on success (address copied to buffer). 
- * - EWOULDBLOCK (no local error, but address is still unresolved) - * - other errors (alloc failure, etc) - */ -int -nd6_resolve_addr(struct ifnet *ifp, int flags, const struct sockaddr *dst, - char *desten, uint32_t *pflags) -{ - int error; - - flags |= LLE_ADDRONLY; - error = nd6_resolve_slow(ifp, flags, NULL, - (const struct sockaddr_in6 *)dst, desten, pflags); - return (error); -} - int nd6_flush_holdchain(struct ifnet *ifp, struct ifnet *origifp, struct mbuf *chain, struct sockaddr_in6 *dst) @@ -2400,7 +1484,7 @@ return (error); } -static int +int nd6_need_cache(struct ifnet *ifp) { /* @@ -2426,91 +1510,6 @@ } } -/* - * Add pernament ND6 link-layer record for given - * interface address. - * - * Very similar to IPv4 arp_ifinit(), but: - * 1) IPv6 DAD is performed in different place - * 2) It is called by IPv6 protocol stack in contrast to - * arp_ifinit() which is typically called in SIOCSIFADDR - * driver ioctl handler. - * - */ -int -nd6_add_ifa_lle(struct in6_ifaddr *ia) -{ - struct ifnet *ifp; - struct llentry *ln, *ln_tmp; - struct sockaddr *dst; - - ifp = ia->ia_ifa.ifa_ifp; - if (nd6_need_cache(ifp) == 0) - return (0); - - ia->ia_ifa.ifa_rtrequest = nd6_rtrequest; - dst = (struct sockaddr *)&ia->ia_addr; - ln = lltable_alloc_entry(LLTABLE6(ifp), LLE_IFADDR, dst); - if (ln == NULL) - return (ENOBUFS); - - IF_AFDATA_WLOCK(ifp); - LLE_WLOCK(ln); - /* Unlink any entry if exists */ - ln_tmp = lla_lookup(LLTABLE6(ifp), LLE_EXCLUSIVE, dst); - if (ln_tmp != NULL) - lltable_unlink_entry(LLTABLE6(ifp), ln_tmp); - lltable_link_entry(LLTABLE6(ifp), ln); - IF_AFDATA_WUNLOCK(ifp); - - if (ln_tmp != NULL) - EVENTHANDLER_INVOKE(lle_event, ln_tmp, LLENTRY_EXPIRED); - EVENTHANDLER_INVOKE(lle_event, ln, LLENTRY_RESOLVED); - - LLE_WUNLOCK(ln); - if (ln_tmp != NULL) - llentry_free(ln_tmp); - - return (0); -} - -/* - * Removes either all lle entries for given @ia, or lle - * corresponding to @ia address. 
- */ -void -nd6_rem_ifa_lle(struct in6_ifaddr *ia, int all) -{ - struct sockaddr_in6 mask, addr; - struct sockaddr *saddr, *smask; - struct ifnet *ifp; - - ifp = ia->ia_ifa.ifa_ifp; - memcpy(&addr, &ia->ia_addr, sizeof(ia->ia_addr)); - memcpy(&mask, &ia->ia_prefixmask, sizeof(ia->ia_prefixmask)); - saddr = (struct sockaddr *)&addr; - smask = (struct sockaddr *)&mask; - - if (all != 0) - lltable_prefix_free(AF_INET6, saddr, smask, LLE_STATIC); - else - lltable_delete_addr(LLTABLE6(ifp), LLE_IFADDR, saddr); -} - -static void -clear_llinfo_pqueue(struct llentry *ln) -{ - struct mbuf *m_hold, *m_hold_next; - - for (m_hold = ln->la_hold; m_hold; m_hold = m_hold_next) { - m_hold_next = m_hold->m_nextpkt; - m_freem(m_hold); - } - - ln->la_hold = NULL; - return; -} - static int nd6_sysctl_drlist(SYSCTL_HANDLER_ARGS); static int nd6_sysctl_prlist(SYSCTL_HANDLER_ARGS); #ifdef SYSCTL_DECL