Index: sys/dev/cxgb/ulp/tom/cxgb_l2t.c =================================================================== --- sys/dev/cxgb/ulp/tom/cxgb_l2t.c +++ sys/dev/cxgb/ulp/tom/cxgb_l2t.c @@ -215,7 +215,7 @@ struct tom_data *td = sc->tom_softc; struct toedev *tod = &td->tod; struct sockaddr_in sin = {0}; - uint8_t dmac[ETHER_ADDR_LEN]; + uint8_t dmac[ETHER_HDR_LEN]; uint16_t vtag = EVL_VLID_MASK; int rc; Index: sys/dev/cxgbe/tom/t4_tom_l2t.c =================================================================== --- sys/dev/cxgbe/tom/t4_tom_l2t.c +++ sys/dev/cxgbe/tom/t4_tom_l2t.c @@ -233,7 +233,7 @@ struct sockaddr_in sin = {0}; struct sockaddr_in6 sin6 = {0}; struct sockaddr *sa; - uint8_t dmac[ETHER_ADDR_LEN]; + uint8_t dmac[ETHER_HDR_LEN]; uint16_t vtag = VLAN_NONE; int rc; Index: sys/net/bpf.c =================================================================== --- sys/net/bpf.c +++ sys/net/bpf.c @@ -69,6 +69,7 @@ #include #include +#include #include #include #ifdef BPF_JITTER @@ -76,6 +77,7 @@ #endif #include #include +#include #include #include @@ -164,7 +166,7 @@ static void bpf_detachd_locked(struct bpf_d *); static void bpf_freed(struct bpf_d *); static int bpf_movein(struct uio *, int, struct ifnet *, struct mbuf **, - struct sockaddr *, int *, struct bpf_insn *); + struct sockaddr *, int *, struct bpf_d *); static int bpf_setif(struct bpf_d *, struct ifreq *); static void bpf_timed_out(void *); static __inline void @@ -454,7 +456,7 @@ */ static int bpf_movein(struct uio *uio, int linktype, struct ifnet *ifp, struct mbuf **mp, - struct sockaddr *sockp, int *hdrlen, struct bpf_insn *wfilter) + struct sockaddr *sockp, int *hdrlen, struct bpf_d *d) { const struct ieee80211_bpf_params *p; struct ether_header *eh; @@ -549,7 +551,7 @@ if (error) goto bad; - slen = bpf_filter(wfilter, mtod(m, u_char *), len, len); + slen = bpf_filter(d->bd_wfilter, mtod(m, u_char *), len, len); if (slen == 0) { error = EPERM; goto bad; @@ -566,6 +568,10 @@ else m->m_flags |= M_MCAST; } + if 
(d->bd_hdrcmplt == 0) { + memcpy(eh->ether_shost, IF_LLADDR(ifp), + sizeof(eh->ether_shost)); + } break; } @@ -1088,6 +1094,7 @@ struct ifnet *ifp; struct mbuf *m, *mc; struct sockaddr dst; + struct route ro; int error, hlen; error = devfs_get_cdevpriv((void **)&d); @@ -1119,7 +1126,7 @@ hlen = 0; /* XXX: bpf_movein() can sleep */ error = bpf_movein(uio, (int)d->bd_bif->bif_dlt, ifp, - &m, &dst, &hlen, d->bd_wfilter); + &m, &dst, &hlen, d); if (error) { d->bd_wdcount++; return (error); @@ -1151,7 +1158,14 @@ BPFD_UNLOCK(d); #endif - error = (*ifp->if_output)(ifp, m, &dst, NULL); + bzero(&ro, sizeof(ro)); + if (hlen != 0) { + ro.ro_prepend = (u_char *)&dst.sa_data; + ro.ro_plen = hlen; + ro.ro_flags = RT_HAS_HEADER; + } + + error = (*ifp->if_output)(ifp, m, &dst, &ro); if (error) d->bd_wdcount++; Index: sys/net/flowtable.c =================================================================== --- sys/net/flowtable.c +++ sys/net/flowtable.c @@ -665,6 +665,7 @@ flowtable_lookup(sa_family_t sa, struct mbuf *m, struct route *ro) { struct flentry *fle; + struct llentry *lle; if (V_flowtable_enable == 0) return (ENXIO); @@ -693,8 +694,15 @@ } ro->ro_rt = fle->f_rt; - ro->ro_lle = fle->f_lle; ro->ro_flags |= RT_NORTREF; + lle = fle->f_lle; + if (lle != NULL && (lle->la_flags & LLE_VALID)) { + ro->ro_prepend = lle->r_linkdata; + ro->ro_plen = lle->r_hdrlen; + ro->ro_flags |= RT_MAY_LOOP; + if (lle->la_flags & LLE_IFADDR) + ro->ro_flags |= RT_L2_ME; + } return (0); } Index: sys/net/if.c =================================================================== --- sys/net/if.c +++ sys/net/if.c @@ -161,6 +161,7 @@ static void if_freemulti(struct ifmultiaddr *); static void if_grow(void); static void if_input_default(struct ifnet *, struct mbuf *); +static int if_requestencap_default(struct ifnet *, struct if_encap_req *); static void if_route(struct ifnet *, int flag, int fam); static int if_setflag(struct ifnet *, int, int, int *, int); static int if_transmit(struct ifnet *ifp, struct 
mbuf *m); @@ -673,6 +674,9 @@ if (ifp->if_input == NULL) ifp->if_input = if_input_default; + if (ifp->if_requestencap == NULL) + ifp->if_requestencap = if_requestencap_default; + if (!vmove) { #ifdef MAC mac_ifnet_create(ifp); @@ -3398,6 +3402,43 @@ } /* + * Compat function for handling basic encapsulation requests. + * Stacks that are not converted yet (FDDI, IB, ...) support the + * traditional output model: ARP (and other similar L2 protocols) is + * handled inside the output routine, and arpresolve()/nd6_resolve() + * return the MAC address instead of the full prepend. + * + * For IPv4/IPv6 this function produces a header that is just the MAC + * address; for other address families it returns EAFNOSUPPORT (which + * is then handled in ARP code). + */ +static int +if_requestencap_default(struct ifnet *ifp, struct if_encap_req *req) +{ + + if (req->rtype != IFENCAP_LL) + return (EOPNOTSUPP); + + if (req->bufsize < req->lladdr_len) + return (ENOMEM); + + switch (req->family) { + case AF_INET: + case AF_INET6: + break; + default: + return (EAFNOSUPPORT); + } + + /* Copy lladdr to storage as is */ + memmove(req->buf, req->lladdr, req->lladdr_len); + req->bufsize = req->lladdr_len; + req->lladdr_off = 0; + + return (0); +} + +/* * The name argument must be a pointer to storage which will last as * long as the interface does. For physical devices, the result of * device_get_name(dev) is a good choice and for pseudo-devices a Index: sys/net/if_ethersubr.c =================================================================== --- sys/net/if_ethersubr.c +++ sys/net/if_ethersubr.c @@ -113,6 +113,7 @@ #ifdef VIMAGE static void ether_reassign(struct ifnet *, struct vnet *, char *); #endif +static int ether_requestencap(struct ifnet *, struct if_encap_req *); #define ETHER_IS_BROADCAST(addr) \ (bcmp(etherbroadcastaddr, (addr), ETHER_ADDR_LEN) == 0) @@ -136,6 +137,138 @@ } /* + * Handle link-layer encapsulation requests. 
+ */ +static int +ether_requestencap(struct ifnet *ifp, struct if_encap_req *req) +{ + struct ether_header *eh; + struct arphdr *ah; + uint16_t etype; + const u_char *lladdr; + + if (req->rtype != IFENCAP_LL) + return (EOPNOTSUPP); + + if (req->bufsize < ETHER_HDR_LEN) + return (ENOMEM); + + eh = (struct ether_header *)req->buf; + lladdr = req->lladdr; + req->lladdr_off = 0; + + switch (req->family) { + case AF_INET: + etype = htons(ETHERTYPE_IP); + break; + case AF_INET6: + etype = htons(ETHERTYPE_IPV6); + break; + case AF_ARP: + ah = (struct arphdr *)req->hdata; + ah->ar_hrd = htons(ARPHRD_ETHER); + + switch(ntohs(ah->ar_op)) { + case ARPOP_REVREQUEST: + case ARPOP_REVREPLY: + etype = htons(ETHERTYPE_REVARP); + break; + case ARPOP_REQUEST: + case ARPOP_REPLY: + default: + etype = htons(ETHERTYPE_ARP); + break; + } + + if (req->flags & IFENCAP_FLAG_BROADCAST) + lladdr = ifp->if_broadcastaddr; + break; + default: + return (EAFNOSUPPORT); + } + + memcpy(&eh->ether_type, &etype, sizeof(eh->ether_type)); + memcpy(eh->ether_dhost, lladdr, ETHER_ADDR_LEN); + memcpy(eh->ether_shost, IF_LLADDR(ifp), ETHER_ADDR_LEN); + req->bufsize = sizeof(struct ether_header); + + return (0); +} + + +static int +ether_resolve_addr(struct ifnet *ifp, struct mbuf *m, + const struct sockaddr *dst, struct route *ro, u_char *phdr, + uint32_t *pflags) +{ + struct ether_header *eh; + struct rtentry *rt; + uint32_t lleflags = 0; + int error = 0; +#if defined(INET) || defined(INET6) + uint16_t etype; +#endif + + eh = (struct ether_header *)phdr; + + switch (dst->sa_family) { +#ifdef INET + case AF_INET: + if ((m->m_flags & (M_BCAST | M_MCAST)) == 0) + error = arpresolve(ifp, 0, m, dst, phdr, &lleflags); + else { + if (m->m_flags & M_BCAST) + memcpy(eh->ether_dhost, ifp->if_broadcastaddr, + ETHER_ADDR_LEN); + else { + const struct in_addr *a; + a = &(((const struct sockaddr_in *)dst)->sin_addr); + ETHER_MAP_IP_MULTICAST(a, eh->ether_dhost); + } + etype = htons(ETHERTYPE_IP); + 
memcpy(&eh->ether_type, &etype, sizeof(etype)); + memcpy(eh->ether_shost, IF_LLADDR(ifp), ETHER_ADDR_LEN); + } + break; +#endif +#ifdef INET6 + case AF_INET6: + if ((m->m_flags & M_MCAST) == 0) + error = nd6_resolve(ifp, 0, m, dst, phdr, &lleflags); + else { + const struct in6_addr *a6; + a6 = &(((const struct sockaddr_in6 *)dst)->sin6_addr); + ETHER_MAP_IPV6_MULTICAST(a6, eh->ether_dhost); + etype = htons(ETHERTYPE_IPV6); + memcpy(&eh->ether_type, &etype, sizeof(etype)); + memcpy(eh->ether_shost, IF_LLADDR(ifp), ETHER_ADDR_LEN); + } + break; +#endif + default: + if_printf(ifp, "can't handle af%d\n", dst->sa_family); + if (m != NULL) + m_freem(m); + return (EAFNOSUPPORT); + } + + if (error == EHOSTDOWN) { + rt = (ro != NULL) ? ro->ro_rt : NULL; + if (rt != NULL && (rt->rt_flags & RTF_GATEWAY) != 0) + error = EHOSTUNREACH; + } + + if (error != 0) + return (error); + + *pflags = RT_MAY_LOOP; + if (lleflags & LLE_IFADDR) + *pflags |= RT_L2_ME; + + return (0); +} + +/* * Ethernet output routine. * Encapsulate a packet of type family for the local net. 
* Use trailer local net encapsulation if enough data in first @@ -145,27 +278,20 @@ ether_output(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *dst, struct route *ro) { - short type; - int error = 0, hdrcmplt = 0; - u_char edst[ETHER_ADDR_LEN]; - struct llentry *lle = NULL; - struct rtentry *rt0 = NULL; + int error = 0; + char linkhdr[ETHER_HDR_LEN], *phdr; struct ether_header *eh; struct pf_mtag *t; int loop_copy = 1; int hlen; /* link layer header length */ - int is_gw = 0; - uint32_t pflags = 0; + uint32_t pflags; + phdr = NULL; + pflags = 0; if (ro != NULL) { - if (!(m->m_flags & (M_BCAST | M_MCAST))) { - lle = ro->ro_lle; - if (lle != NULL) - pflags = lle->la_flags; - } - rt0 = ro->ro_rt; - if (rt0 != NULL && (rt0->rt_flags & RTF_GATEWAY) != 0) - is_gw = 1; + phdr = ro->ro_prepend; + hlen = ro->ro_plen; + pflags = ro->ro_flags; } #ifdef MAC error = mac_ifnet_check_transmit(ifp, m); @@ -180,94 +306,31 @@ (ifp->if_drv_flags & IFF_DRV_RUNNING))) senderr(ENETDOWN); - hlen = ETHER_HDR_LEN; - switch (dst->sa_family) { -#ifdef INET - case AF_INET: - if (lle != NULL && (pflags & LLE_VALID) != 0) - memcpy(edst, &lle->ll_addr.mac16, sizeof(edst)); - else - error = arpresolve(ifp, is_gw, m, dst, edst, &pflags); - if (error) + if (phdr == NULL) { + /* No prepend data supplied. Try to calculate ourselves. */ + phdr = linkhdr; + hlen = ETHER_HDR_LEN; + error = ether_resolve_addr(ifp, m, dst, ro, phdr, &pflags); + if (error != 0) return (error == EWOULDBLOCK ? 
0 : error); - type = htons(ETHERTYPE_IP); - break; - case AF_ARP: - { - struct arphdr *ah; - ah = mtod(m, struct arphdr *); - ah->ar_hrd = htons(ARPHRD_ETHER); - - loop_copy = 0; /* if this is for us, don't do it */ - - switch(ntohs(ah->ar_op)) { - case ARPOP_REVREQUEST: - case ARPOP_REVREPLY: - type = htons(ETHERTYPE_REVARP); - break; - case ARPOP_REQUEST: - case ARPOP_REPLY: - default: - type = htons(ETHERTYPE_ARP); - break; - } - - if (m->m_flags & M_BCAST) - bcopy(ifp->if_broadcastaddr, edst, ETHER_ADDR_LEN); - else - bcopy(ar_tha(ah), edst, ETHER_ADDR_LEN); - - } - break; -#endif -#ifdef INET6 - case AF_INET6: - if (lle != NULL && (pflags & LLE_VALID)) - memcpy(edst, &lle->ll_addr.mac16, sizeof(edst)); - else - error = nd6_resolve(ifp, is_gw, m, dst, (u_char *)edst, - &pflags); - if (error) - return (error == EWOULDBLOCK ? 0 : error); - type = htons(ETHERTYPE_IPV6); - break; -#endif - case pseudo_AF_HDRCMPLT: - { - const struct ether_header *eh; - - hdrcmplt = 1; - /* FALLTHROUGH */ - - case AF_UNSPEC: - loop_copy = 0; /* if this is for us, don't do it */ - eh = (const struct ether_header *)dst->sa_data; - (void)memcpy(edst, eh->ether_dhost, sizeof (edst)); - type = eh->ether_type; - break; - } - default: - if_printf(ifp, "can't handle af%d\n", dst->sa_family); - senderr(EAFNOSUPPORT); } - if ((pflags & LLE_IFADDR) != 0) { + if ((pflags & RT_L2_ME) != 0) { update_mbuf_csumflags(m, m); return (if_simloop(ifp, m, dst->sa_family, 0)); } + loop_copy = pflags & RT_MAY_LOOP; /* * Add local net header. If no space in first mbuf, * allocate another. 
*/ - M_PREPEND(m, ETHER_HDR_LEN, M_NOWAIT); + M_PREPEND(m, hlen, M_NOWAIT); if (m == NULL) senderr(ENOBUFS); - eh = mtod(m, struct ether_header *); - if (hdrcmplt == 0) { - memcpy(&eh->ether_type, &type, sizeof(eh->ether_type)); - memcpy(eh->ether_dhost, edst, sizeof (edst)); - memcpy(eh->ether_shost, IF_LLADDR(ifp),sizeof(eh->ether_shost)); + if ((pflags & RT_HAS_HEADER) == 0) { + eh = mtod(m, struct ether_header *); + memcpy(eh, phdr, hlen); } /* @@ -279,34 +342,27 @@ * on the wire). However, we don't do that here for security * reasons and compatibility with the original behavior. */ - if ((ifp->if_flags & IFF_SIMPLEX) && loop_copy && + if ((m->m_flags & M_BCAST) && loop_copy && (ifp->if_flags & IFF_SIMPLEX) && ((t = pf_find_mtag(m)) == NULL || !t->routed)) { - if (m->m_flags & M_BCAST) { - struct mbuf *n; + struct mbuf *n; - /* - * Because if_simloop() modifies the packet, we need a - * writable copy through m_dup() instead of a readonly - * one as m_copy[m] would give us. The alternative would - * be to modify if_simloop() to handle the readonly mbuf, - * but performancewise it is mostly equivalent (trading - * extra data copying vs. extra locking). - * - * XXX This is a local workaround. A number of less - * often used kernel parts suffer from the same bug. - * See PR kern/105943 for a proposed general solution. - */ - if ((n = m_dup(m, M_NOWAIT)) != NULL) { - update_mbuf_csumflags(m, n); - (void)if_simloop(ifp, n, dst->sa_family, hlen); - } else - if_inc_counter(ifp, IFCOUNTER_IQDROPS, 1); - } else if (bcmp(eh->ether_dhost, eh->ether_shost, - ETHER_ADDR_LEN) == 0) { - update_mbuf_csumflags(m, m); - (void) if_simloop(ifp, m, dst->sa_family, hlen); - return (0); /* XXX */ - } + /* + * Because if_simloop() modifies the packet, we need a + * writable copy through m_dup() instead of a readonly + * one as m_copy[m] would give us. 
The alternative would + * be to modify if_simloop() to handle the readonly mbuf, + * but performancewise it is mostly equivalent (trading + * extra data copying vs. extra locking). + * + * XXX This is a local workaround. A number of less + * often used kernel parts suffer from the same bug. + * See PR kern/105943 for a proposed general solution. + */ + if ((n = m_dup(m, M_NOWAIT)) != NULL) { + update_mbuf_csumflags(m, n); + (void)if_simloop(ifp, n, dst->sa_family, hlen); + } else + if_inc_counter(ifp, IFCOUNTER_IQDROPS, 1); } /* @@ -798,6 +854,7 @@ ifp->if_output = ether_output; ifp->if_input = ether_input; ifp->if_resolvemulti = ether_resolvemulti; + ifp->if_requestencap = ether_requestencap; #ifdef VIMAGE ifp->if_reassign = ether_reassign; #endif Index: sys/net/if_llatbl.h =================================================================== --- sys/net/if_llatbl.h +++ sys/net/if_llatbl.h @@ -48,6 +48,7 @@ #define LLTABLE_WUNLOCK() rw_wunlock(&lltable_rwlock) #define LLTABLE_LOCK_ASSERT() rw_assert(&lltable_rwlock, RA_LOCKED) +#define LLE_MAX_LINKHDR 24 /* Full IB header */ /* * Code referencing llentry must at least hold * a shared lock @@ -58,14 +59,11 @@ struct in_addr addr4; struct in6_addr addr6; } r_l3addr; - union { - uint64_t mac_aligned; - uint16_t mac16[3]; - uint8_t mac8[20]; /* IB needs 20 bytes. 
*/ - } ll_addr; + char r_linkdata[LLE_MAX_LINKHDR]; /* L2 data */ + uint8_t r_hdrlen; /* length for LL header */ + uint8_t spare0[3]; uint16_t r_flags; /* LLE runtime flags */ uint16_t r_skip_req; /* feedback from fast path */ - uint64_t spare1; struct lltable *lle_tbl; struct llentries *lle_head; @@ -82,6 +80,7 @@ time_t lle_remtime; /* Real time remaining */ time_t lle_hittime; /* Time when r_skip_req was unset */ int lle_refcnt; + char *ll_addr; /* link-layer address */ LIST_ENTRY(llentry) lle_chain; /* chain of deleted items */ struct callout lle_timer; @@ -198,6 +197,8 @@ /* LLE request flags */ #define LLE_EXCLUSIVE 0x2000 /* return lle xlocked */ #define LLE_UNLOCKED 0x4000 /* return lle unlocked */ +#define LLE_ADDRONLY 0x4000 /* return lladdr instead of full header */ +#define LLE_CREATE 0x8000 /* hint to avoid lle lookup */ /* LLE flags used by fastpath code */ #define RLLE_VALID 0x0001 /* entry is valid */ @@ -223,10 +224,13 @@ /* helper functions */ size_t lltable_drop_entry_queue(struct llentry *); void lltable_set_entry_addr(struct ifnet *ifp, struct llentry *lle, - const char *lladdr); + const char *linkhdr, size_t linkhdrsize, int lladdr_off); int lltable_try_set_entry_addr(struct ifnet *ifp, struct llentry *lle, - const char *lladdr); + const char *linkhdr, size_t linkhdrsize, int lladdr_off); +int lltable_calc_llheader(struct ifnet *ifp, int family, char *lladdr, + char *buf, size_t *bufsize, int *lladdr_off); +void lltable_update_ifaddr(struct lltable *llt); struct llentry *lltable_alloc_entry(struct lltable *llt, u_int flags, const struct sockaddr *l4addr); void lltable_free_entry(struct lltable *llt, struct llentry *lle); Index: sys/net/if_llatbl.c =================================================================== --- sys/net/if_llatbl.c +++ sys/net/if_llatbl.c @@ -278,10 +278,12 @@ void lltable_set_entry_addr(struct ifnet *ifp, struct llentry *lle, - const char *lladdr) + const char *linkhdr, size_t linkhdrsize, int lladdr_off) { - 
bcopy(lladdr, &lle->ll_addr, ifp->if_addrlen); + memcpy(lle->r_linkdata, linkhdr, linkhdrsize); + lle->r_hdrlen = linkhdrsize; + lle->ll_addr = &lle->r_linkdata[lladdr_off]; lle->la_flags |= LLE_VALID; lle->r_flags |= RLLE_VALID; } @@ -296,7 +298,7 @@ */ int lltable_try_set_entry_addr(struct ifnet *ifp, struct llentry *lle, - const char *lladdr) + const char *linkhdr, size_t linkhdrsize, int lladdr_off) { /* Perform real LLE update */ @@ -318,7 +320,7 @@ } /* Update data */ - lltable_set_entry_addr(ifp, lle, lladdr); + lltable_set_entry_addr(ifp, lle, linkhdr, linkhdrsize, lladdr_off); IF_AFDATA_WUNLOCK(ifp); @@ -327,6 +329,84 @@ return (1); } + /* + * Helper function used to pre-compute full/partial link-layer + * header data suitable for feeding into if_output(). + */ +int +lltable_calc_llheader(struct ifnet *ifp, int family, char *lladdr, + char *buf, size_t *bufsize, int *lladdr_off) +{ + struct if_encap_req ereq; + int error; + + bzero(buf, *bufsize); + bzero(&ereq, sizeof(ereq)); + ereq.buf = buf; + ereq.bufsize = *bufsize; + ereq.rtype = IFENCAP_LL; + ereq.family = family; + ereq.lladdr = lladdr; + ereq.lladdr_len = ifp->if_addrlen; + error = ifp->if_requestencap(ifp, &ereq); + if (error == 0) { + *bufsize = ereq.bufsize; + *lladdr_off = ereq.lladdr_off; + } + + return (error); +} + +/* + * Update link-layer header for given @lle after + * interface lladdr was changed. 
+ */ +static int +llentry_update_ifaddr(struct lltable *llt, struct llentry *lle, void *farg) +{ + struct ifnet *ifp; + u_char linkhdr[LLE_MAX_LINKHDR]; + size_t linkhdrsize; + u_char *lladdr; + int lladdr_off; + + ifp = (struct ifnet *)farg; + lladdr = lle->ll_addr; + + LLE_WLOCK(lle); + if ((lle->la_flags & LLE_VALID) == 0) { + LLE_WUNLOCK(lle); + return (0); + } + + if ((lle->la_flags & LLE_IFADDR) != 0) + lladdr = IF_LLADDR(ifp); + + /* Keep the stored header if a new one cannot be calculated. */ + linkhdrsize = sizeof(linkhdr); + if (lltable_calc_llheader(ifp, llt->llt_af, lladdr, linkhdr, + &linkhdrsize, &lladdr_off) == 0) + memcpy(lle->r_linkdata, linkhdr, linkhdrsize); + LLE_WUNLOCK(lle); + + return (0); +} + +/* + * Update all calculated headers for given @llt + */ +void +lltable_update_ifaddr(struct lltable *llt) +{ + + if (llt->llt_ifp->if_flags & IFF_LOOPBACK) + return; + + IF_AFDATA_WLOCK(llt->llt_ifp); + lltable_foreach_lle(llt, llentry_update_ifaddr, llt->llt_ifp); + IF_AFDATA_WUNLOCK(llt->llt_ifp); +} + /* * * Performes generic cleanup routines and frees lle. 
@@ -642,6 +722,9 @@ struct ifnet *ifp; struct lltable *llt; struct llentry *lle, *lle_tmp; + uint8_t linkhdr[LLE_MAX_LINKHDR]; + size_t linkhdrsize; + int lladdr_off; u_int laflags = 0; int error; @@ -677,11 +760,14 @@ if (lle == NULL) return (ENOMEM); - bcopy(LLADDR(dl), &lle->ll_addr, ifp->if_addrlen); + linkhdrsize = sizeof(linkhdr); + if (lltable_calc_llheader(ifp, dst->sa_family, LLADDR(dl), + linkhdr, &linkhdrsize, &lladdr_off) != 0) + return (EINVAL); + lltable_set_entry_addr(ifp, lle, linkhdr, linkhdrsize, + lladdr_off); if ((rtm->rtm_flags & RTF_ANNOUNCE)) lle->la_flags |= LLE_PUB; - lle->la_flags |= LLE_VALID; - lle->r_flags |= RLLE_VALID; lle->la_expire = rtm->rtm_rmx.rmx_expire; laflags = lle->la_flags; @@ -767,7 +853,7 @@ db_printf(" ln_router=%u\n", lle->ln_router); db_printf(" ln_ntick=%ju\n", (uintmax_t)lle->ln_ntick); db_printf(" lle_refcnt=%d\n", lle->lle_refcnt); - bcopy(&lle->ll_addr.mac16, octet, sizeof(octet)); + bcopy(lle->ll_addr, octet, sizeof(octet)); db_printf(" ll_addr=%02x:%02x:%02x:%02x:%02x:%02x\n", octet[0], octet[1], octet[2], octet[3], octet[4], octet[5]); db_printf(" lle_timer=%p\n", &lle->lle_timer); Index: sys/net/if_var.h =================================================================== --- sys/net/if_var.h +++ sys/net/if_var.h @@ -134,6 +134,48 @@ u_int tsomaxsegsize; /* TSO maximum segment size in bytes */ }; +/* Interface encap request types */ +typedef enum { + IFENCAP_LL = 1 /* pre-calculate link-layer header */ +} ife_type; + +/* + * The structure below allows to request various pre-calculated L2/L3 headers + * for different media. Requests varies by type (rtype field). + * + * IFENCAP_LL type: pre-calculates link header based on address family + * and destination lladdr. + * + * Input data fields: + * buf: pointer to destination buffer + * bufsize: buffer size + * flags: IFENCAP_FLAG_BROADCAST if destination is broadcast + * family: address family defined by AF_ constant. 
+ * lladdr: pointer to link-layer address + * lladdr_len: length of link-layer address + * hdata: pointer to L3 header (optional, used for ARP requests). + * Output data fields: + * buf: encap data is stored here + * bufsize: resulting encap length is stored here + * lladdr_off: offset of link-layer address from encap hdr start + * hdata: L3 header may be altered if necessary + */ + +struct if_encap_req { + u_char *buf; /* Destination buffer (w) */ + size_t bufsize; /* size of provided buffer (r) */ + ife_type rtype; /* request type (r) */ + uint32_t flags; /* Request flags (r) */ + int family; /* Address family AF_* (r) */ + int lladdr_off; /* offset from header start (w) */ + int lladdr_len; /* lladdr length (r) */ + char *lladdr; /* link-level address pointer (r) */ + char *hdata; /* Upper layer header data (rw) */ +}; + +#define IFENCAP_FLAG_BROADCAST 0x02 /* Destination is broadcast */ + + /* * Structure defining a network interface. * @@ -235,6 +277,8 @@ void (*if_reassign) /* reassign to vnet routine */ (struct ifnet *, struct vnet *, char *); if_get_counter_t if_get_counter; /* get counter values */ + int (*if_requestencap) /* make link header from request */ + (struct ifnet *, struct if_encap_req *); /* Statistics. 
*/ counter_u64_t if_counters[IFCOUNTERS]; Index: sys/net/route.h =================================================================== --- sys/net/route.h +++ sys/net/route.h @@ -51,14 +51,21 @@ */ struct route { struct rtentry *ro_rt; - struct llentry *ro_lle; - struct in_ifaddr *ro_ia; - int ro_flags; + char *ro_prepend; + uint16_t ro_plen; + uint16_t ro_flags; struct sockaddr ro_dst; }; +#define RT_L2_ME_BIT 2 /* dst L2 addr is our address */ +#define RT_MAY_LOOP_BIT 3 /* dst may require loop copy */ +#define RT_HAS_HEADER_BIT 4 /* mbuf already have its header prepended */ + #define RT_CACHING_CONTEXT 0x1 /* XXX: not used anywhere */ #define RT_NORTREF 0x2 /* doesn't hold reference on ro_rt */ +#define RT_L2_ME (1 << RT_L2_ME_BIT) +#define RT_MAY_LOOP (1 << RT_MAY_LOOP_BIT) +#define RT_HAS_HEADER (1 << RT_HAS_HEADER_BIT) struct rt_metrics { u_long rmx_locks; /* Kernel must leave these values alone */ Index: sys/netinet/if_ether.h =================================================================== --- sys/netinet/if_ether.h +++ sys/netinet/if_ether.h @@ -114,6 +114,8 @@ struct ifaddr; +int arpresolve_addr(struct ifnet *ifp, int flags, + const struct sockaddr *dst, char *desten, uint32_t *pflags); int arpresolve(struct ifnet *ifp, int is_gw, struct mbuf *m, const struct sockaddr *dst, u_char *desten, uint32_t *pflags); void arprequest(struct ifnet *, const struct in_addr *, Index: sys/netinet/if_ether.c =================================================================== --- sys/netinet/if_ether.c +++ sys/netinet/if_ether.c @@ -282,6 +282,37 @@ } /* + * Stores link-layer header for @ifp in format suitable for if_output() + * into buffer @buf. Resulting header length is stored in @bufsize. + * + * Returns 0 on success. 
+ */ +static int +arp_fillheader(struct ifnet *ifp, struct arphdr *ah, int bcast, u_char *buf, + size_t *bufsize) +{ + struct if_encap_req ereq; + int error; + + bzero(buf, *bufsize); + bzero(&ereq, sizeof(ereq)); + ereq.buf = buf; + ereq.bufsize = *bufsize; + ereq.rtype = IFENCAP_LL; + ereq.family = AF_ARP; + ereq.lladdr = ar_tha(ah); + ereq.hdata = (u_char *)ah; + if (bcast) + ereq.flags = IFENCAP_FLAG_BROADCAST; + error = ifp->if_requestencap(ifp, &ereq); + if (error == 0) + *bufsize = ereq.bufsize; + + return (error); +} + + +/* * Broadcast an ARP request. Caller specifies: * - arp header source ip address * - arp header target ip address @@ -295,6 +326,10 @@ struct arphdr *ah; struct sockaddr sa; u_char *carpaddr = NULL; + uint8_t linkhdr[LLE_MAX_LINKHDR]; + size_t linkhdrsize; + struct route ro; + int error; if (sip == NULL) { /* @@ -350,12 +385,28 @@ bcopy(tip, ar_tpa(ah), ah->ar_pln); sa.sa_family = AF_ARP; sa.sa_len = 2; + + /* Calculate link header for sending frame */ + bzero(&ro, sizeof(ro)); + linkhdrsize = sizeof(linkhdr); + error = arp_fillheader(ifp, ah, 1, linkhdr, &linkhdrsize); + if (error != 0 && error != EAFNOSUPPORT) { + ARP_LOG(LOG_ERR, "Failed to calculate ARP header on %s: %d\n", + if_name(ifp), error); + m_freem(m); /* not passed to if_output() */ + return; + } + + ro.ro_prepend = linkhdr; + ro.ro_plen = linkhdrsize; + ro.ro_flags = 0; + m->m_flags |= M_BCAST; m_clrprotoflags(m); /* Avoid confusing lower layers. */ - (*ifp->if_output)(ifp, m, &sa, NULL); + (*ifp->if_output)(ifp, m, &sa, &ro); ARPSTAT_INC(txrequests); } /* * Resolve an IP address into an ethernet address - heavy version. * Used internally by arpresolve(). @@ -368,18 +419,20 @@ * Note that m_freem() handles NULL. 
*/ static int -arpresolve_full(struct ifnet *ifp, int is_gw, int create, struct mbuf *m, +arpresolve_full(struct ifnet *ifp, int is_gw, int flags, struct mbuf *m, const struct sockaddr *dst, u_char *desten, uint32_t *pflags) { struct llentry *la = NULL, *la_tmp; struct mbuf *curr = NULL; struct mbuf *next = NULL; int error, renew; + char *lladdr; + int ll_len; if (pflags != NULL) *pflags = 0; - if (create == 0) { + if ((flags & LLE_CREATE) == 0) { IF_AFDATA_RLOCK(ifp); la = lla_lookup(LLTABLE(ifp), LLE_EXCLUSIVE, dst); IF_AFDATA_RUNLOCK(ifp); @@ -413,7 +466,14 @@ if ((la->la_flags & LLE_VALID) && ((la->la_flags & LLE_STATIC) || la->la_expire > time_uptime)) { - bcopy(&la->ll_addr, desten, ifp->if_addrlen); + if (flags & LLE_ADDRONLY) { + lladdr = la->ll_addr; + ll_len = ifp->if_addrlen; + } else { + lladdr = la->r_linkdata; + ll_len = la->r_hdrlen; + } + bcopy(lladdr, desten, ll_len); /* Check if we have feedback request from arptimer() */ if (la->r_skip_req != 0) { @@ -485,15 +545,31 @@ /* * Resolve an IP address into an ethernet address. + */ +int +arpresolve_addr(struct ifnet *ifp, int flags, const struct sockaddr *dst, + char *desten, uint32_t *pflags) +{ + int error; + + flags |= LLE_ADDRONLY; + error = arpresolve_full(ifp, 0, flags, NULL, dst, desten, pflags); + return (error); +} + + +/* + * Lookups link header based on an IP address. * On input: * ifp is the interface we use * is_gw != 0 if @dst represents gateway to some destination * m is the mbuf. May be NULL if we don't have a packet. * dst is the next hop, - * desten is the storage to put LL address. + * desten is the storage to put LL header. * flags returns subset of lle flags: LLE_VALID | LLE_IFADDR * - * On success, desten and flags are filled in and the function returns 0; + * On success, full/partial link header and flags are filled in and + * the function returns 0. * If the packet must be held pending resolution, we return EWOULDBLOCK * On other errors, we return the corresponding error code. 
* Note that m_freem() handles NULL. @@ -525,7 +601,7 @@ la = lla_lookup(LLTABLE(ifp), LLE_UNLOCKED, dst); if (la != NULL && (la->r_flags & RLLE_VALID) != 0) { /* Entry found, let's copy lle info */ - bcopy(&la->ll_addr, desten, ifp->if_addrlen); + bcopy(la->r_linkdata, desten, la->r_hdrlen); if (pflags != NULL) *pflags = LLE_VALID | (la->r_flags & RLLE_IFADDR); /* Check if we have feedback request from arptimer() */ @@ -539,7 +615,8 @@ } IF_AFDATA_RUNLOCK(ifp); - return (arpresolve_full(ifp, is_gw, 1, m, dst, desten, pflags)); + return (arpresolve_full(ifp, is_gw, la == NULL ? LLE_CREATE : 0, m, dst, + desten, pflags)); } /* @@ -683,6 +760,11 @@ struct sockaddr_in sin; struct sockaddr *dst; struct nhop4_basic nh4; + uint8_t linkhdr[LLE_MAX_LINKHDR]; + struct route ro; + size_t linkhdrsize; + int lladdr_off; + int error; sin.sin_len = sizeof(struct sockaddr_in); sin.sin_family = AF_INET; @@ -850,8 +932,14 @@ else if (itaddr.s_addr == myaddr.s_addr) { /* * Request/reply to our address, but no lle exists yet. - * Try to create new llentry. + * Calculate full link prepend to use in lle. 
*/ + linkhdrsize = sizeof(linkhdr); + if (lltable_calc_llheader(ifp, AF_INET, ar_sha(ah), linkhdr, + &linkhdrsize, &lladdr_off) != 0) + goto reply; + + /* Allocate new entry */ la = lltable_alloc_entry(LLTABLE(ifp), 0, dst); if (la == NULL) { @@ -863,7 +951,8 @@ */ goto reply; } - lltable_set_entry_addr(ifp, la, ar_sha(ah)); + lltable_set_entry_addr(ifp, la, linkhdr, linkhdrsize, + lladdr_off); IF_AFDATA_WLOCK(ifp); LLE_WLOCK(la); @@ -921,7 +1010,7 @@ if ((lle != NULL) && (lle->la_flags & LLE_PUB)) { (void)memcpy(ar_tha(ah), ar_sha(ah), ah->ar_hln); - (void)memcpy(ar_sha(ah), &lle->ll_addr, ah->ar_hln); + (void)memcpy(ar_sha(ah), lle->ll_addr, ah->ar_hln); LLE_RUNLOCK(lle); } else { @@ -991,8 +1080,29 @@ m->m_pkthdr.rcvif = NULL; sa.sa_family = AF_ARP; sa.sa_len = 2; + + /* Calculate link header for sending frame */ + bzero(&ro, sizeof(ro)); + linkhdrsize = sizeof(linkhdr); + error = arp_fillheader(ifp, ah, 0, linkhdr, &linkhdrsize); + /* + * arp_fillheader() may fail due to lack of support inside the encap + * request routine. This is not necessarily an error: AF_ARP can/should + * be handled by if_output(). + */ + if (error != 0 && error != EAFNOSUPPORT) { + ARP_LOG(LOG_ERR, "Failed to calculate ARP header on %s: %d\n", + if_name(ifp), error); + m_freem(m); /* not passed to if_output() */ + return; + } + + ro.ro_prepend = linkhdr; + ro.ro_plen = linkhdrsize; + ro.ro_flags = 0; + m_clrprotoflags(m); /* Avoid confusing lower layers. 
*/ - (*ifp->if_output)(ifp, m, &sa, NULL); + (*ifp->if_output)(ifp, m, &sa, &ro); ARPSTAT_INC(txreplies); return; @@ -1011,6 +1121,9 @@ { struct sockaddr sa; struct mbuf *m_hold, *m_hold_next; + uint8_t linkhdr[LLE_MAX_LINKHDR]; + size_t linkhdrsize; + int lladdr_off; LLE_WLOCK_ASSERT(la); @@ -1027,7 +1140,7 @@ return; } if ((la->la_flags & LLE_VALID) && - bcmp(ar_sha(ah), &la->ll_addr, ifp->if_addrlen)) { + bcmp(ar_sha(ah), la->ll_addr, ifp->if_addrlen)) { if (la->la_flags & LLE_STATIC) { LLE_WUNLOCK(la); if (log_arp_permanent_modify) @@ -1050,31 +1163,21 @@ } } + /* Calculate full link prepend to use in lle */ + linkhdrsize = sizeof(linkhdr); + if (lltable_calc_llheader(ifp, AF_INET, ar_sha(ah), linkhdr, + &linkhdrsize, &lladdr_off) != 0) { + LLE_WUNLOCK(la); /* unlock, as the LLE_STATIC exit does */ + return; + } + /* Check if something has changed */ - if (memcmp(&la->ll_addr, ar_sha(ah), ifp->if_addrlen) != 0 || + if (memcmp(la->r_linkdata, linkhdr, linkhdrsize) != 0 || (la->la_flags & LLE_VALID) == 0) { - /* Perform real LLE update */ - /* use afdata WLOCK to update fields */ - LLE_ADDREF(la); - LLE_WUNLOCK(la); - IF_AFDATA_WLOCK(ifp); - LLE_WLOCK(la); - - /* - * Since we droppped LLE lock, other thread might have deleted - * this lle. Check and return - */ - if ((la->la_flags & LLE_DELETED) != 0) { - IF_AFDATA_WUNLOCK(ifp); - LLE_FREE_LOCKED(la); + /* Try to perform LLE update */ + if (lltable_try_set_entry_addr(ifp, la, linkhdr, linkhdrsize, + lladdr_off) == 0) return; - } - - /* Update data */ - lltable_set_entry_addr(ifp, la, ar_sha(ah)); - - IF_AFDATA_WUNLOCK(ifp); - LLE_REMREF(la); /* Clear fast path feedback request if set */ la->r_skip_req = 0; @@ -1215,10 +1318,12 @@ /* * A handler for interface link layer address change event. 
*/ -static __noinline void +static void arp_iflladdr(void *arg __unused, struct ifnet *ifp) { + lltable_update_ifaddr(LLTABLE(ifp)); + if ((ifp->if_flags & IFF_UP) != 0) arp_handle_ifllchange(ifp); } Index: sys/netinet/in.c =================================================================== --- sys/netinet/in.c +++ sys/netinet/in.c @@ -1240,6 +1240,9 @@ const struct sockaddr_in *sin = (const struct sockaddr_in *)l3addr; struct ifnet *ifp = llt->llt_ifp; struct llentry *lle; + char linkhdr[LLE_MAX_LINKHDR]; + size_t linkhdrsize; + int lladdr_off; KASSERT(l3addr->sa_family == AF_INET, ("sin_family %d", l3addr->sa_family)); @@ -1262,7 +1265,12 @@ if (flags & LLE_STATIC) lle->r_flags |= RLLE_VALID; if ((flags & LLE_IFADDR) == LLE_IFADDR) { - lltable_set_entry_addr(ifp, lle, IF_LLADDR(ifp)); + linkhdrsize = LLE_MAX_LINKHDR; + if (lltable_calc_llheader(ifp, AF_INET, IF_LLADDR(ifp), + linkhdr, &linkhdrsize, &lladdr_off) != 0) + return (NULL); + lltable_set_entry_addr(ifp, lle, linkhdr, linkhdrsize, + lladdr_off); lle->la_flags |= LLE_STATIC; lle->r_flags |= (RLLE_VALID | RLLE_IFADDR); } @@ -1349,7 +1357,7 @@ sdl->sdl_type = ifp->if_type; if ((lle->la_flags & LLE_VALID) == LLE_VALID) { sdl->sdl_alen = ifp->if_addrlen; - bcopy(&lle->ll_addr, LLADDR(sdl), ifp->if_addrlen); + bcopy(lle->ll_addr, LLADDR(sdl), ifp->if_addrlen); } else { sdl->sdl_alen = 0; bzero(LLADDR(sdl), ifp->if_addrlen); Index: sys/netinet/ip_output.c =================================================================== --- sys/netinet/ip_output.c +++ sys/netinet/ip_output.c @@ -567,7 +567,7 @@ RO_RTFREE(ro); if (have_ia_ref) ifa_free(&ia->ia_ifa); - ro->ro_lle = NULL; + 
ro->ro_prepend = NULL; rte = NULL; gw = dst; ip = mtod(m, struct ip *); Index: sys/netinet/toecore.c =================================================================== --- sys/netinet/toecore.c +++ sys/netinet/toecore.c @@ -428,7 +428,7 @@ KASSERT(lle->la_flags & LLE_VALID, ("%s: %p resolved but not valid?", __func__, lle)); - lladdr = (uint8_t *)&lle->ll_addr; + lladdr = (uint8_t *)lle->ll_addr; #ifdef VLAN_TAG VLAN_TAG(ifp, &vtag); #endif Index: sys/netinet6/icmp6.c =================================================================== --- sys/netinet6/icmp6.c +++ sys/netinet6/icmp6.c @@ -2632,7 +2632,7 @@ nd_opt->nd_opt_type = ND_OPT_TARGET_LINKADDR; nd_opt->nd_opt_len = len >> 3; lladdr = (char *)(nd_opt + 1); - bcopy(&ln->ll_addr, lladdr, ifp->if_addrlen); + bcopy(ln->ll_addr, lladdr, ifp->if_addrlen); p += len; } } Index: sys/netinet6/in6.h =================================================================== --- sys/netinet6/in6.h +++ sys/netinet6/in6.h @@ -375,9 +375,9 @@ #if __BSD_VISIBLE struct route_in6 { struct rtentry *ro_rt; - struct llentry *ro_lle; - struct in6_addr *ro_ia6; - int ro_flags; + char *ro_prepend; + uint16_t ro_plen; + uint16_t ro_flags; struct sockaddr_in6 ro_dst; }; #endif Index: sys/netinet6/in6.c =================================================================== --- sys/netinet6/in6.c +++ sys/netinet6/in6.c @@ -2245,6 +2245,9 @@ const struct sockaddr_in6 *sin6 = (const struct sockaddr_in6 *)l3addr; struct ifnet *ifp = llt->llt_ifp; struct llentry *lle; + char linkhdr[LLE_MAX_LINKHDR]; + size_t linkhdrsize; + int lladdr_off; KASSERT(l3addr->sa_family == AF_INET6, ("sin_family %d", l3addr->sa_family)); @@ -2265,7 +2268,12 @@ } lle->la_flags = flags; if ((flags & LLE_IFADDR) == LLE_IFADDR) { - lltable_set_entry_addr(ifp, lle, IF_LLADDR(ifp)); + linkhdrsize = LLE_MAX_LINKHDR; + if (lltable_calc_llheader(ifp, AF_INET6, IF_LLADDR(ifp), + linkhdr, &linkhdrsize, &lladdr_off) != 0) + return (NULL); + lltable_set_entry_addr(ifp, lle, linkhdr, 
linkhdrsize, + lladdr_off); lle->la_flags |= LLE_STATIC; } Index: sys/netinet6/nd6.h =================================================================== --- sys/netinet6/nd6.h +++ sys/netinet6/nd6.h @@ -410,6 +410,8 @@ void nd6_llinfo_setstate(struct llentry *lle, int newstate); void nd6_timer(void *); void nd6_purge(struct ifnet *); +int nd6_resolve_addr(struct ifnet *ifp, int flags, const struct sockaddr *dst, + char *desten, uint32_t *pflags); int nd6_resolve(struct ifnet *, int, struct mbuf *, const struct sockaddr *, u_char *, uint32_t *); int nd6_ioctl(u_long, caddr_t, struct ifnet *); Index: sys/netinet6/nd6.c =================================================================== --- sys/netinet6/nd6.c +++ sys/netinet6/nd6.c @@ -111,7 +111,7 @@ VNET_DEFINE(int, nd6_debug) = 0; #endif -static eventhandler_tag lle_event_eh; +static eventhandler_tag lle_event_eh, iflladdr_event_eh; /* for debugging? */ #if 0 @@ -137,7 +137,7 @@ static void nd6_llinfo_settimer_locked(struct llentry *, long); static void clear_llinfo_pqueue(struct llentry *); static void nd6_rtrequest(int, struct rtentry *, struct rt_addrinfo *); -static int nd6_resolve_slow(struct ifnet *, struct mbuf *, +static int nd6_resolve_slow(struct ifnet *, int, struct mbuf *, const struct sockaddr_in6 *, u_char *, uint32_t *); static int nd6_need_cache(struct ifnet *); @@ -188,7 +188,7 @@ gw.sdl_index = ifp->if_index; gw.sdl_type = ifp->if_type; if (evt == LLENTRY_RESOLVED) - bcopy(&lle->ll_addr, gw.sdl_data, ifp->if_addrlen); + bcopy(lle->ll_addr, gw.sdl_data, ifp->if_addrlen); rtinfo.rti_info[RTAX_DST] = (struct sockaddr *)&dst; rtinfo.rti_info[RTAX_GATEWAY] = (struct sockaddr *)&gw; rtinfo.rti_addrs = RTA_DST | RTA_GATEWAY; @@ -196,6 +196,16 @@ type == RTM_ADD ? RTF_UP: 0), 0, RT_DEFAULT_FIB); } +/* + * A handler for interface link layer address change event. 
+ */ +static void +nd6_iflladdr(void *arg __unused, struct ifnet *ifp) +{ + + lltable_update_ifaddr(LLTABLE6(ifp)); +} + void nd6_init(void) { @@ -211,9 +221,12 @@ nd6_slowtimo, curvnet); nd6_dad_init(); - if (IS_DEFAULT_VNET(curvnet)) + if (IS_DEFAULT_VNET(curvnet)) { lle_event_eh = EVENTHANDLER_REGISTER(lle_event, nd6_lle_event, NULL, EVENTHANDLER_PRI_ANY); + iflladdr_event_eh = EVENTHANDLER_REGISTER(iflladdr_event, + nd6_iflladdr, NULL, EVENTHANDLER_PRI_ANY); + } } #ifdef VIMAGE @@ -223,8 +236,10 @@ callout_drain(&V_nd6_slowtimo_ch); callout_drain(&V_nd6_timer_ch); - if (IS_DEFAULT_VNET(curvnet)) + if (IS_DEFAULT_VNET(curvnet)) { EVENTHANDLER_DEREGISTER(lle_event, lle_event_eh); + EVENTHANDLER_DEREGISTER(iflladdr_event, iflladdr_event_eh); + } } #endif @@ -1844,6 +1859,9 @@ uint16_t router = 0; struct sockaddr_in6 sin6; struct mbuf *chain = NULL; + u_char linkhdr[LLE_MAX_LINKHDR]; + size_t linkhdrsize; + int lladdr_off; IF_AFDATA_UNLOCK_ASSERT(ifp); @@ -1878,8 +1896,15 @@ * Since we already know all the data for the new entry, * fill it before insertion. */ - if (lladdr != NULL) - lltable_set_entry_addr(ifp, ln, lladdr); + if (lladdr != NULL) { + linkhdrsize = sizeof(linkhdr); + if (lltable_calc_llheader(ifp, AF_INET6, lladdr, + linkhdr, &linkhdrsize, &lladdr_off) != 0) + return; + lltable_set_entry_addr(ifp, ln, linkhdr, linkhdrsize, + lladdr_off); + } + IF_AFDATA_WLOCK(ifp); LLE_WLOCK(ln); /* Prefer any existing lle over newly-created one */ @@ -1911,7 +1936,7 @@ olladdr = (ln->la_flags & LLE_VALID) ? 1 : 0; if (olladdr && lladdr) { - llchange = bcmp(lladdr, &ln->ll_addr, + llchange = bcmp(lladdr, ln->ll_addr, ifp->if_addrlen); } else if (!olladdr && lladdr) llchange = 1; @@ -1937,7 +1962,13 @@ * Record source link-layer address * XXX is it dependent to ifp->if_type? 
*/ - if (lltable_try_set_entry_addr(ifp, ln, lladdr) == 0) { + linkhdrsize = sizeof(linkhdr); + if (lltable_calc_llheader(ifp, AF_INET6, lladdr, + linkhdr, &linkhdrsize, &lladdr_off) != 0) + return; + + if (lltable_try_set_entry_addr(ifp, ln, linkhdr, linkhdrsize, + lladdr_off) == 0) { /* Entry was deleted */ return; } @@ -2093,8 +2124,8 @@ } /* - * Do L2 address resolution for @sa_dst address. Stores found - * address in @desten buffer. Copy of lle ln_flags can be also + * Lookup link header for @sa_dst address. Stores found + * data in @desten buffer. Copy of lle ln_flags can be also * saved in @pflags if @pflags is non-NULL. * * If destination LLE does not exists or lle state modification @@ -2144,7 +2175,7 @@ ln = nd6_lookup(&dst6->sin6_addr, LLE_UNLOCKED, ifp); if (ln != NULL && (ln->r_flags & RLLE_VALID) != 0) { /* Entry found, let's copy lle info */ - bcopy(&ln->ll_addr, desten, ifp->if_addrlen); + bcopy(ln->r_linkdata, desten, ln->r_hdrlen); if (pflags != NULL) *pflags = LLE_VALID | (ln->r_flags & RLLE_IFADDR); /* Check if we have feedback request from nd6 timer */ @@ -2159,7 +2190,7 @@ } IF_AFDATA_RUNLOCK(ifp); - return (nd6_resolve_slow(ifp, m, dst6, desten, pflags)); + return (nd6_resolve_slow(ifp, 0, m, dst6, desten, pflags)); } @@ -2175,12 +2206,13 @@ * Set noinline to be dtrace-friendly */ static __noinline int -nd6_resolve_slow(struct ifnet *ifp, struct mbuf *m, +nd6_resolve_slow(struct ifnet *ifp, int flags, struct mbuf *m, const struct sockaddr_in6 *dst, u_char *desten, uint32_t *pflags) { struct llentry *lle = NULL, *lle_tmp; struct in6_addr *psrc, src; - int send_ns; + int send_ns, ll_len; + char *lladdr; /* * Address resolution or Neighbor Unreachability Detection @@ -2252,7 +2284,14 @@ * send the packet. 
*/ if (lle->ln_state > ND6_LLINFO_INCOMPLETE) { - bcopy(&lle->ll_addr, desten, ifp->if_addrlen); + if (flags & LLE_ADDRONLY) { + lladdr = lle->ll_addr; + ll_len = ifp->if_addrlen; + } else { + lladdr = lle->r_linkdata; + ll_len = lle->r_hdrlen; + } + bcopy(lladdr, desten, ll_len); if (pflags != NULL) *pflags = lle->la_flags; LLE_WUNLOCK(lle); @@ -2312,6 +2351,27 @@ return (EWOULDBLOCK); } +/* + * Do L2 address resolution for @sa_dst address. Stores found + * address in @desten buffer. Copy of lle ln_flags can be also + * saved in @pflags if @pflags is non-NULL. + * + * Return values: + * - 0 on success (address copied to buffer). + * - EWOULDBLOCK (no local error, but address is still unresolved) + * - other errors (alloc failure, etc) + */ +int +nd6_resolve_addr(struct ifnet *ifp, int flags, const struct sockaddr *dst, + char *desten, uint32_t *pflags) +{ + int error; + + flags |= LLE_ADDRONLY; + error = nd6_resolve_slow(ifp, flags, NULL, + (const struct sockaddr_in6 *)dst, desten, pflags); + return (error); +} int nd6_flush_holdchain(struct ifnet *ifp, struct ifnet *origifp, struct mbuf *chain, Index: sys/netinet6/nd6_nbr.c =================================================================== --- sys/netinet6/nd6_nbr.c +++ sys/netinet6/nd6_nbr.c @@ -643,6 +643,9 @@ union nd_opts ndopts; struct mbuf *chain = NULL; struct sockaddr_in6 sin6; + u_char linkhdr[LLE_MAX_LINKHDR]; + size_t linkhdrsize; + int lladdr_off; char ip6bufs[INET6_ADDRSTRLEN], ip6bufd[INET6_ADDRSTRLEN]; if (ip6->ip6_hlim != 255) { @@ -765,7 +768,13 @@ /* * Record link-layer address, and update the state. 
*/ - if (lltable_try_set_entry_addr(ifp, ln, lladdr) == 0) { + linkhdrsize = sizeof(linkhdr); + if (lltable_calc_llheader(ifp, AF_INET6, lladdr, + linkhdr, &linkhdrsize, &lladdr_off) != 0) + goto freeit; + + if (lltable_try_set_entry_addr(ifp, ln, linkhdr, linkhdrsize, + lladdr_off) == 0) { ln = NULL; goto freeit; } @@ -792,7 +801,7 @@ llchange = 0; else { if (ln->la_flags & LLE_VALID) { - if (bcmp(lladdr, &ln->ll_addr, ifp->if_addrlen)) + if (bcmp(lladdr, ln->ll_addr, ifp->if_addrlen)) llchange = 1; else llchange = 0; @@ -834,9 +843,12 @@ * Update link-local address, if any. */ if (lladdr != NULL) { - int ret; - ret = lltable_try_set_entry_addr(ifp, ln,lladdr); - if (ret == 0) { + linkhdrsize = sizeof(linkhdr); + if (lltable_calc_llheader(ifp, AF_INET6, lladdr, + linkhdr, &linkhdrsize, &lladdr_off) != 0) + goto freeit; + if (lltable_try_set_entry_addr(ifp, ln, linkhdr, + linkhdrsize, lladdr_off) == 0) { ln = NULL; goto freeit; } Index: sys/ofed/drivers/infiniband/ulp/ipoib/ipoib_main.c =================================================================== --- sys/ofed/drivers/infiniband/ulp/ipoib/ipoib_main.c +++ sys/ofed/drivers/infiniband/ulp/ipoib/ipoib_main.c @@ -1257,15 +1257,15 @@ const struct sockaddr *dst, struct route *ro) { u_char edst[INFINIBAND_ALEN]; +#if defined(INET) || defined(INET6) struct llentry *lle = NULL; +#endif struct rtentry *rt0 = NULL; struct ipoib_header *eh; int error = 0, is_gw = 0; short type; if (ro != NULL) { - if (!(m->m_flags & (M_BCAST | M_MCAST))) - lle = ro->ro_lle; rt0 = ro->ro_rt; if (rt0 != NULL && (rt0->rt_flags & RTF_GATEWAY) != 0) is_gw = 1; @@ -1291,7 +1291,7 @@ #ifdef INET case AF_INET: if (lle != NULL && (lle->la_flags & LLE_VALID)) - memcpy(edst, &lle->ll_addr.mac8, sizeof(edst)); + memcpy(edst, lle->ll_addr, sizeof(edst)); else if (m->m_flags & M_MCAST) ip_ib_mc_map(((struct sockaddr_in *)dst)->sin_addr.s_addr, ifp->if_broadcastaddr, edst); else @@ -1329,7 +1329,7 @@ #ifdef INET6 case AF_INET6: if (lle != NULL && 
(lle->la_flags & LLE_VALID)) - memcpy(edst, &lle->ll_addr.mac8, sizeof(edst)); + memcpy(edst, lle->ll_addr, sizeof(edst)); else if (m->m_flags & M_MCAST) ipv6_ib_mc_map(&((struct sockaddr_in6 *)dst)->sin6_addr, ifp->if_broadcastaddr, edst); else