Page MenuHomeFreeBSD

D4102.id10015.diff
No OneTemporary

D4102.id10015.diff

Index: sys/dev/cxgb/ulp/tom/cxgb_l2t.c
===================================================================
--- sys/dev/cxgb/ulp/tom/cxgb_l2t.c
+++ sys/dev/cxgb/ulp/tom/cxgb_l2t.c
@@ -215,7 +215,7 @@
struct tom_data *td = sc->tom_softc;
struct toedev *tod = &td->tod;
struct sockaddr_in sin = {0};
- uint8_t dmac[ETHER_ADDR_LEN];
+ uint8_t dmac[ETHER_HDR_LEN];
uint16_t vtag = EVL_VLID_MASK;
int rc;
Index: sys/dev/cxgbe/tom/t4_tom_l2t.c
===================================================================
--- sys/dev/cxgbe/tom/t4_tom_l2t.c
+++ sys/dev/cxgbe/tom/t4_tom_l2t.c
@@ -233,7 +233,7 @@
struct sockaddr_in sin = {0};
struct sockaddr_in6 sin6 = {0};
struct sockaddr *sa;
- uint8_t dmac[ETHER_ADDR_LEN];
+ uint8_t dmac[ETHER_HDR_LEN];
uint16_t vtag = VLAN_NONE;
int rc;
Index: sys/net/bpf.c
===================================================================
--- sys/net/bpf.c
+++ sys/net/bpf.c
@@ -69,6 +69,7 @@
#include <net/if.h>
#include <net/if_var.h>
+#include <net/if_dl.h>
#include <net/bpf.h>
#include <net/bpf_buffer.h>
#ifdef BPF_JITTER
@@ -76,6 +77,7 @@
#endif
#include <net/bpf_zerocopy.h>
#include <net/bpfdesc.h>
+#include <net/route.h>
#include <net/vnet.h>
#include <netinet/in.h>
@@ -164,7 +166,7 @@
static void bpf_detachd_locked(struct bpf_d *);
static void bpf_freed(struct bpf_d *);
static int bpf_movein(struct uio *, int, struct ifnet *, struct mbuf **,
- struct sockaddr *, int *, struct bpf_insn *);
+ struct sockaddr *, int *, struct bpf_d *);
static int bpf_setif(struct bpf_d *, struct ifreq *);
static void bpf_timed_out(void *);
static __inline void
@@ -454,7 +456,7 @@
*/
static int
bpf_movein(struct uio *uio, int linktype, struct ifnet *ifp, struct mbuf **mp,
- struct sockaddr *sockp, int *hdrlen, struct bpf_insn *wfilter)
+ struct sockaddr *sockp, int *hdrlen, struct bpf_d *d)
{
const struct ieee80211_bpf_params *p;
struct ether_header *eh;
@@ -549,7 +551,7 @@
if (error)
goto bad;
- slen = bpf_filter(wfilter, mtod(m, u_char *), len, len);
+ slen = bpf_filter(d->bd_wfilter, mtod(m, u_char *), len, len);
if (slen == 0) {
error = EPERM;
goto bad;
@@ -566,6 +568,10 @@
else
m->m_flags |= M_MCAST;
}
+ if (d->bd_hdrcmplt == 0) {
+ memcpy(eh->ether_shost, IF_LLADDR(ifp),
+ sizeof(eh->ether_shost));
+ }
break;
}
@@ -1088,6 +1094,7 @@
struct ifnet *ifp;
struct mbuf *m, *mc;
struct sockaddr dst;
+ struct route ro;
int error, hlen;
error = devfs_get_cdevpriv((void **)&d);
@@ -1119,7 +1126,7 @@
hlen = 0;
/* XXX: bpf_movein() can sleep */
error = bpf_movein(uio, (int)d->bd_bif->bif_dlt, ifp,
- &m, &dst, &hlen, d->bd_wfilter);
+ &m, &dst, &hlen, d);
if (error) {
d->bd_wdcount++;
return (error);
@@ -1151,7 +1158,14 @@
BPFD_UNLOCK(d);
#endif
- error = (*ifp->if_output)(ifp, m, &dst, NULL);
+ bzero(&ro, sizeof(ro));
+ if (hlen != 0) {
+ ro.ro_prepend = (u_char *)&dst.sa_data;
+ ro.ro_plen = hlen;
+ ro.ro_flags = RT_HAS_HEADER;
+ }
+
+ error = (*ifp->if_output)(ifp, m, &dst, &ro);
if (error)
d->bd_wdcount++;
Index: sys/net/ethernet.h
===================================================================
--- sys/net/ethernet.h
+++ sys/net/ethernet.h
@@ -387,6 +387,7 @@
struct route;
struct sockaddr;
struct bpf_if;
+struct if_encap_req;
extern uint32_t ether_crc32_le(const uint8_t *, size_t);
extern uint32_t ether_crc32_be(const uint8_t *, size_t);
@@ -397,6 +398,7 @@
extern int ether_output(struct ifnet *, struct mbuf *,
const struct sockaddr *, struct route *);
extern int ether_output_frame(struct ifnet *, struct mbuf *);
+extern int ether_requestencap(struct ifnet *, struct if_encap_req *);
extern char *ether_sprintf(const u_int8_t *);
void ether_vlan_mtap(struct bpf_if *, struct mbuf *,
void *, u_int);
Index: sys/net/flowtable.c
===================================================================
--- sys/net/flowtable.c
+++ sys/net/flowtable.c
@@ -665,6 +665,7 @@
flowtable_lookup(sa_family_t sa, struct mbuf *m, struct route *ro)
{
struct flentry *fle;
+ struct llentry *lle;
if (V_flowtable_enable == 0)
return (ENXIO);
@@ -693,8 +694,14 @@
}
ro->ro_rt = fle->f_rt;
- ro->ro_lle = fle->f_lle;
ro->ro_flags |= RT_NORTREF;
+ lle = fle->f_lle;
+ if (lle != NULL && (lle->la_flags & LLE_VALID)) {
+ ro->ro_prepend = lle->r_linkdata;
+ ro->ro_plen = lle->r_hdrlen;
+ ro->ro_flags |= RT_MAY_LOOP;
+ ro->ro_flags |= (!!(lle->la_flags & LLE_IFADDR)) << RT_L2_ME_BIT;
+ }
return (0);
}
Index: sys/net/if.c
===================================================================
--- sys/net/if.c
+++ sys/net/if.c
@@ -669,6 +669,9 @@
if (ifp->if_input == NULL)
ifp->if_input = if_input_default;
+ if (ifp->if_requestencap == NULL)
+ ifp->if_requestencap = if_requestencap_default;
+
if (!vmove) {
#ifdef MAC
mac_ifnet_create(ifp);
Index: sys/net/if_ethersubr.c
===================================================================
--- sys/net/if_ethersubr.c
+++ sys/net/if_ethersubr.c
@@ -136,138 +136,196 @@
}
/*
- * Ethernet output routine.
- * Encapsulate a packet of type family for the local net.
- * Use trailer local net encapsulation if enough data in first
- * packet leaves a multiple of 512 bytes of data in remainder.
+ * Handle link-layer encapsulation requests.
*/
int
-ether_output(struct ifnet *ifp, struct mbuf *m,
- const struct sockaddr *dst, struct route *ro)
+ether_requestencap(struct ifnet *ifp, struct if_encap_req *req)
{
- short type;
- int error = 0, hdrcmplt = 0;
- u_char edst[ETHER_ADDR_LEN];
- struct llentry *lle = NULL;
- struct rtentry *rt0 = NULL;
struct ether_header *eh;
- struct pf_mtag *t;
- int loop_copy = 1;
- int hlen; /* link layer header length */
- int is_gw = 0;
- uint32_t pflags = 0;
+ struct arphdr *ah;
+ uint16_t etype;
+ const u_char *lladdr;
- if (ro != NULL) {
- if (!(m->m_flags & (M_BCAST | M_MCAST))) {
- lle = ro->ro_lle;
- if (lle != NULL)
- pflags = lle->la_flags;
- }
- rt0 = ro->ro_rt;
- if (rt0 != NULL && (rt0->rt_flags & RTF_GATEWAY) != 0)
- is_gw = 1;
- }
-#ifdef MAC
- error = mac_ifnet_check_transmit(ifp, m);
- if (error)
- senderr(error);
-#endif
+ if (req->rtype != IFENCAP_LL)
+ return (EOPNOTSUPP);
- M_PROFILE(m);
- if (ifp->if_flags & IFF_MONITOR)
- senderr(ENETDOWN);
- if (!((ifp->if_flags & IFF_UP) &&
- (ifp->if_drv_flags & IFF_DRV_RUNNING)))
- senderr(ENETDOWN);
+ if (req->bufsize < ETHER_HDR_LEN)
+ return (ENOMEM);
- hlen = ETHER_HDR_LEN;
- switch (dst->sa_family) {
-#ifdef INET
+ eh = (struct ether_header *)req->buf;
+ lladdr = req->lladdr;
+ req->lladdr_off = 0;
+
+ switch (req->family) {
case AF_INET:
- if (lle != NULL && (pflags & LLE_VALID) != 0)
- memcpy(edst, &lle->ll_addr.mac16, sizeof(edst));
- else
- error = arpresolve(ifp, is_gw, m, dst, edst, &pflags);
- if (error)
- return (error == EWOULDBLOCK ? 0 : error);
- type = htons(ETHERTYPE_IP);
+ etype = htons(ETHERTYPE_IP);
+ break;
+ case AF_INET6:
+ etype = htons(ETHERTYPE_IPV6);
break;
case AF_ARP:
- {
- struct arphdr *ah;
- ah = mtod(m, struct arphdr *);
+ ah = (struct arphdr *)req->hdata;
ah->ar_hrd = htons(ARPHRD_ETHER);
- loop_copy = 0; /* if this is for us, don't do it */
-
switch(ntohs(ah->ar_op)) {
case ARPOP_REVREQUEST:
case ARPOP_REVREPLY:
- type = htons(ETHERTYPE_REVARP);
+ etype = htons(ETHERTYPE_REVARP);
break;
case ARPOP_REQUEST:
case ARPOP_REPLY:
default:
- type = htons(ETHERTYPE_ARP);
+ etype = htons(ETHERTYPE_ARP);
break;
}
- if (m->m_flags & M_BCAST)
- bcopy(ifp->if_broadcastaddr, edst, ETHER_ADDR_LEN);
- else
- bcopy(ar_tha(ah), edst, ETHER_ADDR_LEN);
-
+ if (req->flags & IFENCAP_FLAG_BROADCAST)
+ lladdr = ifp->if_broadcastaddr;
+ break;
+ default:
+ return (EAFNOSUPPORT);
}
- break;
+
+ memcpy(&eh->ether_type, &etype, sizeof(eh->ether_type));
+ memcpy(eh->ether_dhost, lladdr, ETHER_ADDR_LEN);
+ memcpy(eh->ether_shost, IF_LLADDR(ifp), ETHER_ADDR_LEN);
+ req->bufsize = sizeof(struct ether_header);
+
+ return (0);
+}
+
+
+static inline int
+ether_resolve_addr(struct ifnet *ifp, struct mbuf *m,
+ const struct sockaddr *dst, struct route *ro, u_char *phdr,
+ uint32_t *pflags)
+{
+ struct ether_header *eh;
+ struct rtentry *rt;
+ uint32_t lleflags = 0;
+ uint16_t etype;
+ int error = 0;
+
+ eh = (struct ether_header *)phdr;
+
+ switch (dst->sa_family) {
+#ifdef INET
+ case AF_INET:
+ if ((m->m_flags & (M_BCAST | M_MCAST)) == 0)
+ error = arpresolve(ifp, 0, m, dst, phdr, &lleflags);
+ else {
+ if (m->m_flags & M_BCAST)
+ memcpy(eh->ether_dhost, ifp->if_broadcastaddr,
+ ETHER_ADDR_LEN);
+ else {
+ const struct in_addr *a;
+ a = &(((const struct sockaddr_in *)dst)->sin_addr);
+ ETHER_MAP_IP_MULTICAST(a, eh->ether_dhost);
+ }
+ etype = htons(ETHERTYPE_IP);
+ memcpy(&eh->ether_type, &etype, sizeof(etype));
+ memcpy(eh->ether_shost, IF_LLADDR(ifp), ETHER_ADDR_LEN);
+ }
+ break;
#endif
#ifdef INET6
case AF_INET6:
- if (lle != NULL && (pflags & LLE_VALID))
- memcpy(edst, &lle->ll_addr.mac16, sizeof(edst));
- else
- error = nd6_resolve(ifp, is_gw, m, dst, (u_char *)edst,
- &pflags);
- if (error)
- return (error == EWOULDBLOCK ? 0 : error);
- type = htons(ETHERTYPE_IPV6);
+ if ((m->m_flags & M_MCAST) == 0)
+ error = nd6_resolve(ifp, 0, m, dst, phdr, &lleflags);
+ else {
+ const struct in6_addr *a6;
+ a6 = &(((const struct sockaddr_in6 *)dst)->sin6_addr);
+ ETHER_MAP_IPV6_MULTICAST(a6, eh->ether_dhost);
+ etype = htons(ETHERTYPE_IPV6);
+ memcpy(&eh->ether_type, &etype, sizeof(etype));
+ memcpy(eh->ether_shost, IF_LLADDR(ifp), ETHER_ADDR_LEN);
+ }
break;
#endif
- case pseudo_AF_HDRCMPLT:
- {
- const struct ether_header *eh;
-
- hdrcmplt = 1;
- /* FALLTHROUGH */
-
- case AF_UNSPEC:
- loop_copy = 0; /* if this is for us, don't do it */
- eh = (const struct ether_header *)dst->sa_data;
- (void)memcpy(edst, eh->ether_dhost, sizeof (edst));
- type = eh->ether_type;
- break;
- }
default:
if_printf(ifp, "can't handle af%d\n", dst->sa_family);
- senderr(EAFNOSUPPORT);
+ if (m != NULL)
+ m_freem(m);
+ return (EAFNOSUPPORT);
+ }
+
+ if (error == EHOSTDOWN) {
+ rt = (ro != NULL) ? ro->ro_rt : NULL;
+ if (rt != NULL && (rt->rt_flags & RTF_GATEWAY) != 0)
+ error = EHOSTUNREACH;
}
- if ((pflags & LLE_IFADDR) != 0) {
+ if (error != 0)
+ return (error);
+
+ *pflags = ((!!(lleflags & LLE_IFADDR)) << RT_L2_ME_BIT) | RT_MAY_LOOP;
+
+ return (0);
+}
+
+/*
+ * Ethernet output routine.
+ * Encapsulate a packet of type family for the local net.
+ * Use trailer local net encapsulation if enough data in first
+ * packet leaves a multiple of 512 bytes of data in remainder.
+ */
+int
+ether_output(struct ifnet *ifp, struct mbuf *m,
+ const struct sockaddr *dst, struct route *ro)
+{
+ int error = 0;
+ char linkhdr[ETHER_HDR_LEN], *phdr;
+ struct ether_header *eh;
+ struct pf_mtag *t;
+ int loop_copy = 1;
+ int hlen; /* link layer header length */
+ uint32_t pflags;
+
+ phdr = NULL;
+ pflags = 0;
+ if (ro != NULL) {
+ phdr = ro->ro_prepend;
+ hlen = ro->ro_plen;
+ pflags = ro->ro_flags;
+ }
+#ifdef MAC
+ error = mac_ifnet_check_transmit(ifp, m);
+ if (error)
+ senderr(error);
+#endif
+
+ M_PROFILE(m);
+ if (ifp->if_flags & IFF_MONITOR)
+ senderr(ENETDOWN);
+ if (!((ifp->if_flags & IFF_UP) &&
+ (ifp->if_drv_flags & IFF_DRV_RUNNING)))
+ senderr(ENETDOWN);
+
+ if (phdr == NULL) {
+ /* No prepend data supplied. Try to calculate ourselves. */
+ phdr = linkhdr;
+ hlen = ETHER_HDR_LEN;
+ error = ether_resolve_addr(ifp, m, dst, ro, phdr, &pflags);
+ if (error != 0)
+ return (error == EWOULDBLOCK ? 0 : error);
+ }
+
+ if ((pflags & RT_L2_ME) != 0) {
update_mbuf_csumflags(m, m);
return (if_simloop(ifp, m, dst->sa_family, 0));
}
+ loop_copy = pflags & RT_MAY_LOOP;
/*
* Add local net header. If no space in first mbuf,
* allocate another.
*/
- M_PREPEND(m, ETHER_HDR_LEN, M_NOWAIT);
+ M_PREPEND(m, hlen, M_NOWAIT);
if (m == NULL)
senderr(ENOBUFS);
- eh = mtod(m, struct ether_header *);
- if (hdrcmplt == 0) {
- memcpy(&eh->ether_type, &type, sizeof(eh->ether_type));
- memcpy(eh->ether_dhost, edst, sizeof (edst));
- memcpy(eh->ether_shost, IF_LLADDR(ifp),sizeof(eh->ether_shost));
+ if ((pflags & RT_HAS_HEADER) == 0) {
+ eh = mtod(m, struct ether_header *);
+ memcpy(eh, phdr, hlen);
}
/*
@@ -279,34 +337,27 @@
* on the wire). However, we don't do that here for security
* reasons and compatibility with the original behavior.
*/
- if ((ifp->if_flags & IFF_SIMPLEX) && loop_copy &&
+ if ((m->m_flags & M_BCAST) && loop_copy && (ifp->if_flags & IFF_SIMPLEX) &&
((t = pf_find_mtag(m)) == NULL || !t->routed)) {
- if (m->m_flags & M_BCAST) {
- struct mbuf *n;
+ struct mbuf *n;
- /*
- * Because if_simloop() modifies the packet, we need a
- * writable copy through m_dup() instead of a readonly
- * one as m_copy[m] would give us. The alternative would
- * be to modify if_simloop() to handle the readonly mbuf,
- * but performancewise it is mostly equivalent (trading
- * extra data copying vs. extra locking).
- *
- * XXX This is a local workaround. A number of less
- * often used kernel parts suffer from the same bug.
- * See PR kern/105943 for a proposed general solution.
- */
- if ((n = m_dup(m, M_NOWAIT)) != NULL) {
- update_mbuf_csumflags(m, n);
- (void)if_simloop(ifp, n, dst->sa_family, hlen);
- } else
- if_inc_counter(ifp, IFCOUNTER_IQDROPS, 1);
- } else if (bcmp(eh->ether_dhost, eh->ether_shost,
- ETHER_ADDR_LEN) == 0) {
- update_mbuf_csumflags(m, m);
- (void) if_simloop(ifp, m, dst->sa_family, hlen);
- return (0); /* XXX */
- }
+ /*
+ * Because if_simloop() modifies the packet, we need a
+ * writable copy through m_dup() instead of a readonly
+ * one as m_copy[m] would give us. The alternative would
+ * be to modify if_simloop() to handle the readonly mbuf,
+ * but performancewise it is mostly equivalent (trading
+ * extra data copying vs. extra locking).
+ *
+ * XXX This is a local workaround. A number of less
+ * often used kernel parts suffer from the same bug.
+ * See PR kern/105943 for a proposed general solution.
+ */
+ if ((n = m_dup(m, M_NOWAIT)) != NULL) {
+ update_mbuf_csumflags(m, n);
+ (void)if_simloop(ifp, n, dst->sa_family, hlen);
+ } else
+ if_inc_counter(ifp, IFCOUNTER_IQDROPS, 1);
}
/*
@@ -798,6 +849,7 @@
ifp->if_output = ether_output;
ifp->if_input = ether_input;
ifp->if_resolvemulti = ether_resolvemulti;
+ ifp->if_requestencap = ether_requestencap;
#ifdef VIMAGE
ifp->if_reassign = ether_reassign;
#endif
Index: sys/net/if_llatbl.h
===================================================================
--- sys/net/if_llatbl.h
+++ sys/net/if_llatbl.h
@@ -48,6 +48,7 @@
#define LLTABLE_WUNLOCK() rw_wunlock(&lltable_rwlock)
#define LLTABLE_LOCK_ASSERT() rw_assert(&lltable_rwlock, RA_LOCKED)
+#define LLE_MAX_LINKHDR 24 /* Full IB header */
/*
* Code referencing llentry must at least hold
* a shared lock
@@ -58,12 +59,9 @@
struct in_addr addr4;
struct in6_addr addr6;
} r_l3addr;
- union {
- uint64_t mac_aligned;
- uint16_t mac16[3];
- uint8_t mac8[20]; /* IB needs 20 bytes. */
- } ll_addr;
- uint32_t spare0;
+ char r_linkdata[LLE_MAX_LINKHDR]; /* L2 data */
+ uint8_t r_hdrlen; /* length for LL header */
+ uint8_t spare0[3];
uint64_t spare1;
struct lltable *lle_tbl;
@@ -79,6 +77,7 @@
uint16_t ln_router;
time_t ln_ntick;
int lle_refcnt;
+ char *ll_addr; /* link-layer address */
LIST_ENTRY(llentry) lle_chain; /* chain of deleted items */
struct callout lle_timer;
@@ -187,6 +186,8 @@
#define LLE_LINKED 0x0040 /* linked to lookup structure */
/* LLE request flags */
#define LLE_EXCLUSIVE 0x2000 /* return lle xlocked */
+#define LLE_ADDRONLY 0x4000 /* return lladdr instead of full header */
+#define LLE_CREATE 0x8000 /* hint to avoid lle lookup */
#define LLATBL_HASH(key, mask) \
(((((((key >> 8) ^ key) >> 8) ^ key) >> 8) ^ key) & mask)
@@ -208,8 +209,11 @@
/* helper functions */
size_t lltable_drop_entry_queue(struct llentry *);
void lltable_set_entry_addr(struct ifnet *ifp, struct llentry *lle,
- const char *lladdr);
+ const char *linkhdr, size_t linkhdrsize, int lladdr_off);
+int lltable_calc_llheader(struct ifnet *ifp, int family, char *lladdr,
+ char *buf, size_t *bufsize, int *lladdr_off);
+int lltable_update_ifaddr(struct lltable *llt);
struct llentry *lltable_alloc_entry(struct lltable *llt, u_int flags,
const struct sockaddr *l4addr);
void lltable_free_entry(struct lltable *llt, struct llentry *lle);
Index: sys/net/if_llatbl.c
===================================================================
--- sys/net/if_llatbl.c
+++ sys/net/if_llatbl.c
@@ -279,14 +279,115 @@
void
lltable_set_entry_addr(struct ifnet *ifp, struct llentry *lle,
- const char *lladdr)
+    const char *linkhdr, size_t linkhdrsize, int lladdr_off)
{
- bcopy(lladdr, &lle->ll_addr, ifp->if_addrlen);
+	memcpy(lle->r_linkdata, linkhdr, linkhdrsize);
+	lle->r_hdrlen = linkhdrsize;
+	lle->ll_addr = &lle->r_linkdata[lladdr_off];
lle->la_flags |= LLE_VALID;
}
/*
+ * Helper function used to pre-compute full/partial link-layer
+ * header data suitable for feeding into if_output().
+ *
+ * Builds an IFENCAP_LL request for @family / @lladdr and passes it to
+ * the interface's if_requestencap method.  @buf is zeroed first.
+ * On success returns 0, stores the resulting header size in @bufsize
+ * and the offset of the link-layer address in @lladdr_off; otherwise
+ * returns the handler's error and leaves both untouched.
+ */
+int
+lltable_calc_llheader(struct ifnet *ifp, int family, char *lladdr,
+    char *buf, size_t *bufsize, int *lladdr_off)
+{
+	struct if_encap_req ereq;
+	int error;
+
+	bzero(buf, *bufsize);
+	bzero(&ereq, sizeof(ereq));
+	ereq.buf = buf;
+	ereq.bufsize = *bufsize;
+	ereq.rtype = IFENCAP_LL;
+	ereq.family = family;
+	ereq.lladdr = lladdr;
+	ereq.lladdr_len = ifp->if_addrlen;
+	error = ifp->if_requestencap(ifp, &ereq);
+	if (error == 0) {
+		*bufsize = ereq.bufsize;
+		*lladdr_off = ereq.lladdr_off;
+	}
+
+	return (error);
+}
+
+/*
+ * Update link-layer header for given @lle after
+ * interface lladdr was changed.
+ *
+ * lltable_foreach_lle() callback; @farg is the table's ifnet.
+ * Entries that are not LLE_VALID are skipped.  If the header cannot be
+ * recomputed the entry keeps its previous header.  Always returns 0.
+ */
+static int
+llentry_update_ifaddr(struct lltable *llt, struct llentry *lle, void *farg)
+{
+	struct ifnet *ifp;
+	u_char linkhdr[LLE_MAX_LINKHDR];
+	size_t linkhdrsize;
+	u_char *lladdr;
+	int lladdr_off;
+
+	ifp = (struct ifnet *)farg;
+
+	LLE_WLOCK(lle);
+	if ((lle->la_flags & LLE_VALID) == 0) {
+		LLE_WUNLOCK(lle);
+		return (0);
+	}
+
+	/* Entries for our own address follow the interface lladdr. */
+	if ((lle->la_flags & LLE_IFADDR) != 0)
+		lladdr = IF_LLADDR(ifp);
+	else
+		lladdr = lle->ll_addr;
+
+	/*
+	 * lltable_calc_llheader() zeroes linkhdr before asking the
+	 * interface to fill it, so only install the result on success;
+	 * otherwise a valid entry would end up with an all-zero header.
+	 */
+	linkhdrsize = sizeof(linkhdr);
+	if (lltable_calc_llheader(ifp, llt->llt_af, lladdr, linkhdr,
+	    &linkhdrsize, &lladdr_off) == 0)
+		memcpy(lle->r_linkdata, linkhdr, linkhdrsize);
+	LLE_WUNLOCK(lle);
+
+	return (0);
+}
+
+/*
+ * Update all calculated headers for given @llt
+ *
+ * Does nothing for loopback interfaces.  Walks the table with the
+ * interface afdata write lock held.  Always returns 0.
+ */
+int
+lltable_update_ifaddr(struct lltable *llt)
+{
+
+	if (llt->llt_ifp->if_flags & IFF_LOOPBACK)
+		return (0);
+
+	IF_AFDATA_WLOCK(llt->llt_ifp);
+	lltable_foreach_lle(llt, llentry_update_ifaddr, llt->llt_ifp);
+	IF_AFDATA_WUNLOCK(llt->llt_ifp);
+
+	return (0);
+}
+
+/*
*
* Performes generic cleanup routines and frees lle.
*
@@ -601,6 +685,9 @@
struct ifnet *ifp;
struct lltable *llt;
struct llentry *lle, *lle_tmp;
+ uint8_t linkhdr[LLE_MAX_LINKHDR];
+ size_t linkhdrsize;
+ int lladdr_off;
u_int laflags = 0;
int error;
@@ -636,10 +723,14 @@
if (lle == NULL)
return (ENOMEM);
- bcopy(LLADDR(dl), &lle->ll_addr, ifp->if_addrlen);
+ linkhdrsize = sizeof(linkhdr);
+ if (lltable_calc_llheader(ifp, dst->sa_family, LLADDR(dl),
+ linkhdr, &linkhdrsize, &lladdr_off) != 0)
+ return (EINVAL);
+ lltable_set_entry_addr(ifp, lle, linkhdr, linkhdrsize,
+ lladdr_off);
if ((rtm->rtm_flags & RTF_ANNOUNCE))
lle->la_flags |= LLE_PUB;
- lle->la_flags |= LLE_VALID;
lle->la_expire = rtm->rtm_rmx.rmx_expire;
laflags = lle->la_flags;
@@ -734,7 +825,7 @@
db_printf(" ln_router=%u\n", lle->ln_router);
db_printf(" ln_ntick=%ju\n", (uintmax_t)lle->ln_ntick);
db_printf(" lle_refcnt=%d\n", lle->lle_refcnt);
- bcopy(&lle->ll_addr.mac16, octet, sizeof(octet));
+ bcopy(lle->ll_addr, octet, sizeof(octet));
db_printf(" ll_addr=%02x:%02x:%02x:%02x:%02x:%02x\n",
octet[0], octet[1], octet[2], octet[3], octet[4], octet[5]);
db_printf(" lle_timer=%p\n", &lle->lle_timer);
Index: sys/net/if_var.h
===================================================================
--- sys/net/if_var.h
+++ sys/net/if_var.h
@@ -126,6 +126,8 @@
u_int tsomaxsegsize; /* TSO maximum segment size in bytes */
};
+struct if_encap_req;
+
/*
* Structure defining a network interface.
*
@@ -227,6 +229,8 @@
void (*if_reassign) /* reassign to vnet routine */
(struct ifnet *, struct vnet *, char *);
if_get_counter_t if_get_counter; /* get counter values */
+ int (*if_requestencap) /* make link header from request */
+ (struct ifnet *, struct if_encap_req *);
/* Statistics. */
counter_u64_t if_counters[IFCOUNTERS];
Index: sys/net/route.h
===================================================================
--- sys/net/route.h
+++ sys/net/route.h
@@ -51,14 +51,21 @@
*/
struct route {
struct rtentry *ro_rt;
- struct llentry *ro_lle;
- struct in_ifaddr *ro_ia;
- int ro_flags;
+ char *ro_prepend;
+ uint16_t ro_plen;
+ uint16_t ro_flags;
struct sockaddr ro_dst;
};
+#define RT_L2_ME_BIT 2 /* dst L2 addr is our address */
+#define RT_MAY_LOOP_BIT 3 /* dst may require loop copy */
+#define RT_HAS_HEADER_BIT 4 /* mbuf already has its header prepended */
+
#define RT_CACHING_CONTEXT 0x1 /* XXX: not used anywhere */
#define RT_NORTREF 0x2 /* doesn't hold reference on ro_rt */
+#define RT_L2_ME (1 << RT_L2_ME_BIT)
+#define RT_MAY_LOOP (1 << RT_MAY_LOOP_BIT)
+#define RT_HAS_HEADER (1 << RT_HAS_HEADER_BIT)
struct rt_metrics {
u_long rmx_locks; /* Kernel must leave these values alone */
@@ -343,6 +350,27 @@
} \
} while (0)
+
+/* Encap request types */
+typedef enum {
+ IFENCAP_LL = 1 /* pre-calculate link-layer header */
+} ife_type;
+
+struct if_encap_req {
+ u_char *buf; /* Destination buffer */
+ size_t bufsize; /* size of provided buffer */
+ ife_type rtype; /* request type */
+ uint32_t flags; /* Request flags */
+ int family; /* Address family */
+ int lladdr_off; /* offset from header start (w) */
+ int lladdr_len; /* lladdr length */
+ char *lladdr; /* link-level address pointer */
+ char *hdata; /* Upper layer header data */
+};
+
+#define IFENCAP_FLAG_BROADCAST 0x02 /* Destination is broadcast */
+int if_requestencap_default(struct ifnet *ifp, struct if_encap_req *req);
+
struct radix_node_head *rt_tables_get_rnh(int, int);
struct ifmultiaddr;
Index: sys/net/route.c
===================================================================
--- sys/net/route.c
+++ sys/net/route.c
@@ -1119,6 +1119,38 @@
}
}
+/*
+ * Default handler for link-layer encapsulation requests, used when an
+ * interface does not supply its own if_requestencap method.
+ *
+ * Only IFENCAP_LL requests are handled; the "header" produced is just
+ * the raw link-layer address copied to the start of req->buf.
+ *
+ * On success returns 0 with req->lladdr_off set to 0 and req->bufsize
+ * set to the number of bytes stored.  Returns EOPNOTSUPP for any other
+ * request type and ENOMEM if the buffer cannot hold the address.
+ */
+int
+if_requestencap_default(struct ifnet *ifp, struct if_encap_req *req)
+{
+
+	if (req->rtype != IFENCAP_LL)
+		return (EOPNOTSUPP);
+
+	if (req->bufsize < req->lladdr_len)
+		return (ENOMEM);
+
+	/* Copy lladdr to storage as is */
+	memmove(req->buf, req->lladdr, req->lladdr_len);
+	/*
+	 * Report the bytes actually used, as ether_requestencap() does;
+	 * callers store the returned size as the header length.
+	 */
+	req->bufsize = req->lladdr_len;
+	req->lladdr_off = 0;
+
+	return (0);
+}
#if 0
int p_sockaddr(char *buf, int buflen, struct sockaddr *s);
Index: sys/netinet/if_ether.h
===================================================================
--- sys/netinet/if_ether.h
+++ sys/netinet/if_ether.h
@@ -114,6 +114,8 @@
struct ifaddr;
+int arpresolve_addr(struct ifnet *ifp, int flags,
+ const struct sockaddr *dst, char *desten, uint32_t *pflags);
int arpresolve(struct ifnet *ifp, int is_gw, struct mbuf *m,
const struct sockaddr *dst, u_char *desten, uint32_t *pflags);
void arprequest(struct ifnet *, const struct in_addr *,
Index: sys/netinet/if_ether.c
===================================================================
--- sys/netinet/if_ether.c
+++ sys/netinet/if_ether.c
@@ -142,7 +142,9 @@
static void arp_check_update_lle(struct arphdr *ah, struct in_addr isaddr,
struct ifnet *ifp, int bridged, struct llentry *la);
static void arp_mark_lle_reachable(struct llentry *la);
+static void arp_iflladdr(void *arg __unused, struct ifnet *ifp);
+static eventhandler_tag iflladdr_tag;
static const struct netisr_handler arp_nh = {
.nh_name = "arp",
@@ -218,6 +220,41 @@
CURVNET_RESTORE();
}
+/*
+ * Ask the interface to build the link-layer header for the ARP packet
+ * @ah.  @buf is zeroed and then handed to the interface's
+ * if_requestencap method as an IFENCAP_LL / AF_ARP request, with
+ * IFENCAP_FLAG_BROADCAST set when @bcast is non-zero.  On success
+ * *bufsize is replaced by the size reported back by the interface.
+ * Returns the error code from if_requestencap.
+ */
+static int
+arp_fillheader(struct ifnet *ifp, struct arphdr *ah, int bcast, u_char *buf,
+    size_t *bufsize)
+{
+	struct if_encap_req req;
+	int rc;
+
+	bzero(buf, *bufsize);
+
+	bzero(&req, sizeof(req));
+	req.rtype = IFENCAP_LL;
+	req.family = AF_ARP;
+	req.flags = (bcast != 0) ? IFENCAP_FLAG_BROADCAST : 0;
+	req.buf = buf;
+	req.bufsize = *bufsize;
+	req.lladdr = ar_tha(ah);
+	req.hdata = (u_char *)ah;
+
+	rc = ifp->if_requestencap(ifp, &req);
+	if (rc != 0)
+		return (rc);
+
+	*bufsize = req.bufsize;
+	return (0);
+}
+
+
/*
* Broadcast an ARP request. Caller specifies:
* - arp header source ip address
@@ -232,6 +259,10 @@
struct arphdr *ah;
struct sockaddr sa;
u_char *carpaddr = NULL;
+ uint8_t linkhdr[LLE_MAX_LINKHDR];
+ size_t linkhdrsize;
+ struct route ro;
+ int error;
if (sip == NULL) {
/*
@@ -287,12 +318,27 @@
bcopy(tip, ar_tpa(ah), ah->ar_pln);
sa.sa_family = AF_ARP;
sa.sa_len = 2;
+
+ /* Calculate link header for sending frame */
+ bzero(&ro, sizeof(ro));
+ linkhdrsize = sizeof(linkhdr);
+ error = arp_fillheader(ifp, ah, 1, linkhdr, &linkhdrsize);
+ if (error != 0) {
+ if_printf(ifp, "Failed to calculate ARP header: %d\n", error);
+ return;
+ }
+
+ ro.ro_prepend = linkhdr;
+ ro.ro_plen = linkhdrsize;
+ ro.ro_flags = 0;
+
m->m_flags |= M_BCAST;
m_clrprotoflags(m); /* Avoid confusing lower layers. */
- (*ifp->if_output)(ifp, m, &sa, NULL);
+ (*ifp->if_output)(ifp, m, &sa, &ro);
ARPSTAT_INC(txrequests);
}
+
/*
* Resolve an IP address into an ethernet address - heavy version.
* Used internally by arpresolve().
@@ -305,18 +351,20 @@
* Note that m_freem() handles NULL.
*/
static int
-arpresolve_full(struct ifnet *ifp, int is_gw, int create, struct mbuf *m,
+arpresolve_full(struct ifnet *ifp, int is_gw, int flags, struct mbuf *m,
const struct sockaddr *dst, u_char *desten, uint32_t *pflags)
{
struct llentry *la = NULL, *la_tmp;
struct mbuf *curr = NULL;
struct mbuf *next = NULL;
int error, renew;
+ char *lladdr;
+ int ll_len;
if (pflags != NULL)
*pflags = 0;
- if (create == 0) {
+ if ((flags & LLE_CREATE) == 0) {
IF_AFDATA_RLOCK(ifp);
la = lla_lookup(LLTABLE(ifp), LLE_EXCLUSIVE, dst);
IF_AFDATA_RUNLOCK(ifp);
@@ -350,7 +398,14 @@
if ((la->la_flags & LLE_VALID) &&
((la->la_flags & LLE_STATIC) || la->la_expire > time_uptime)) {
- bcopy(&la->ll_addr, desten, ifp->if_addrlen);
+ if (flags & LLE_ADDRONLY) {
+ lladdr = la->ll_addr;
+ ll_len = ifp->if_addrlen;
+ } else {
+ lladdr = la->r_linkdata;
+ ll_len = la->r_hdrlen;
+ }
+ bcopy(lladdr, desten, ll_len);
renew = 0;
/*
* If entry has an expiry time and it is approaching,
@@ -364,7 +419,7 @@
}
if (pflags != NULL)
- *pflags = la->la_flags;
+ *pflags = la->la_flags & LLE_IFADDR;
LLE_WUNLOCK(la);
@@ -432,15 +487,34 @@
/*
* Resolve an IP address into an ethernet address.
+ *
+ * Thin wrapper around arpresolve_full(): forces LLE_ADDRONLY into @flags
+ * and passes no mbuf and is_gw == 0.  In arpresolve_full() LLE_ADDRONLY
+ * selects copying the bare link-layer address (if_addrlen bytes) into
+ * @desten rather than the full precomputed header.
+ * Returns whatever arpresolve_full() returns.
+ */
+int
+arpresolve_addr(struct ifnet *ifp, int flags, const struct sockaddr *dst,
+    char *desten, uint32_t *pflags)
+{
+
+	return (arpresolve_full(ifp, 0, flags | LLE_ADDRONLY, NULL, dst,
+	    desten, pflags));
+}
+
+/*
+ * Looks up link header based on an IP address.
* On input:
* ifp is the interface we use
* is_gw != 0 if @dst represents gateway to some destination
* m is the mbuf. May be NULL if we don't have a packet.
* dst is the next hop,
- * desten is the storage to put LL address.
+ * desten is the storage to put LL header.
* flags returns lle entry flags.
*
- * On success, desten and flags are filled in and the function returns 0;
+ * On success, full/partial link header and flags are filled in and
+ * the function returns 0.
* If the packet must be held pending resolution, we return EWOULDBLOCK
* On other errors, we return the corresponding error code.
* Note that m_freem() handles NULL.
@@ -474,11 +544,12 @@
IF_AFDATA_RUNLOCK(ifp);
if (la == NULL)
- return (arpresolve_full(ifp, is_gw, 1, m, dst, desten, pflags));
+ return (arpresolve_full(ifp, is_gw, LLE_CREATE, m, dst, desten,
+ pflags));
if ((la->la_flags & LLE_VALID) &&
((la->la_flags & LLE_STATIC) || la->la_expire > time_uptime)) {
- bcopy(&la->ll_addr, desten, ifp->if_addrlen);
+ bcopy(la->r_linkdata, desten, la->r_hdrlen);
renew = 0;
/*
* If entry has an expiry time and it is approaching,
@@ -492,7 +563,7 @@
}
if (pflags != NULL)
- *pflags = la->la_flags;
+ *pflags = la->la_flags & LLE_IFADDR;
LLE_RUNLOCK(la);
@@ -503,7 +574,7 @@
}
LLE_RUNLOCK(la);
- return (arpresolve_full(ifp, is_gw, 0, m, dst, desten, pflags));
+ return (arpresolve_full(ifp, is_gw, LLE_CREATE, m, dst, desten,pflags));
}
/*
@@ -647,6 +718,13 @@
int carped;
struct sockaddr_in sin;
struct sockaddr *dst;
+ uint8_t linkhdr[LLE_MAX_LINKHDR];
+ size_t linkhdrsize;
+ int lladdr_off;
+ struct route ro;
+ int error;
+
+
sin.sin_len = sizeof(struct sockaddr_in);
sin.sin_family = AF_INET;
sin.sin_addr.s_addr = 0;
@@ -811,14 +889,19 @@
if (la != NULL)
arp_check_update_lle(ah, isaddr, ifp, bridged, la);
else if (itaddr.s_addr == myaddr.s_addr) {
- /*
- * Reply to our address, but no lle exists yet.
- * do we really have to create an entry?
- */
+ /* Reply to our address, but no lle exists yet. */
+ /* Calculate full link prepend to use in lle */
+ linkhdrsize = sizeof(linkhdr);
+ if (lltable_calc_llheader(ifp, AF_INET, ar_sha(ah), linkhdr,
+ &linkhdrsize, &lladdr_off) != 0)
+ goto drop;
+
+ /* Allocate new entry */
la = lltable_alloc_entry(LLTABLE(ifp), 0, dst);
if (la == NULL)
goto drop;
- lltable_set_entry_addr(ifp, la, ar_sha(ah));
+ lltable_set_entry_addr(ifp, la, linkhdr, linkhdrsize,
+ lladdr_off);
IF_AFDATA_WLOCK(ifp);
LLE_WLOCK(la);
@@ -876,7 +959,7 @@
if ((lle != NULL) && (lle->la_flags & LLE_PUB)) {
(void)memcpy(ar_tha(ah), ar_sha(ah), ah->ar_hln);
- (void)memcpy(ar_sha(ah), &lle->ll_addr, ah->ar_hln);
+ (void)memcpy(ar_sha(ah), lle->ll_addr, ah->ar_hln);
LLE_RUNLOCK(lle);
} else {
@@ -956,8 +1039,31 @@
m->m_pkthdr.rcvif = NULL;
sa.sa_family = AF_ARP;
sa.sa_len = 2;
+
+ /* Calculate link header for sending frame */
+ bzero(&ro, sizeof(ro));
+ linkhdrsize = sizeof(linkhdr);
+ error = arp_fillheader(ifp, ah, 0, linkhdr, &linkhdrsize);
+
+ /*
+ * arp_fillheader() may fail due to lack of support inside encap request
+ * routine. This is not necessarily an error, AF_ARP can/should be handled
+ * by if_output().
+ */
+ if (error != 0 && error != EAFNOSUPPORT) {
+ printf("Failed to calculate ARP header: %d\n", error);
+ goto drop;
+ }
+
+ if (error == 0) {
+ ro.ro_prepend = linkhdr;
+ ro.ro_plen = linkhdrsize;
+ ro.ro_flags = 0;
+
+ }
+
m_clrprotoflags(m); /* Avoid confusing lower layers. */
- (*ifp->if_output)(ifp, m, &sa, NULL);
+ (*ifp->if_output)(ifp, m, &sa, &ro);
ARPSTAT_INC(txreplies);
return;
@@ -976,6 +1082,9 @@
{
struct sockaddr sa;
struct mbuf *m_hold, *m_hold_next;
+ uint8_t linkhdr[LLE_MAX_LINKHDR];
+ size_t linkhdrsize;
+ int lladdr_off;
LLE_WLOCK_ASSERT(la);
@@ -992,7 +1101,7 @@
return;
}
if ((la->la_flags & LLE_VALID) &&
- bcmp(ar_sha(ah), &la->ll_addr, ifp->if_addrlen)) {
+ bcmp(ar_sha(ah), la->ll_addr, ifp->if_addrlen)) {
if (la->la_flags & LLE_STATIC) {
LLE_WUNLOCK(la);
if (log_arp_permanent_modify)
@@ -1015,8 +1124,14 @@
}
}
+ /* Calculate full link prepend to use in lle */
+ linkhdrsize = sizeof(linkhdr);
+ if (lltable_calc_llheader(ifp, AF_INET, ar_sha(ah), linkhdr,
+ &linkhdrsize, &lladdr_off) != 0)
+ return;
+
/* Check if something has changed */
- if (memcmp(&la->ll_addr, ar_sha(ah), ifp->if_addrlen) != 0 ||
+ if (memcmp(la->r_linkdata, linkhdr, linkhdrsize) != 0 ||
(la->la_flags & LLE_VALID) == 0) {
/* Perform real LLE update */
/* use afdata WLOCK to update fields */
@@ -1036,7 +1151,8 @@
}
/* Update data */
- lltable_set_entry_addr(ifp, la, ar_sha(ah));
+ lltable_set_entry_addr(ifp, la, linkhdr, linkhdrsize,
+ lladdr_off);
IF_AFDATA_WUNLOCK(ifp);
LLE_REMREF(la);
@@ -1150,10 +1266,30 @@
ifa->ifa_rtrequest = NULL;
}
+/*
+ * A handler for interface link layer address change event.
+ *
+ * Registered from arp_init() on iflladdr_event; asks the table
+ * returned by LLTABLE(ifp) to recompute its calculated headers via
+ * lltable_update_ifaddr().
+ */
+static __noinline void
+arp_iflladdr(void *arg __unused, struct ifnet *ifp)
+{
+	struct lltable *llt;
+
+	llt = LLTABLE(ifp);
+	lltable_update_ifaddr(llt);
+}
+
static void
arp_init(void)
{
netisr_register(&arp_nh);
+	/* Hook the lladdr-change event once, from the default vnet only. */
+	if (IS_DEFAULT_VNET(curvnet))
+		iflladdr_tag = EVENTHANDLER_REGISTER(iflladdr_event,
+		    arp_iflladdr, NULL, EVENTHANDLER_PRI_ANY);
}
SYSINIT(arp, SI_SUB_PROTO_DOMAIN, SI_ORDER_ANY, arp_init, 0);
Index: sys/netinet/in.c
===================================================================
--- sys/netinet/in.c
+++ sys/netinet/in.c
@@ -1238,6 +1238,9 @@
const struct sockaddr_in *sin = (const struct sockaddr_in *)l3addr;
struct ifnet *ifp = llt->llt_ifp;
struct llentry *lle;
+ char linkhdr[LLE_MAX_LINKHDR];
+ size_t linkhdrsize;
+ int lladdr_off;
KASSERT(l3addr->sa_family == AF_INET,
("sin_family %d", l3addr->sa_family));
@@ -1258,7 +1261,12 @@
}
lle->la_flags = flags;
if ((flags & LLE_IFADDR) == LLE_IFADDR) {
- lltable_set_entry_addr(ifp, lle, IF_LLADDR(ifp));
+ linkhdrsize = LLE_MAX_LINKHDR;
+ if (lltable_calc_llheader(ifp, AF_INET, IF_LLADDR(ifp),
+ linkhdr, &linkhdrsize, &lladdr_off) != 0)
+ return (NULL);
+ lltable_set_entry_addr(ifp, lle, linkhdr, linkhdrsize,
+ lladdr_off);
lle->la_flags |= LLE_STATIC;
}
@@ -1337,7 +1345,7 @@
sdl->sdl_type = ifp->if_type;
if ((lle->la_flags & LLE_VALID) == LLE_VALID) {
sdl->sdl_alen = ifp->if_addrlen;
- bcopy(&lle->ll_addr, LLADDR(sdl), ifp->if_addrlen);
+ bcopy(lle->ll_addr, LLADDR(sdl), ifp->if_addrlen);
} else {
sdl->sdl_alen = 0;
bzero(LLADDR(sdl), ifp->if_addrlen);
Index: sys/netinet/ip_output.c
===================================================================
--- sys/netinet/ip_output.c
+++ sys/netinet/ip_output.c
@@ -567,7 +567,7 @@
RO_RTFREE(ro);
if (have_ia_ref)
ifa_free(&ia->ia_ifa);
- ro->ro_lle = NULL;
+ ro->ro_prepend = NULL;
rte = NULL;
gw = dst;
ip = mtod(m, struct ip *);
Index: sys/netinet/toecore.c
===================================================================
--- sys/netinet/toecore.c
+++ sys/netinet/toecore.c
@@ -428,7 +428,7 @@
KASSERT(lle->la_flags & LLE_VALID,
("%s: %p resolved but not valid?", __func__, lle));
- lladdr = (uint8_t *)&lle->ll_addr;
+ lladdr = (uint8_t *)lle->ll_addr;
#ifdef VLAN_TAG
VLAN_TAG(ifp, &vtag);
#endif
Index: sys/netinet6/icmp6.c
===================================================================
--- sys/netinet6/icmp6.c
+++ sys/netinet6/icmp6.c
@@ -2641,7 +2641,7 @@
nd_opt->nd_opt_type = ND_OPT_TARGET_LINKADDR;
nd_opt->nd_opt_len = len >> 3;
lladdr = (char *)(nd_opt + 1);
- bcopy(&ln->ll_addr, lladdr, ifp->if_addrlen);
+ bcopy(ln->ll_addr, lladdr, ifp->if_addrlen);
p += len;
}
}
Index: sys/netinet6/in6.h
===================================================================
--- sys/netinet6/in6.h
+++ sys/netinet6/in6.h
@@ -375,9 +375,9 @@
#if __BSD_VISIBLE
struct route_in6 {
struct rtentry *ro_rt;
- struct llentry *ro_lle;
- struct in6_addr *ro_ia6;
- int ro_flags;
+ char *ro_prepend;
+ uint16_t ro_plen;
+ uint16_t ro_flags;
struct sockaddr_in6 ro_dst;
};
#endif
Index: sys/netinet6/in6.c
===================================================================
--- sys/netinet6/in6.c
+++ sys/netinet6/in6.c
@@ -2241,6 +2241,9 @@
const struct sockaddr_in6 *sin6 = (const struct sockaddr_in6 *)l3addr;
struct ifnet *ifp = llt->llt_ifp;
struct llentry *lle;
+ char linkhdr[LLE_MAX_LINKHDR];
+ size_t linkhdrsize;
+ int lladdr_off;
KASSERT(l3addr->sa_family == AF_INET6,
("sin_family %d", l3addr->sa_family));
@@ -2261,7 +2264,12 @@
}
lle->la_flags = flags;
if ((flags & LLE_IFADDR) == LLE_IFADDR) {
- lltable_set_entry_addr(ifp, lle, IF_LLADDR(ifp));
+ linkhdrsize = LLE_MAX_LINKHDR;
+ if (lltable_calc_llheader(ifp, AF_INET6, IF_LLADDR(ifp),
+ linkhdr, &linkhdrsize, &lladdr_off) != 0)
+ return (NULL);
+ lltable_set_entry_addr(ifp, lle, linkhdr, linkhdrsize,
+ lladdr_off);
lle->la_flags |= LLE_STATIC;
}
@@ -2348,7 +2356,7 @@
sdl->sdl_alen = ifp->if_addrlen;
sdl->sdl_index = ifp->if_index;
sdl->sdl_type = ifp->if_type;
- bcopy(&lle->ll_addr, LLADDR(sdl), ifp->if_addrlen);
+ bcopy(lle->ll_addr, LLADDR(sdl), ifp->if_addrlen);
ndpc.rtm.rtm_rmx.rmx_expire =
lle->la_flags & LLE_STATIC ? 0 : lle->la_expire;
ndpc.rtm.rtm_flags |= (RTF_HOST | RTF_LLDATA);
Index: sys/netinet6/nd6.h
===================================================================
--- sys/netinet6/nd6.h
+++ sys/netinet6/nd6.h
@@ -410,6 +410,8 @@
void nd6_llinfo_setstate(struct llentry *lle, int newstate);
void nd6_timer(void *);
void nd6_purge(struct ifnet *);
+int nd6_resolve_addr(struct ifnet *ifp, int flags, const struct sockaddr *dst,
+ char *desten, uint32_t *pflags);
int nd6_resolve(struct ifnet *, int, struct mbuf *,
const struct sockaddr *, u_char *, uint32_t *);
int nd6_ioctl(u_long, caddr_t, struct ifnet *);
Index: sys/netinet6/nd6.c
===================================================================
--- sys/netinet6/nd6.c
+++ sys/netinet6/nd6.c
@@ -111,7 +111,7 @@
VNET_DEFINE(int, nd6_debug) = 0;
#endif
-static eventhandler_tag lle_event_eh;
+static eventhandler_tag lle_event_eh, iflladdr_event_eh;
/* for debugging? */
#if 0
@@ -137,7 +137,7 @@
static void nd6_llinfo_settimer_locked(struct llentry *, long);
static void clear_llinfo_pqueue(struct llentry *);
static void nd6_rtrequest(int, struct rtentry *, struct rt_addrinfo *);
-static int nd6_resolve_slow(struct ifnet *, struct mbuf *,
+static int nd6_resolve_slow(struct ifnet *, int, struct mbuf *,
const struct sockaddr_in6 *, u_char *, uint32_t *);
static int nd6_need_cache(struct ifnet *);
@@ -188,7 +188,7 @@
gw.sdl_index = ifp->if_index;
gw.sdl_type = ifp->if_type;
if (evt == LLENTRY_RESOLVED)
- bcopy(&lle->ll_addr, gw.sdl_data, ifp->if_addrlen);
+ bcopy(lle->ll_addr, gw.sdl_data, ifp->if_addrlen);
rtinfo.rti_info[RTAX_DST] = (struct sockaddr *)&dst;
rtinfo.rti_info[RTAX_GATEWAY] = (struct sockaddr *)&gw;
rtinfo.rti_addrs = RTA_DST | RTA_GATEWAY;
@@ -196,6 +196,16 @@
type == RTM_ADD ? RTF_UP: 0), 0, RT_DEFAULT_FIB);
}
+/*
+ * A handler for interface link layer address change event.
+ */
+static __noinline void
+nd6_iflladdr(void *arg __unused, struct ifnet *ifp)
+{
+
+ lltable_update_ifaddr(LLTABLE6(ifp));
+}
+
void
nd6_init(void)
{
@@ -211,9 +221,12 @@
nd6_slowtimo, curvnet);
nd6_dad_init();
- if (IS_DEFAULT_VNET(curvnet))
+ if (IS_DEFAULT_VNET(curvnet)) {
lle_event_eh = EVENTHANDLER_REGISTER(lle_event, nd6_lle_event,
NULL, EVENTHANDLER_PRI_ANY);
+ iflladdr_event_eh = EVENTHANDLER_REGISTER(iflladdr_event,
+ nd6_iflladdr, NULL, EVENTHANDLER_PRI_ANY);
+ }
}
#ifdef VIMAGE
@@ -223,8 +236,10 @@
callout_drain(&V_nd6_slowtimo_ch);
callout_drain(&V_nd6_timer_ch);
- if (IS_DEFAULT_VNET(curvnet))
+ if (IS_DEFAULT_VNET(curvnet)) {
EVENTHANDLER_DEREGISTER(lle_event, lle_event_eh);
+ EVENTHANDLER_DEREGISTER(iflladdr_event, iflladdr_event_eh);
+ }
}
#endif
@@ -1704,6 +1719,9 @@
uint16_t router = 0;
struct sockaddr_in6 sin6;
struct mbuf *chain = NULL;
+ u_char linkhdr[LLE_MAX_LINKHDR];
+ size_t linkhdrsize;
+ int lladdr_off;
IF_AFDATA_UNLOCK_ASSERT(ifp);
@@ -1738,8 +1756,15 @@
* Since we already know all the data for the new entry,
* fill it before insertion.
*/
- if (lladdr != NULL)
- lltable_set_entry_addr(ifp, ln, lladdr);
+ if (lladdr != NULL) {
+ linkhdrsize = sizeof(linkhdr);
+ if (lltable_calc_llheader(ifp, AF_INET6, lladdr,
+ linkhdr, &linkhdrsize, &lladdr_off) != 0)
+ return;
+ lltable_set_entry_addr(ifp, ln, linkhdr, linkhdrsize,
+ lladdr_off);
+ }
+
IF_AFDATA_WLOCK(ifp);
LLE_WLOCK(ln);
/* Prefer any existing lle over newly-created one */
@@ -1771,7 +1796,7 @@
olladdr = (ln->la_flags & LLE_VALID) ? 1 : 0;
if (olladdr && lladdr) {
- llchange = bcmp(lladdr, &ln->ll_addr,
+ llchange = bcmp(lladdr, ln->ll_addr,
ifp->if_addrlen);
} else if (!olladdr && lladdr)
llchange = 1;
@@ -1797,7 +1822,13 @@
* Record source link-layer address
* XXX is it dependent to ifp->if_type?
*/
- lltable_set_entry_addr(ifp, ln, lladdr);
+ linkhdrsize = sizeof(linkhdr);
+ if (lltable_calc_llheader(ifp, AF_INET6, lladdr,
+ linkhdr, &linkhdrsize, &lladdr_off) != 0)
+ return;
+ lltable_set_entry_addr(ifp, ln, linkhdr, linkhdrsize,
+ lladdr_off);
+
nd6_llinfo_setstate(ln, ND6_LLINFO_STALE);
EVENTHANDLER_INVOKE(lle_event, ln, LLENTRY_RESOLVED);
@@ -1949,8 +1980,8 @@
}
/*
- * Do L2 address resolution for @sa_dst address. Stores found
- * address in @desten buffer. Copy of lle ln_flags can be also
+ * Lookup link header for @sa_dst address. Stores found
+ * data in @desten buffer. Copy of lle ln_flags can be also
* saved in @pflags if @pflags is non-NULL.
*
* If destination LLE does not exists or lle state modification
@@ -2013,13 +2044,13 @@
/* Fall back to slow processing path */
if (ln != NULL)
LLE_RUNLOCK(ln);
- return (nd6_resolve_slow(ifp, m, dst6, desten, pflags));
+ return (nd6_resolve_slow(ifp, 0, m, dst6, desten, pflags));
}
- bcopy(&ln->ll_addr, desten, ifp->if_addrlen);
+ bcopy(ln->r_linkdata, desten, ln->r_hdrlen);
if (pflags != NULL)
- *pflags = ln->la_flags;
+ *pflags = ln->la_flags & LLE_IFADDR;
LLE_RUNLOCK(ln);
return (0);
}
@@ -2037,12 +2068,13 @@
* Set noinline to be dtrace-friendly
*/
static __noinline int
-nd6_resolve_slow(struct ifnet *ifp, struct mbuf *m,
+nd6_resolve_slow(struct ifnet *ifp, int flags, struct mbuf *m,
const struct sockaddr_in6 *dst, u_char *desten, uint32_t *pflags)
{
struct llentry *lle = NULL, *lle_tmp;
struct in6_addr *psrc, src;
- int send_ns;
+ int send_ns, ll_len;
+ char *lladdr;
/*
* Address resolution or Neighbor Unreachability Detection
@@ -2114,7 +2146,14 @@
* send the packet.
*/
if (lle->ln_state > ND6_LLINFO_INCOMPLETE) {
- bcopy(&lle->ll_addr, desten, ifp->if_addrlen);
+ if (flags & LLE_ADDRONLY) {
+ lladdr = lle->ll_addr;
+ ll_len = ifp->if_addrlen;
+ } else {
+ lladdr = lle->r_linkdata;
+ ll_len = lle->r_hdrlen;
+ }
+ bcopy(lladdr, desten, ll_len);
if (pflags != NULL)
*pflags = lle->la_flags;
LLE_WUNLOCK(lle);
@@ -2174,6 +2213,27 @@
return (EWOULDBLOCK);
}
+/*
+ * Do L2 address resolution for @dst address. Stores found
+ * address in @desten buffer. Copy of lle la_flags can also be
+ * saved in @pflags if @pflags is non-NULL.
+ *
+ * Return values:
+ * - 0 on success (address copied to buffer).
+ * - EWOULDBLOCK (no local error, but address is still unresolved)
+ * - other errors (alloc failure, etc)
+ */
+int
+nd6_resolve_addr(struct ifnet *ifp, int flags, const struct sockaddr *dst,
+ char *desten, uint32_t *pflags)
+{
+ int error;
+
+ flags |= LLE_ADDRONLY;
+ error = nd6_resolve_slow(ifp, flags, NULL,
+ (const struct sockaddr_in6 *)dst, desten, pflags);
+ return (error);
+}
int
nd6_flush_holdchain(struct ifnet *ifp, struct ifnet *origifp, struct mbuf *chain,
Index: sys/netinet6/nd6_nbr.c
===================================================================
--- sys/netinet6/nd6_nbr.c
+++ sys/netinet6/nd6_nbr.c
@@ -643,6 +643,9 @@
union nd_opts ndopts;
struct mbuf *chain = NULL;
struct sockaddr_in6 sin6;
+ u_char linkhdr[LLE_MAX_LINKHDR];
+ size_t linkhdrsize;
+ int lladdr_off;
char ip6bufs[INET6_ADDRSTRLEN], ip6bufd[INET6_ADDRSTRLEN];
if (ip6->ip6_hlim != 255) {
@@ -765,7 +768,13 @@
/*
* Record link-layer address, and update the state.
*/
- lltable_set_entry_addr(ifp, ln, lladdr);
+ linkhdrsize = sizeof(linkhdr);
+ if (lltable_calc_llheader(ifp, AF_INET6, lladdr,
+ linkhdr, &linkhdrsize, &lladdr_off) != 0)
+ return;
+ lltable_set_entry_addr(ifp, ln, linkhdr, linkhdrsize,
+ lladdr_off);
+
EVENTHANDLER_INVOKE(lle_event, ln, LLENTRY_RESOLVED);
if (is_solicited)
nd6_llinfo_setstate(ln, ND6_LLINFO_REACHABLE);
@@ -789,7 +798,7 @@
llchange = 0;
else {
if (ln->la_flags & LLE_VALID) {
- if (bcmp(lladdr, &ln->ll_addr, ifp->if_addrlen))
+ if (bcmp(lladdr, ln->ll_addr, ifp->if_addrlen))
llchange = 1;
else
llchange = 0;
@@ -831,7 +840,13 @@
* Update link-local address, if any.
*/
if (lladdr != NULL) {
- lltable_set_entry_addr(ifp, ln, lladdr);
+ linkhdrsize = sizeof(linkhdr);
+ if (lltable_calc_llheader(ifp, AF_INET6, lladdr,
+ linkhdr, &linkhdrsize, &lladdr_off) != 0)
+ return;
+ lltable_set_entry_addr(ifp, ln, linkhdr, linkhdrsize,
+ lladdr_off);
+
EVENTHANDLER_INVOKE(lle_event, ln,
LLENTRY_RESOLVED);
}
Index: sys/ofed/drivers/infiniband/core/addr.c
===================================================================
--- sys/ofed/drivers/infiniband/core/addr.c
+++ sys/ofed/drivers/infiniband/core/addr.c
@@ -281,8 +281,6 @@
RTFREE_LOCKED(rte);
return -EHOSTUNREACH;
}
- if (rte->rt_flags & RTF_GATEWAY)
- is_gw = 1;
/*
* If it's not multicast or broadcast and the route doesn't match the
* requested interface return unreachable. Otherwise fetch the
@@ -325,20 +323,18 @@
* Resolve the link local address.
*/
switch (dst_in->sa_family) {
-#ifdef INET
case AF_INET:
- error = arpresolve(ifp, is_gw, NULL, dst_in, edst, NULL);
+ error = arpresolve_addr(ifp, 0, dst_in, edst, NULL);
break;
-#endif
-#ifdef INET6
case AF_INET6:
- error = nd6_resolve(ifp, is_gw, NULL, dst_in, edst, NULL);
+ error = nd6_resolve_addr(ifp, 0, dst_in, edst, NULL);
break;
-#endif
default:
/* XXX: Shouldn't happen. */
error = -EINVAL;
}
+ if (error == EHOSTDOWN && (rte->rt_flags & RTF_GATEWAY))
+ error = EHOSTUNREACH;
RTFREE(rte);
if (error == 0) {
memcpy(src_in, ifa->ifa_addr, ip_addr_size(ifa->ifa_addr));
Index: sys/ofed/drivers/infiniband/ulp/ipoib/ipoib_main.c
===================================================================
--- sys/ofed/drivers/infiniband/ulp/ipoib/ipoib_main.c
+++ sys/ofed/drivers/infiniband/ulp/ipoib/ipoib_main.c
@@ -36,6 +36,7 @@
static int ipoib_resolvemulti(struct ifnet *, struct sockaddr **,
struct sockaddr *);
+static int ipoib_requestencap(struct ifnet *, struct if_encap_req *);
#include <linux/module.h>
@@ -876,6 +877,7 @@
dev->if_output = ipoib_output;
dev->if_input = ipoib_input;
dev->if_resolvemulti = ipoib_resolvemulti;
+ dev->if_requestencap = ipoib_requestencap;
dev->if_baudrate = IF_Gbps(10);
dev->if_broadcastaddr = priv->broadcastaddr;
dev->if_snd.ifq_maxlen = ipoib_sendq_size * 2;
@@ -1249,61 +1251,33 @@
destroy_workqueue(ipoib_workqueue);
}
-/*
- * Infiniband output routine.
- */
static int
-ipoib_output(struct ifnet *ifp, struct mbuf *m,
- const struct sockaddr *dst, struct route *ro)
+ipoib_requestencap(struct ifnet *ifp, struct if_encap_req *req)
{
- u_char edst[INFINIBAND_ALEN];
- struct llentry *lle = NULL;
- struct rtentry *rt0 = NULL;
- struct ipoib_header *eh;
- int error = 0, is_gw = 0;
+ struct ipoib_header *ih;
+ struct arphdr *ah;
short type;
+ const char *lladdr;
- if (ro != NULL) {
- if (!(m->m_flags & (M_BCAST | M_MCAST)))
- lle = ro->ro_lle;
- rt0 = ro->ro_rt;
- if (rt0 != NULL && (rt0->rt_flags & RTF_GATEWAY) != 0)
- is_gw = 1;
- }
-#ifdef MAC
- error = mac_ifnet_check_transmit(ifp, m);
- if (error)
- goto bad;
-#endif
+ if (req->rtype != IFENCAP_LL)
+ return (EOPNOTSUPP);
- M_PROFILE(m);
- if (ifp->if_flags & IFF_MONITOR) {
- error = ENETDOWN;
- goto bad;
- }
- if (!((ifp->if_flags & IFF_UP) &&
- (ifp->if_drv_flags & IFF_DRV_RUNNING))) {
- error = ENETDOWN;
- goto bad;
- }
+ if (req->bufsize < sizeof(struct ipoib_header))
+ return (ENOMEM);
- switch (dst->sa_family) {
-#ifdef INET
+ ih = (struct ipoib_header *)req->buf;
+ lladdr = req->lladdr;
+ req->lladdr_off = offsetof(struct ipoib_header, hwaddr);
+
+ switch (req->family) {
case AF_INET:
- if (lle != NULL && (lle->la_flags & LLE_VALID))
- memcpy(edst, &lle->ll_addr.mac8, sizeof(edst));
- else if (m->m_flags & M_MCAST)
- ip_ib_mc_map(((struct sockaddr_in *)dst)->sin_addr.s_addr, ifp->if_broadcastaddr, edst);
- else
- error = arpresolve(ifp, is_gw, m, dst, edst, NULL);
- if (error)
- return (error == EWOULDBLOCK ? 0 : error);
type = htons(ETHERTYPE_IP);
break;
+ case AF_INET6:
+ type = htons(ETHERTYPE_IPV6);
+ break;
case AF_ARP:
- {
- struct arphdr *ah;
- ah = mtod(m, struct arphdr *);
+ ah = (struct arphdr *)req->hdata;
ah->ar_hrd = htons(ARPHRD_INFINIBAND);
switch(ntohs(ah->ar_op)) {
@@ -1318,46 +1292,147 @@
break;
}
- if (m->m_flags & M_BCAST)
- bcopy(ifp->if_broadcastaddr, edst, INFINIBAND_ALEN);
- else
- bcopy(ar_tha(ah), edst, INFINIBAND_ALEN);
-
+ if (req->flags & IFENCAP_FLAG_BROADCAST)
+ lladdr = ifp->if_broadcastaddr;
+ break;
+ default:
+ return (EAFNOSUPPORT);
}
- break;
+
+ memcpy(&ih->proto , &type, sizeof(ih->proto));
+ memcpy(ih->hwaddr, lladdr, INFINIBAND_ALEN);
+ req->bufsize = sizeof(struct ipoib_header);
+
+ return (0);
+}
+
+static inline int
+ipoib_resolve_addr(struct ifnet *ifp, struct mbuf *m,
+ const struct sockaddr *dst, struct route *ro, char *phdr,
+ uint32_t *pflags)
+{
+ struct ipoib_header *ih;
+ uint32_t lleflags = 0;
+ struct rtentry *rt;
+ short type;
+ int error = 0;
+
+ ih = (struct ipoib_header *)phdr;
+
+ switch (dst->sa_family) {
+#ifdef INET
+ case AF_INET:
+ if ((m->m_flags & (M_BCAST | M_MCAST)) == 0)
+ error = arpresolve(ifp, 0, m, dst, phdr, &lleflags);
+ else {
+ if (m->m_flags & M_BCAST)
+ memcpy(&ih->hwaddr, ifp->if_broadcastaddr,
+ INFINIBAND_ALEN);
+ else {
+ const struct in_addr *a;
+ a = &(((const struct sockaddr_in *)dst)->sin_addr);
+ ip_ib_mc_map(a->s_addr, ifp->if_broadcastaddr,
+ (char *)&ih->hwaddr);
+ }
+ type = htons(ETHERTYPE_IP);
+ memcpy(&ih->proto, &type, sizeof(ih->proto));
+ }
+ break;
#endif
#ifdef INET6
case AF_INET6:
- if (lle != NULL && (lle->la_flags & LLE_VALID))
- memcpy(edst, &lle->ll_addr.mac8, sizeof(edst));
- else if (m->m_flags & M_MCAST)
- ipv6_ib_mc_map(&((struct sockaddr_in6 *)dst)->sin6_addr, ifp->if_broadcastaddr, edst);
- else
- error = nd6_resolve(ifp, is_gw, m, dst, edst, NULL);
- if (error)
- return error;
- type = htons(ETHERTYPE_IPV6);
+ if ((m->m_flags & M_MCAST) == 0)
+ error = nd6_resolve(ifp, 0, m, dst, phdr, &lleflags);
+ else {
+ const struct in6_addr *a6;
+ a6 = &(((const struct sockaddr_in6 *)dst)->sin6_addr);
+ ipv6_ib_mc_map(a6, ifp->if_broadcastaddr,
+ (char *)&ih->hwaddr);
+ type = htons(ETHERTYPE_IPV6);
+ memcpy(&ih->proto, &type, sizeof(ih->proto));
+ }
break;
#endif
-
default:
if_printf(ifp, "can't handle af%d\n", dst->sa_family);
- error = EAFNOSUPPORT;
+ if (m != NULL)
+ m_freem(m);
+ return (EAFNOSUPPORT);
+ }
+
+ if (error == EHOSTDOWN) {
+ rt = (ro != NULL) ? ro->ro_rt : NULL;
+ if (rt != NULL && (rt->rt_flags & RTF_GATEWAY) != 0)
+ error = EHOSTUNREACH;
+ }
+
+ if (error != 0)
+ return (error);
+
+ *pflags = ((!!(lleflags & LLE_IFADDR)) << RT_L2_ME_BIT) | RT_MAY_LOOP;
+
+ return (0);
+}
+
+/*
+ * Infiniband output routine.
+ */
+static int
+ipoib_output(struct ifnet *ifp, struct mbuf *m,
+ const struct sockaddr *dst, struct route *ro)
+{
+ char linkhdr[IPOIB_HEADER_LEN], *phdr;
+ struct ipoib_header *ih;
+ int hlen; /* link layer header length */
+ int error = 0;
+ uint32_t pflags;
+
+ phdr = NULL;
+ pflags = 0;
+ if (ro != NULL) {
+ phdr = ro->ro_prepend;
+ hlen = ro->ro_plen;
+ pflags = ro->ro_flags;
+ }
+#ifdef MAC
+ error = mac_ifnet_check_transmit(ifp, m);
+ if (error)
+ goto bad;
+#endif
+
+ M_PROFILE(m);
+ if (ifp->if_flags & IFF_MONITOR) {
+ error = ENETDOWN;
+ goto bad;
+ }
+ if (!((ifp->if_flags & IFF_UP) &&
+ (ifp->if_drv_flags & IFF_DRV_RUNNING))) {
+ error = ENETDOWN;
goto bad;
}
+ if (phdr == NULL) {
+ /* No prepend data supplied. Try to calculate ourselves. */
+ phdr = linkhdr;
+ hlen = IPOIB_HEADER_LEN;
+ error = ipoib_resolve_addr(ifp, m, dst, ro, phdr, &pflags);
+ if (error != 0)
+ return (error == EWOULDBLOCK ? 0 : error);
+ }
+
/*
* Add local net header. If no space in first mbuf,
* allocate another.
*/
- M_PREPEND(m, IPOIB_HEADER_LEN, M_NOWAIT);
+ M_PREPEND(m, hlen, M_NOWAIT);
if (m == NULL) {
error = ENOBUFS;
goto bad;
}
- eh = mtod(m, struct ipoib_header *);
- (void)memcpy(&eh->proto, &type, sizeof(eh->proto));
- (void)memcpy(&eh->hwaddr, edst, sizeof (edst));
+ if ((pflags & RT_HAS_HEADER) == 0) {
+ ih = mtod(m, struct ipoib_header *);
+ memcpy(ih, phdr, hlen);
+ }
/*
* Queue message on interface, update output statistics if

File Metadata

Mime Type
text/plain
Expires
Wed, Feb 18, 4:25 AM (18 h, 14 m)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
28836916
Default Alt Text
D4102.id10015.diff (52 KB)

Event Timeline