Page Menu
Home
FreeBSD
Search
Configure Global Search
Log In
Files
F145213393
D4102.id10015.diff
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Flag For Later
Award Token
Size
52 KB
Referenced Files
None
Subscribers
None
D4102.id10015.diff
View Options
Index: sys/dev/cxgb/ulp/tom/cxgb_l2t.c
===================================================================
--- sys/dev/cxgb/ulp/tom/cxgb_l2t.c
+++ sys/dev/cxgb/ulp/tom/cxgb_l2t.c
@@ -215,7 +215,7 @@
struct tom_data *td = sc->tom_softc;
struct toedev *tod = &td->tod;
struct sockaddr_in sin = {0};
- uint8_t dmac[ETHER_ADDR_LEN];
+ uint8_t dmac[ETHER_HDR_LEN];
uint16_t vtag = EVL_VLID_MASK;
int rc;
Index: sys/dev/cxgbe/tom/t4_tom_l2t.c
===================================================================
--- sys/dev/cxgbe/tom/t4_tom_l2t.c
+++ sys/dev/cxgbe/tom/t4_tom_l2t.c
@@ -233,7 +233,7 @@
struct sockaddr_in sin = {0};
struct sockaddr_in6 sin6 = {0};
struct sockaddr *sa;
- uint8_t dmac[ETHER_ADDR_LEN];
+ uint8_t dmac[ETHER_HDR_LEN];
uint16_t vtag = VLAN_NONE;
int rc;
Index: sys/net/bpf.c
===================================================================
--- sys/net/bpf.c
+++ sys/net/bpf.c
@@ -69,6 +69,7 @@
#include <net/if.h>
#include <net/if_var.h>
+#include <net/if_dl.h>
#include <net/bpf.h>
#include <net/bpf_buffer.h>
#ifdef BPF_JITTER
@@ -76,6 +77,7 @@
#endif
#include <net/bpf_zerocopy.h>
#include <net/bpfdesc.h>
+#include <net/route.h>
#include <net/vnet.h>
#include <netinet/in.h>
@@ -164,7 +166,7 @@
static void bpf_detachd_locked(struct bpf_d *);
static void bpf_freed(struct bpf_d *);
static int bpf_movein(struct uio *, int, struct ifnet *, struct mbuf **,
- struct sockaddr *, int *, struct bpf_insn *);
+ struct sockaddr *, int *, struct bpf_d *);
static int bpf_setif(struct bpf_d *, struct ifreq *);
static void bpf_timed_out(void *);
static __inline void
@@ -454,7 +456,7 @@
*/
static int
bpf_movein(struct uio *uio, int linktype, struct ifnet *ifp, struct mbuf **mp,
- struct sockaddr *sockp, int *hdrlen, struct bpf_insn *wfilter)
+ struct sockaddr *sockp, int *hdrlen, struct bpf_d *d)
{
const struct ieee80211_bpf_params *p;
struct ether_header *eh;
@@ -549,7 +551,7 @@
if (error)
goto bad;
- slen = bpf_filter(wfilter, mtod(m, u_char *), len, len);
+ slen = bpf_filter(d->bd_wfilter, mtod(m, u_char *), len, len);
if (slen == 0) {
error = EPERM;
goto bad;
@@ -566,6 +568,10 @@
else
m->m_flags |= M_MCAST;
}
+ if (d->bd_hdrcmplt == 0) {
+ memcpy(eh->ether_shost, IF_LLADDR(ifp),
+ sizeof(eh->ether_shost));
+ }
break;
}
@@ -1088,6 +1094,7 @@
struct ifnet *ifp;
struct mbuf *m, *mc;
struct sockaddr dst;
+ struct route ro;
int error, hlen;
error = devfs_get_cdevpriv((void **)&d);
@@ -1119,7 +1126,7 @@
hlen = 0;
/* XXX: bpf_movein() can sleep */
error = bpf_movein(uio, (int)d->bd_bif->bif_dlt, ifp,
- &m, &dst, &hlen, d->bd_wfilter);
+ &m, &dst, &hlen, d);
if (error) {
d->bd_wdcount++;
return (error);
@@ -1151,7 +1158,14 @@
BPFD_UNLOCK(d);
#endif
- error = (*ifp->if_output)(ifp, m, &dst, NULL);
+ bzero(&ro, sizeof(ro));
+ if (hlen != 0) {
+ ro.ro_prepend = (u_char *)&dst.sa_data;
+ ro.ro_plen = hlen;
+ ro.ro_flags = RT_HAS_HEADER;
+ }
+
+ error = (*ifp->if_output)(ifp, m, &dst, &ro);
if (error)
d->bd_wdcount++;
Index: sys/net/ethernet.h
===================================================================
--- sys/net/ethernet.h
+++ sys/net/ethernet.h
@@ -387,6 +387,7 @@
struct route;
struct sockaddr;
struct bpf_if;
+struct if_encap_req;
extern uint32_t ether_crc32_le(const uint8_t *, size_t);
extern uint32_t ether_crc32_be(const uint8_t *, size_t);
@@ -397,6 +398,7 @@
extern int ether_output(struct ifnet *, struct mbuf *,
const struct sockaddr *, struct route *);
extern int ether_output_frame(struct ifnet *, struct mbuf *);
+extern int ether_requestencap(struct ifnet *, struct if_encap_req *);
extern char *ether_sprintf(const u_int8_t *);
void ether_vlan_mtap(struct bpf_if *, struct mbuf *,
void *, u_int);
Index: sys/net/flowtable.c
===================================================================
--- sys/net/flowtable.c
+++ sys/net/flowtable.c
@@ -665,6 +665,7 @@
flowtable_lookup(sa_family_t sa, struct mbuf *m, struct route *ro)
{
struct flentry *fle;
+ struct llentry *lle;
if (V_flowtable_enable == 0)
return (ENXIO);
@@ -693,8 +694,14 @@
}
ro->ro_rt = fle->f_rt;
- ro->ro_lle = fle->f_lle;
ro->ro_flags |= RT_NORTREF;
+ lle = fle->f_lle;
+ if (lle != NULL && (lle->la_flags & LLE_VALID)) {
+ ro->ro_prepend = lle->r_linkdata;
+ ro->ro_plen = lle->r_hdrlen;
+ ro->ro_flags |= RT_MAY_LOOP;
+ ro->ro_flags |= (!!(lle->la_flags & LLE_IFADDR)) << RT_L2_ME_BIT;
+ }
return (0);
}
Index: sys/net/if.c
===================================================================
--- sys/net/if.c
+++ sys/net/if.c
@@ -669,6 +669,9 @@
if (ifp->if_input == NULL)
ifp->if_input = if_input_default;
+ if (ifp->if_requestencap == NULL)
+ ifp->if_requestencap = if_requestencap_default;
+
if (!vmove) {
#ifdef MAC
mac_ifnet_create(ifp);
Index: sys/net/if_ethersubr.c
===================================================================
--- sys/net/if_ethersubr.c
+++ sys/net/if_ethersubr.c
@@ -136,138 +136,196 @@
}
/*
- * Ethernet output routine.
- * Encapsulate a packet of type family for the local net.
- * Use trailer local net encapsulation if enough data in first
- * packet leaves a multiple of 512 bytes of data in remainder.
+ * Handle link-layer encapsulation requests.
*/
int
-ether_output(struct ifnet *ifp, struct mbuf *m,
- const struct sockaddr *dst, struct route *ro)
+ether_requestencap(struct ifnet *ifp, struct if_encap_req *req)
{
- short type;
- int error = 0, hdrcmplt = 0;
- u_char edst[ETHER_ADDR_LEN];
- struct llentry *lle = NULL;
- struct rtentry *rt0 = NULL;
struct ether_header *eh;
- struct pf_mtag *t;
- int loop_copy = 1;
- int hlen; /* link layer header length */
- int is_gw = 0;
- uint32_t pflags = 0;
+ struct arphdr *ah;
+ uint16_t etype;
+ const u_char *lladdr;
- if (ro != NULL) {
- if (!(m->m_flags & (M_BCAST | M_MCAST))) {
- lle = ro->ro_lle;
- if (lle != NULL)
- pflags = lle->la_flags;
- }
- rt0 = ro->ro_rt;
- if (rt0 != NULL && (rt0->rt_flags & RTF_GATEWAY) != 0)
- is_gw = 1;
- }
-#ifdef MAC
- error = mac_ifnet_check_transmit(ifp, m);
- if (error)
- senderr(error);
-#endif
+ if (req->rtype != IFENCAP_LL)
+ return (EOPNOTSUPP);
- M_PROFILE(m);
- if (ifp->if_flags & IFF_MONITOR)
- senderr(ENETDOWN);
- if (!((ifp->if_flags & IFF_UP) &&
- (ifp->if_drv_flags & IFF_DRV_RUNNING)))
- senderr(ENETDOWN);
+ if (req->bufsize < ETHER_HDR_LEN)
+ return (ENOMEM);
- hlen = ETHER_HDR_LEN;
- switch (dst->sa_family) {
-#ifdef INET
+ eh = (struct ether_header *)req->buf;
+ lladdr = req->lladdr;
+ req->lladdr_off = 0;
+
+ switch (req->family) {
case AF_INET:
- if (lle != NULL && (pflags & LLE_VALID) != 0)
- memcpy(edst, &lle->ll_addr.mac16, sizeof(edst));
- else
- error = arpresolve(ifp, is_gw, m, dst, edst, &pflags);
- if (error)
- return (error == EWOULDBLOCK ? 0 : error);
- type = htons(ETHERTYPE_IP);
+ etype = htons(ETHERTYPE_IP);
+ break;
+ case AF_INET6:
+ etype = htons(ETHERTYPE_IPV6);
break;
case AF_ARP:
- {
- struct arphdr *ah;
- ah = mtod(m, struct arphdr *);
+ ah = (struct arphdr *)req->hdata;
ah->ar_hrd = htons(ARPHRD_ETHER);
- loop_copy = 0; /* if this is for us, don't do it */
-
switch(ntohs(ah->ar_op)) {
case ARPOP_REVREQUEST:
case ARPOP_REVREPLY:
- type = htons(ETHERTYPE_REVARP);
+ etype = htons(ETHERTYPE_REVARP);
break;
case ARPOP_REQUEST:
case ARPOP_REPLY:
default:
- type = htons(ETHERTYPE_ARP);
+ etype = htons(ETHERTYPE_ARP);
break;
}
- if (m->m_flags & M_BCAST)
- bcopy(ifp->if_broadcastaddr, edst, ETHER_ADDR_LEN);
- else
- bcopy(ar_tha(ah), edst, ETHER_ADDR_LEN);
-
+ if (req->flags & IFENCAP_FLAG_BROADCAST)
+ lladdr = ifp->if_broadcastaddr;
+ break;
+ default:
+ return (EAFNOSUPPORT);
}
- break;
+
+ memcpy(&eh->ether_type, &etype, sizeof(eh->ether_type));
+ memcpy(eh->ether_dhost, lladdr, ETHER_ADDR_LEN);
+ memcpy(eh->ether_shost, IF_LLADDR(ifp), ETHER_ADDR_LEN);
+ req->bufsize = sizeof(struct ether_header);
+
+ return (0);
+}
+
+
+static inline int
+ether_resolve_addr(struct ifnet *ifp, struct mbuf *m,
+ const struct sockaddr *dst, struct route *ro, u_char *phdr,
+ uint32_t *pflags)
+{
+ struct ether_header *eh;
+ struct rtentry *rt;
+ uint32_t lleflags = 0;
+ uint16_t etype;
+ int error = 0;
+
+ eh = (struct ether_header *)phdr;
+
+ switch (dst->sa_family) {
+#ifdef INET
+ case AF_INET:
+ if ((m->m_flags & (M_BCAST | M_MCAST)) == 0)
+ error = arpresolve(ifp, 0, m, dst, phdr, &lleflags);
+ else {
+ if (m->m_flags & M_BCAST)
+ memcpy(eh->ether_dhost, ifp->if_broadcastaddr,
+ ETHER_ADDR_LEN);
+ else {
+ const struct in_addr *a;
+ a = &(((const struct sockaddr_in *)dst)->sin_addr);
+ ETHER_MAP_IP_MULTICAST(a, eh->ether_dhost);
+ }
+ etype = htons(ETHERTYPE_IP);
+ memcpy(&eh->ether_type, &etype, sizeof(etype));
+ memcpy(eh->ether_shost, IF_LLADDR(ifp), ETHER_ADDR_LEN);
+ }
+ break;
#endif
#ifdef INET6
case AF_INET6:
- if (lle != NULL && (pflags & LLE_VALID))
- memcpy(edst, &lle->ll_addr.mac16, sizeof(edst));
- else
- error = nd6_resolve(ifp, is_gw, m, dst, (u_char *)edst,
- &pflags);
- if (error)
- return (error == EWOULDBLOCK ? 0 : error);
- type = htons(ETHERTYPE_IPV6);
+ if ((m->m_flags & M_MCAST) == 0)
+ error = nd6_resolve(ifp, 0, m, dst, phdr, &lleflags);
+ else {
+ const struct in6_addr *a6;
+ a6 = &(((const struct sockaddr_in6 *)dst)->sin6_addr);
+ ETHER_MAP_IPV6_MULTICAST(a6, eh->ether_dhost);
+ etype = htons(ETHERTYPE_IPV6);
+ memcpy(&eh->ether_type, &etype, sizeof(etype));
+ memcpy(eh->ether_shost, IF_LLADDR(ifp), ETHER_ADDR_LEN);
+ }
break;
#endif
- case pseudo_AF_HDRCMPLT:
- {
- const struct ether_header *eh;
-
- hdrcmplt = 1;
- /* FALLTHROUGH */
-
- case AF_UNSPEC:
- loop_copy = 0; /* if this is for us, don't do it */
- eh = (const struct ether_header *)dst->sa_data;
- (void)memcpy(edst, eh->ether_dhost, sizeof (edst));
- type = eh->ether_type;
- break;
- }
default:
if_printf(ifp, "can't handle af%d\n", dst->sa_family);
- senderr(EAFNOSUPPORT);
+ if (m != NULL)
+ m_freem(m);
+ return (EAFNOSUPPORT);
+ }
+
+ if (error == EHOSTDOWN) {
+ rt = (ro != NULL) ? ro->ro_rt : NULL;
+ if (rt != NULL && (rt->rt_flags & RTF_GATEWAY) != 0)
+ error = EHOSTUNREACH;
}
- if ((pflags & LLE_IFADDR) != 0) {
+ if (error != 0)
+ return (error);
+
+ *pflags = ((!!(lleflags & LLE_IFADDR)) << RT_L2_ME_BIT) | RT_MAY_LOOP;
+
+ return (0);
+}
+
+/*
+ * Ethernet output routine.
+ * Encapsulate a packet of type family for the local net.
+ * Use trailer local net encapsulation if enough data in first
+ * packet leaves a multiple of 512 bytes of data in remainder.
+ */
+int
+ether_output(struct ifnet *ifp, struct mbuf *m,
+ const struct sockaddr *dst, struct route *ro)
+{
+ int error = 0;
+ char linkhdr[ETHER_HDR_LEN], *phdr;
+ struct ether_header *eh;
+ struct pf_mtag *t;
+ int loop_copy = 1;
+ int hlen; /* link layer header length */
+ uint32_t pflags;
+
+ phdr = NULL;
+ pflags = 0;
+ if (ro != NULL) {
+ phdr = ro->ro_prepend;
+ hlen = ro->ro_plen;
+ pflags = ro->ro_flags;
+ }
+#ifdef MAC
+ error = mac_ifnet_check_transmit(ifp, m);
+ if (error)
+ senderr(error);
+#endif
+
+ M_PROFILE(m);
+ if (ifp->if_flags & IFF_MONITOR)
+ senderr(ENETDOWN);
+ if (!((ifp->if_flags & IFF_UP) &&
+ (ifp->if_drv_flags & IFF_DRV_RUNNING)))
+ senderr(ENETDOWN);
+
+ if (phdr == NULL) {
+ /* No prepend data supplied. Try to calculate ourselves. */
+ phdr = linkhdr;
+ hlen = ETHER_HDR_LEN;
+ error = ether_resolve_addr(ifp, m, dst, ro, phdr, &pflags);
+ if (error != 0)
+ return (error == EWOULDBLOCK ? 0 : error);
+ }
+
+ if ((pflags & RT_L2_ME) != 0) {
update_mbuf_csumflags(m, m);
return (if_simloop(ifp, m, dst->sa_family, 0));
}
+ loop_copy = pflags & RT_MAY_LOOP;
/*
* Add local net header. If no space in first mbuf,
* allocate another.
*/
- M_PREPEND(m, ETHER_HDR_LEN, M_NOWAIT);
+ M_PREPEND(m, hlen, M_NOWAIT);
if (m == NULL)
senderr(ENOBUFS);
- eh = mtod(m, struct ether_header *);
- if (hdrcmplt == 0) {
- memcpy(&eh->ether_type, &type, sizeof(eh->ether_type));
- memcpy(eh->ether_dhost, edst, sizeof (edst));
- memcpy(eh->ether_shost, IF_LLADDR(ifp),sizeof(eh->ether_shost));
+ if ((pflags & RT_HAS_HEADER) == 0) {
+ eh = mtod(m, struct ether_header *);
+ memcpy(eh, phdr, hlen);
}
/*
@@ -279,34 +337,27 @@
* on the wire). However, we don't do that here for security
* reasons and compatibility with the original behavior.
*/
- if ((ifp->if_flags & IFF_SIMPLEX) && loop_copy &&
+ if ((m->m_flags & M_BCAST) && loop_copy && (ifp->if_flags & IFF_SIMPLEX) &&
((t = pf_find_mtag(m)) == NULL || !t->routed)) {
- if (m->m_flags & M_BCAST) {
- struct mbuf *n;
+ struct mbuf *n;
- /*
- * Because if_simloop() modifies the packet, we need a
- * writable copy through m_dup() instead of a readonly
- * one as m_copy[m] would give us. The alternative would
- * be to modify if_simloop() to handle the readonly mbuf,
- * but performancewise it is mostly equivalent (trading
- * extra data copying vs. extra locking).
- *
- * XXX This is a local workaround. A number of less
- * often used kernel parts suffer from the same bug.
- * See PR kern/105943 for a proposed general solution.
- */
- if ((n = m_dup(m, M_NOWAIT)) != NULL) {
- update_mbuf_csumflags(m, n);
- (void)if_simloop(ifp, n, dst->sa_family, hlen);
- } else
- if_inc_counter(ifp, IFCOUNTER_IQDROPS, 1);
- } else if (bcmp(eh->ether_dhost, eh->ether_shost,
- ETHER_ADDR_LEN) == 0) {
- update_mbuf_csumflags(m, m);
- (void) if_simloop(ifp, m, dst->sa_family, hlen);
- return (0); /* XXX */
- }
+ /*
+ * Because if_simloop() modifies the packet, we need a
+ * writable copy through m_dup() instead of a readonly
+ * one as m_copy[m] would give us. The alternative would
+ * be to modify if_simloop() to handle the readonly mbuf,
+ * but performancewise it is mostly equivalent (trading
+ * extra data copying vs. extra locking).
+ *
+ * XXX This is a local workaround. A number of less
+ * often used kernel parts suffer from the same bug.
+ * See PR kern/105943 for a proposed general solution.
+ */
+ if ((n = m_dup(m, M_NOWAIT)) != NULL) {
+ update_mbuf_csumflags(m, n);
+ (void)if_simloop(ifp, n, dst->sa_family, hlen);
+ } else
+ if_inc_counter(ifp, IFCOUNTER_IQDROPS, 1);
}
/*
@@ -798,6 +849,7 @@
ifp->if_output = ether_output;
ifp->if_input = ether_input;
ifp->if_resolvemulti = ether_resolvemulti;
+ ifp->if_requestencap = ether_requestencap;
#ifdef VIMAGE
ifp->if_reassign = ether_reassign;
#endif
Index: sys/net/if_llatbl.h
===================================================================
--- sys/net/if_llatbl.h
+++ sys/net/if_llatbl.h
@@ -48,6 +48,7 @@
#define LLTABLE_WUNLOCK() rw_wunlock(&lltable_rwlock)
#define LLTABLE_LOCK_ASSERT() rw_assert(&lltable_rwlock, RA_LOCKED)
+#define LLE_MAX_LINKHDR 24 /* Full IB header */
/*
* Code referencing llentry must at least hold
* a shared lock
@@ -58,12 +59,9 @@
struct in_addr addr4;
struct in6_addr addr6;
} r_l3addr;
- union {
- uint64_t mac_aligned;
- uint16_t mac16[3];
- uint8_t mac8[20]; /* IB needs 20 bytes. */
- } ll_addr;
- uint32_t spare0;
+ char r_linkdata[LLE_MAX_LINKHDR]; /* L2 data */
+ uint8_t r_hdrlen; /* length for LL header */
+ uint8_t spare0[3];
uint64_t spare1;
struct lltable *lle_tbl;
@@ -79,6 +77,7 @@
uint16_t ln_router;
time_t ln_ntick;
int lle_refcnt;
+ char *ll_addr; /* link-layer address */
LIST_ENTRY(llentry) lle_chain; /* chain of deleted items */
struct callout lle_timer;
@@ -187,6 +186,8 @@
#define LLE_LINKED 0x0040 /* linked to lookup structure */
/* LLE request flags */
#define LLE_EXCLUSIVE 0x2000 /* return lle xlocked */
+#define LLE_ADDRONLY 0x4000 /* return lladdr instead of full header */
+#define LLE_CREATE 0x8000 /* hint to avoid lle lookup */
#define LLATBL_HASH(key, mask) \
(((((((key >> 8) ^ key) >> 8) ^ key) >> 8) ^ key) & mask)
@@ -208,8 +209,11 @@
/* helper functions */
size_t lltable_drop_entry_queue(struct llentry *);
void lltable_set_entry_addr(struct ifnet *ifp, struct llentry *lle,
- const char *lladdr);
+ const char *linkhdr, size_t linkhdrsize, int lladdr_off);
+int lltable_calc_llheader(struct ifnet *ifp, int family, char *lladdr,
+ char *buf, size_t *bufsize, int *lladdr_off);
+int lltable_update_ifaddr(struct lltable *llt);
struct llentry *lltable_alloc_entry(struct lltable *llt, u_int flags,
const struct sockaddr *l4addr);
void lltable_free_entry(struct lltable *llt, struct llentry *lle);
Index: sys/net/if_llatbl.c
===================================================================
--- sys/net/if_llatbl.c
+++ sys/net/if_llatbl.c
@@ -279,14 +279,98 @@
void
lltable_set_entry_addr(struct ifnet *ifp, struct llentry *lle,
- const char *lladdr)
+ const char *linkhdr, size_t linkhdrsize, int lladdr_off)
{
- bcopy(lladdr, &lle->ll_addr, ifp->if_addrlen);
+ memcpy(lle->r_linkdata, linkhdr, linkhdrsize);
+ lle->r_hdrlen = linkhdrsize;
+ lle->ll_addr = &lle->r_linkdata[lladdr_off];
lle->la_flags |= LLE_VALID;
}
/*
+ * Helper function used to pre-compute full/partial link-layer
+ * header data suitable for feeding into if_output().
+ */
+int
+lltable_calc_llheader(struct ifnet *ifp, int family, char *lladdr,
+ char *buf, size_t *bufsize, int *lladdr_off)
+{
+ struct if_encap_req ereq;
+ int error;
+
+ bzero(buf, *bufsize);
+ bzero(&ereq, sizeof(ereq));
+ ereq.buf = buf;
+ ereq.bufsize = *bufsize;
+ ereq.rtype = IFENCAP_LL;
+ ereq.family = family;
+ ereq.lladdr = lladdr;
+ ereq.lladdr_len = ifp->if_addrlen;
+ error = ifp->if_requestencap(ifp, &ereq);
+ if (error == 0) {
+ *bufsize = ereq.bufsize;
+ *lladdr_off = ereq.lladdr_off;
+ }
+
+ return (error);
+}
+
+/*
+ * Update link-layer header for given @lle after
+ * interface lladdr was changed.
+ */
+static int
+llentry_update_ifaddr(struct lltable *llt, struct llentry *lle, void *farg)
+{
+ struct ifnet *ifp;
+ u_char linkhdr[LLE_MAX_LINKHDR];
+ size_t linkhdrsize;
+ u_char *lladdr;
+ int lladdr_off;
+
+ ifp = (struct ifnet *)farg;
+
+ lladdr = lle->ll_addr;
+
+ LLE_WLOCK(lle);
+ if ((lle->la_flags & LLE_VALID) == 0) {
+ LLE_WUNLOCK(lle);
+ return (0);
+ }
+
+ if ((lle->la_flags & LLE_IFADDR) != 0)
+ lladdr = IF_LLADDR(ifp);
+
+ linkhdrsize = sizeof(linkhdr);
+ lltable_calc_llheader(ifp, llt->llt_af, lladdr, linkhdr, &linkhdrsize,
+ &lladdr_off);
+ memcpy(lle->r_linkdata, linkhdr, linkhdrsize);
+ LLE_WUNLOCK(lle);
+
+ return (0);
+}
+
+/*
+ * Update all calculated headers for given @llt
+ */
+int
+lltable_update_ifaddr(struct lltable *llt)
+{
+ int error;
+
+ if (llt->llt_ifp->if_flags & IFF_LOOPBACK)
+ return (0);
+ error = 0;
+
+ IF_AFDATA_WLOCK(llt->llt_ifp);
+ lltable_foreach_lle(llt, llentry_update_ifaddr, llt->llt_ifp);
+ IF_AFDATA_WUNLOCK(llt->llt_ifp);
+
+ return (error);
+}
+
+/*
*
* Performes generic cleanup routines and frees lle.
*
@@ -601,6 +685,9 @@
struct ifnet *ifp;
struct lltable *llt;
struct llentry *lle, *lle_tmp;
+ uint8_t linkhdr[LLE_MAX_LINKHDR];
+ size_t linkhdrsize;
+ int lladdr_off;
u_int laflags = 0;
int error;
@@ -636,10 +723,14 @@
if (lle == NULL)
return (ENOMEM);
- bcopy(LLADDR(dl), &lle->ll_addr, ifp->if_addrlen);
+ linkhdrsize = sizeof(linkhdr);
+ if (lltable_calc_llheader(ifp, dst->sa_family, LLADDR(dl),
+ linkhdr, &linkhdrsize, &lladdr_off) != 0)
+ return (EINVAL);
+ lltable_set_entry_addr(ifp, lle, linkhdr, linkhdrsize,
+ lladdr_off);
if ((rtm->rtm_flags & RTF_ANNOUNCE))
lle->la_flags |= LLE_PUB;
- lle->la_flags |= LLE_VALID;
lle->la_expire = rtm->rtm_rmx.rmx_expire;
laflags = lle->la_flags;
@@ -734,7 +825,7 @@
db_printf(" ln_router=%u\n", lle->ln_router);
db_printf(" ln_ntick=%ju\n", (uintmax_t)lle->ln_ntick);
db_printf(" lle_refcnt=%d\n", lle->lle_refcnt);
- bcopy(&lle->ll_addr.mac16, octet, sizeof(octet));
+ bcopy(lle->ll_addr, octet, sizeof(octet));
db_printf(" ll_addr=%02x:%02x:%02x:%02x:%02x:%02x\n",
octet[0], octet[1], octet[2], octet[3], octet[4], octet[5]);
db_printf(" lle_timer=%p\n", &lle->lle_timer);
Index: sys/net/if_var.h
===================================================================
--- sys/net/if_var.h
+++ sys/net/if_var.h
@@ -126,6 +126,8 @@
u_int tsomaxsegsize; /* TSO maximum segment size in bytes */
};
+struct if_encap_req;
+
/*
* Structure defining a network interface.
*
@@ -227,6 +229,8 @@
void (*if_reassign) /* reassign to vnet routine */
(struct ifnet *, struct vnet *, char *);
if_get_counter_t if_get_counter; /* get counter values */
+ int (*if_requestencap) /* make link header from request */
+ (struct ifnet *, struct if_encap_req *);
/* Statistics. */
counter_u64_t if_counters[IFCOUNTERS];
Index: sys/net/route.h
===================================================================
--- sys/net/route.h
+++ sys/net/route.h
@@ -51,14 +51,21 @@
*/
struct route {
struct rtentry *ro_rt;
- struct llentry *ro_lle;
- struct in_ifaddr *ro_ia;
- int ro_flags;
+ char *ro_prepend;
+ uint16_t ro_plen;
+ uint16_t ro_flags;
struct sockaddr ro_dst;
};
+#define RT_L2_ME_BIT 2 /* dst L2 addr is our address */
+#define RT_MAY_LOOP_BIT 3 /* dst may require loop copy */
+#define RT_HAS_HEADER_BIT 4 /* mbuf already has its header prepended */
+
#define RT_CACHING_CONTEXT 0x1 /* XXX: not used anywhere */
#define RT_NORTREF 0x2 /* doesn't hold reference on ro_rt */
+#define RT_L2_ME (1 << RT_L2_ME_BIT)
+#define RT_MAY_LOOP (1 << RT_MAY_LOOP_BIT)
+#define RT_HAS_HEADER (1 << RT_HAS_HEADER_BIT)
struct rt_metrics {
u_long rmx_locks; /* Kernel must leave these values alone */
@@ -343,6 +350,27 @@
} \
} while (0)
+
+/* Encap request types */
+typedef enum {
+ IFENCAP_LL = 1 /* pre-calculate link-layer header */
+} ife_type;
+
+struct if_encap_req {
+ u_char *buf; /* Destination buffer */
+ size_t bufsize; /* size of provided buffer (r/w) */
+ ife_type rtype; /* request type */
+ uint32_t flags; /* Request flags */
+ int family; /* Address family */
+ int lladdr_off; /* offset from header start (w) */
+ int lladdr_len; /* lladdr length */
+ char *lladdr; /* link-level address pointer */
+ char *hdata; /* Upper layer header data */
+};
+
+#define IFENCAP_FLAG_BROADCAST 0x02 /* Destination is broadcast */
+int if_requestencap_default(struct ifnet *ifp, struct if_encap_req *req);
+
struct radix_node_head *rt_tables_get_rnh(int, int);
struct ifmultiaddr;
Index: sys/net/route.c
===================================================================
--- sys/net/route.c
+++ sys/net/route.c
@@ -1119,6 +1119,25 @@
}
}
+/*
+ * Default handler for encapsulation requests: copies lladdr into buf as is.
+ */
+int
+if_requestencap_default(struct ifnet *ifp, struct if_encap_req *req)
+{
+
+ if (req->rtype != IFENCAP_LL)
+ return (EOPNOTSUPP);
+
+ if (req->bufsize < req->lladdr_len)
+ return (ENOMEM);
+
+ /* Copy lladdr to storage as is */
+ memmove(req->buf, req->lladdr, req->lladdr_len);
+ req->lladdr_off = 0;
+
+ return (0);
+}
#if 0
int p_sockaddr(char *buf, int buflen, struct sockaddr *s);
Index: sys/netinet/if_ether.h
===================================================================
--- sys/netinet/if_ether.h
+++ sys/netinet/if_ether.h
@@ -114,6 +114,8 @@
struct ifaddr;
+int arpresolve_addr(struct ifnet *ifp, int flags,
+ const struct sockaddr *dst, char *desten, uint32_t *pflags);
int arpresolve(struct ifnet *ifp, int is_gw, struct mbuf *m,
const struct sockaddr *dst, u_char *desten, uint32_t *pflags);
void arprequest(struct ifnet *, const struct in_addr *,
Index: sys/netinet/if_ether.c
===================================================================
--- sys/netinet/if_ether.c
+++ sys/netinet/if_ether.c
@@ -142,7 +142,9 @@
static void arp_check_update_lle(struct arphdr *ah, struct in_addr isaddr,
struct ifnet *ifp, int bridged, struct llentry *la);
static void arp_mark_lle_reachable(struct llentry *la);
+static void arp_iflladdr(void *arg __unused, struct ifnet *ifp);
+static eventhandler_tag iflladdr_tag;
static const struct netisr_handler arp_nh = {
.nh_name = "arp",
@@ -218,6 +220,31 @@
CURVNET_RESTORE();
}
+static int
+arp_fillheader(struct ifnet *ifp, struct arphdr *ah, int bcast, u_char *buf,
+ size_t *bufsize)
+{
+ struct if_encap_req ereq;
+ int error;
+
+ bzero(buf, *bufsize);
+ bzero(&ereq, sizeof(ereq));
+ ereq.buf = buf;
+ ereq.bufsize = *bufsize;
+ ereq.rtype = IFENCAP_LL;
+ ereq.family = AF_ARP;
+ ereq.lladdr = ar_tha(ah);
+ ereq.hdata = (u_char *)ah;
+ if (bcast)
+ ereq.flags = IFENCAP_FLAG_BROADCAST;
+ error = ifp->if_requestencap(ifp, &ereq);
+ if (error == 0)
+ *bufsize = ereq.bufsize;
+
+ return (error);
+}
+
+
/*
* Broadcast an ARP request. Caller specifies:
* - arp header source ip address
@@ -232,6 +259,10 @@
struct arphdr *ah;
struct sockaddr sa;
u_char *carpaddr = NULL;
+ uint8_t linkhdr[LLE_MAX_LINKHDR];
+ size_t linkhdrsize;
+ struct route ro;
+ int error;
if (sip == NULL) {
/*
@@ -287,12 +318,27 @@
bcopy(tip, ar_tpa(ah), ah->ar_pln);
sa.sa_family = AF_ARP;
sa.sa_len = 2;
+
+ /* Calculate link header for sending frame */
+ bzero(&ro, sizeof(ro));
+ linkhdrsize = sizeof(linkhdr);
+ error = arp_fillheader(ifp, ah, 1, linkhdr, &linkhdrsize);
+ if (error != 0) {
+ if_printf(ifp, "Failed to calculate ARP header: %d\n", error);
+ return;
+ }
+
+ ro.ro_prepend = linkhdr;
+ ro.ro_plen = linkhdrsize;
+ ro.ro_flags = 0;
+
m->m_flags |= M_BCAST;
m_clrprotoflags(m); /* Avoid confusing lower layers. */
- (*ifp->if_output)(ifp, m, &sa, NULL);
+ (*ifp->if_output)(ifp, m, &sa, &ro);
ARPSTAT_INC(txrequests);
}
+
/*
* Resolve an IP address into an ethernet address - heavy version.
* Used internally by arpresolve().
@@ -305,18 +351,20 @@
* Note that m_freem() handles NULL.
*/
static int
-arpresolve_full(struct ifnet *ifp, int is_gw, int create, struct mbuf *m,
+arpresolve_full(struct ifnet *ifp, int is_gw, int flags, struct mbuf *m,
const struct sockaddr *dst, u_char *desten, uint32_t *pflags)
{
struct llentry *la = NULL, *la_tmp;
struct mbuf *curr = NULL;
struct mbuf *next = NULL;
int error, renew;
+ char *lladdr;
+ int ll_len;
if (pflags != NULL)
*pflags = 0;
- if (create == 0) {
+ if ((flags & LLE_CREATE) == 0) {
IF_AFDATA_RLOCK(ifp);
la = lla_lookup(LLTABLE(ifp), LLE_EXCLUSIVE, dst);
IF_AFDATA_RUNLOCK(ifp);
@@ -350,7 +398,14 @@
if ((la->la_flags & LLE_VALID) &&
((la->la_flags & LLE_STATIC) || la->la_expire > time_uptime)) {
- bcopy(&la->ll_addr, desten, ifp->if_addrlen);
+ if (flags & LLE_ADDRONLY) {
+ lladdr = la->ll_addr;
+ ll_len = ifp->if_addrlen;
+ } else {
+ lladdr = la->r_linkdata;
+ ll_len = la->r_hdrlen;
+ }
+ bcopy(lladdr, desten, ll_len);
renew = 0;
/*
* If entry has an expiry time and it is approaching,
@@ -364,7 +419,7 @@
}
if (pflags != NULL)
- *pflags = la->la_flags;
+ *pflags = la->la_flags & LLE_IFADDR;
LLE_WUNLOCK(la);
@@ -432,15 +487,30 @@
/*
* Resolve an IP address into an ethernet address.
+ */
+int
+arpresolve_addr(struct ifnet *ifp, int flags, const struct sockaddr *dst,
+ char *desten, uint32_t *pflags)
+{
+ int error;
+
+ flags |= LLE_ADDRONLY;
+ error = arpresolve_full(ifp, 0, flags, NULL, dst, desten, pflags);
+ return (error);
+}
+
+/*
+ * Looks up the link header based on an IP address.
* On input:
* ifp is the interface we use
* is_gw != 0 if @dst represents gateway to some destination
* m is the mbuf. May be NULL if we don't have a packet.
* dst is the next hop,
- * desten is the storage to put LL address.
+ * desten is the storage to put LL header.
* flags returns lle entry flags.
*
- * On success, desten and flags are filled in and the function returns 0;
+ * On success, full/partial link header and flags are filled in and
+ * the function returns 0.
* If the packet must be held pending resolution, we return EWOULDBLOCK
* On other errors, we return the corresponding error code.
* Note that m_freem() handles NULL.
@@ -474,11 +544,12 @@
IF_AFDATA_RUNLOCK(ifp);
if (la == NULL)
- return (arpresolve_full(ifp, is_gw, 1, m, dst, desten, pflags));
+ return (arpresolve_full(ifp, is_gw, LLE_CREATE, m, dst, desten,
+ pflags));
if ((la->la_flags & LLE_VALID) &&
((la->la_flags & LLE_STATIC) || la->la_expire > time_uptime)) {
- bcopy(&la->ll_addr, desten, ifp->if_addrlen);
+ bcopy(la->r_linkdata, desten, la->r_hdrlen);
renew = 0;
/*
* If entry has an expiry time and it is approaching,
@@ -492,7 +563,7 @@
}
if (pflags != NULL)
- *pflags = la->la_flags;
+ *pflags = la->la_flags & LLE_IFADDR;
LLE_RUNLOCK(la);
@@ -503,7 +574,7 @@
}
LLE_RUNLOCK(la);
- return (arpresolve_full(ifp, is_gw, 0, m, dst, desten, pflags));
+ return (arpresolve_full(ifp, is_gw, LLE_CREATE, m, dst, desten,pflags));
}
/*
@@ -647,6 +718,13 @@
int carped;
struct sockaddr_in sin;
struct sockaddr *dst;
+ uint8_t linkhdr[LLE_MAX_LINKHDR];
+ size_t linkhdrsize;
+ int lladdr_off;
+ struct route ro;
+ int error;
+
+
sin.sin_len = sizeof(struct sockaddr_in);
sin.sin_family = AF_INET;
sin.sin_addr.s_addr = 0;
@@ -811,14 +889,19 @@
if (la != NULL)
arp_check_update_lle(ah, isaddr, ifp, bridged, la);
else if (itaddr.s_addr == myaddr.s_addr) {
- /*
- * Reply to our address, but no lle exists yet.
- * do we really have to create an entry?
- */
+ /* Reply to our address, but no lle exists yet. */
+ /* Calculate full link prepend to use in lle */
+ linkhdrsize = sizeof(linkhdr);
+ if (lltable_calc_llheader(ifp, AF_INET, ar_sha(ah), linkhdr,
+ &linkhdrsize, &lladdr_off) != 0)
+ goto drop;
+
+ /* Allocate new entry */
la = lltable_alloc_entry(LLTABLE(ifp), 0, dst);
if (la == NULL)
goto drop;
- lltable_set_entry_addr(ifp, la, ar_sha(ah));
+ lltable_set_entry_addr(ifp, la, linkhdr, linkhdrsize,
+ lladdr_off);
IF_AFDATA_WLOCK(ifp);
LLE_WLOCK(la);
@@ -876,7 +959,7 @@
if ((lle != NULL) && (lle->la_flags & LLE_PUB)) {
(void)memcpy(ar_tha(ah), ar_sha(ah), ah->ar_hln);
- (void)memcpy(ar_sha(ah), &lle->ll_addr, ah->ar_hln);
+ (void)memcpy(ar_sha(ah), lle->ll_addr, ah->ar_hln);
LLE_RUNLOCK(lle);
} else {
@@ -956,8 +1039,31 @@
m->m_pkthdr.rcvif = NULL;
sa.sa_family = AF_ARP;
sa.sa_len = 2;
+
+ /* Calculate link header for sending frame */
+ bzero(&ro, sizeof(ro));
+ linkhdrsize = sizeof(linkhdr);
+ error = arp_fillheader(ifp, ah, 0, linkhdr, &linkhdrsize);
+
+ /*
+ * arp_fillheader() may fail due to lack of support inside the encap
+ * request routine. This is not necessarily an error; AF_ARP can/should
+ * be handled by if_output().
+ */
+ if (error != 0 && error != EAFNOSUPPORT) {
+ printf("Failed to calculate ARP header: %d\n", error);
+ goto drop;
+ }
+
+ if (error == 0) {
+ ro.ro_prepend = linkhdr;
+ ro.ro_plen = linkhdrsize;
+ ro.ro_flags = 0;
+
+ }
+
m_clrprotoflags(m); /* Avoid confusing lower layers. */
- (*ifp->if_output)(ifp, m, &sa, NULL);
+ (*ifp->if_output)(ifp, m, &sa, &ro);
ARPSTAT_INC(txreplies);
return;
@@ -976,6 +1082,9 @@
{
struct sockaddr sa;
struct mbuf *m_hold, *m_hold_next;
+ uint8_t linkhdr[LLE_MAX_LINKHDR];
+ size_t linkhdrsize;
+ int lladdr_off;
LLE_WLOCK_ASSERT(la);
@@ -992,7 +1101,7 @@
return;
}
if ((la->la_flags & LLE_VALID) &&
- bcmp(ar_sha(ah), &la->ll_addr, ifp->if_addrlen)) {
+ bcmp(ar_sha(ah), la->ll_addr, ifp->if_addrlen)) {
if (la->la_flags & LLE_STATIC) {
LLE_WUNLOCK(la);
if (log_arp_permanent_modify)
@@ -1015,8 +1124,14 @@
}
}
+ /* Calculate full link prepend to use in lle */
+ linkhdrsize = sizeof(linkhdr);
+ if (lltable_calc_llheader(ifp, AF_INET, ar_sha(ah), linkhdr,
+ &linkhdrsize, &lladdr_off) != 0)
+ return;
+
/* Check if something has changed */
- if (memcmp(&la->ll_addr, ar_sha(ah), ifp->if_addrlen) != 0 ||
+ if (memcmp(la->r_linkdata, linkhdr, linkhdrsize) != 0 ||
(la->la_flags & LLE_VALID) == 0) {
/* Perform real LLE update */
/* use afdata WLOCK to update fields */
@@ -1036,7 +1151,8 @@
}
/* Update data */
- lltable_set_entry_addr(ifp, la, ar_sha(ah));
+ lltable_set_entry_addr(ifp, la, linkhdr, linkhdrsize,
+ lladdr_off);
IF_AFDATA_WUNLOCK(ifp);
LLE_REMREF(la);
@@ -1150,10 +1266,23 @@
ifa->ifa_rtrequest = NULL;
}
+/*
+ * A handler for interface link layer address change event.
+ */
+static __noinline void
+arp_iflladdr(void *arg __unused, struct ifnet *ifp)
+{
+
+ lltable_update_ifaddr(LLTABLE(ifp));
+}
+
static void
arp_init(void)
{
netisr_register(&arp_nh);
+ if (IS_DEFAULT_VNET(curvnet))
+ iflladdr_tag = EVENTHANDLER_REGISTER(iflladdr_event,
+ arp_iflladdr, NULL, EVENTHANDLER_PRI_ANY);
}
SYSINIT(arp, SI_SUB_PROTO_DOMAIN, SI_ORDER_ANY, arp_init, 0);
Index: sys/netinet/in.c
===================================================================
--- sys/netinet/in.c
+++ sys/netinet/in.c
@@ -1238,6 +1238,9 @@
const struct sockaddr_in *sin = (const struct sockaddr_in *)l3addr;
struct ifnet *ifp = llt->llt_ifp;
struct llentry *lle;
+ char linkhdr[LLE_MAX_LINKHDR];
+ size_t linkhdrsize;
+ int lladdr_off;
KASSERT(l3addr->sa_family == AF_INET,
("sin_family %d", l3addr->sa_family));
@@ -1258,7 +1261,12 @@
}
lle->la_flags = flags;
if ((flags & LLE_IFADDR) == LLE_IFADDR) {
- lltable_set_entry_addr(ifp, lle, IF_LLADDR(ifp));
+ linkhdrsize = LLE_MAX_LINKHDR;
+ if (lltable_calc_llheader(ifp, AF_INET, IF_LLADDR(ifp),
+ linkhdr, &linkhdrsize, &lladdr_off) != 0)
+ return (NULL);
+ lltable_set_entry_addr(ifp, lle, linkhdr, linkhdrsize,
+ lladdr_off);
lle->la_flags |= LLE_STATIC;
}
@@ -1337,7 +1345,7 @@
sdl->sdl_type = ifp->if_type;
if ((lle->la_flags & LLE_VALID) == LLE_VALID) {
sdl->sdl_alen = ifp->if_addrlen;
- bcopy(&lle->ll_addr, LLADDR(sdl), ifp->if_addrlen);
+ bcopy(lle->ll_addr, LLADDR(sdl), ifp->if_addrlen);
} else {
sdl->sdl_alen = 0;
bzero(LLADDR(sdl), ifp->if_addrlen);
Index: sys/netinet/ip_output.c
===================================================================
--- sys/netinet/ip_output.c
+++ sys/netinet/ip_output.c
@@ -567,7 +567,7 @@
RO_RTFREE(ro);
if (have_ia_ref)
ifa_free(&ia->ia_ifa);
- ro->ro_lle = NULL;
+ ro->ro_prepend = NULL;
rte = NULL;
gw = dst;
ip = mtod(m, struct ip *);
Index: sys/netinet/toecore.c
===================================================================
--- sys/netinet/toecore.c
+++ sys/netinet/toecore.c
@@ -428,7 +428,7 @@
KASSERT(lle->la_flags & LLE_VALID,
("%s: %p resolved but not valid?", __func__, lle));
- lladdr = (uint8_t *)&lle->ll_addr;
+ lladdr = (uint8_t *)lle->ll_addr;
#ifdef VLAN_TAG
VLAN_TAG(ifp, &vtag);
#endif
Index: sys/netinet6/icmp6.c
===================================================================
--- sys/netinet6/icmp6.c
+++ sys/netinet6/icmp6.c
@@ -2641,7 +2641,7 @@
nd_opt->nd_opt_type = ND_OPT_TARGET_LINKADDR;
nd_opt->nd_opt_len = len >> 3;
lladdr = (char *)(nd_opt + 1);
- bcopy(&ln->ll_addr, lladdr, ifp->if_addrlen);
+ bcopy(ln->ll_addr, lladdr, ifp->if_addrlen);
p += len;
}
}
Index: sys/netinet6/in6.h
===================================================================
--- sys/netinet6/in6.h
+++ sys/netinet6/in6.h
@@ -375,9 +375,9 @@
#if __BSD_VISIBLE
struct route_in6 {
struct rtentry *ro_rt;
- struct llentry *ro_lle;
- struct in6_addr *ro_ia6;
- int ro_flags;
+ char *ro_prepend;
+ uint16_t ro_plen;
+ uint16_t ro_flags;
struct sockaddr_in6 ro_dst;
};
#endif
Index: sys/netinet6/in6.c
===================================================================
--- sys/netinet6/in6.c
+++ sys/netinet6/in6.c
@@ -2241,6 +2241,9 @@
const struct sockaddr_in6 *sin6 = (const struct sockaddr_in6 *)l3addr;
struct ifnet *ifp = llt->llt_ifp;
struct llentry *lle;
+ char linkhdr[LLE_MAX_LINKHDR];
+ size_t linkhdrsize;
+ int lladdr_off;
KASSERT(l3addr->sa_family == AF_INET6,
("sin_family %d", l3addr->sa_family));
@@ -2261,7 +2264,12 @@
}
lle->la_flags = flags;
if ((flags & LLE_IFADDR) == LLE_IFADDR) {
- lltable_set_entry_addr(ifp, lle, IF_LLADDR(ifp));
+ linkhdrsize = LLE_MAX_LINKHDR;
+ if (lltable_calc_llheader(ifp, AF_INET6, IF_LLADDR(ifp),
+ linkhdr, &linkhdrsize, &lladdr_off) != 0)
+ return (NULL);
+ lltable_set_entry_addr(ifp, lle, linkhdr, linkhdrsize,
+ lladdr_off);
lle->la_flags |= LLE_STATIC;
}
@@ -2348,7 +2356,7 @@
sdl->sdl_alen = ifp->if_addrlen;
sdl->sdl_index = ifp->if_index;
sdl->sdl_type = ifp->if_type;
- bcopy(&lle->ll_addr, LLADDR(sdl), ifp->if_addrlen);
+ bcopy(lle->ll_addr, LLADDR(sdl), ifp->if_addrlen);
ndpc.rtm.rtm_rmx.rmx_expire =
lle->la_flags & LLE_STATIC ? 0 : lle->la_expire;
ndpc.rtm.rtm_flags |= (RTF_HOST | RTF_LLDATA);
Index: sys/netinet6/nd6.h
===================================================================
--- sys/netinet6/nd6.h
+++ sys/netinet6/nd6.h
@@ -410,6 +410,8 @@
void nd6_llinfo_setstate(struct llentry *lle, int newstate);
void nd6_timer(void *);
void nd6_purge(struct ifnet *);
+int nd6_resolve_addr(struct ifnet *ifp, int flags, const struct sockaddr *dst,
+ char *desten, uint32_t *pflags);
int nd6_resolve(struct ifnet *, int, struct mbuf *,
const struct sockaddr *, u_char *, uint32_t *);
int nd6_ioctl(u_long, caddr_t, struct ifnet *);
Index: sys/netinet6/nd6.c
===================================================================
--- sys/netinet6/nd6.c
+++ sys/netinet6/nd6.c
@@ -111,7 +111,7 @@
VNET_DEFINE(int, nd6_debug) = 0;
#endif
-static eventhandler_tag lle_event_eh;
+static eventhandler_tag lle_event_eh, iflladdr_event_eh;
/* for debugging? */
#if 0
@@ -137,7 +137,7 @@
static void nd6_llinfo_settimer_locked(struct llentry *, long);
static void clear_llinfo_pqueue(struct llentry *);
static void nd6_rtrequest(int, struct rtentry *, struct rt_addrinfo *);
-static int nd6_resolve_slow(struct ifnet *, struct mbuf *,
+static int nd6_resolve_slow(struct ifnet *, int, struct mbuf *,
const struct sockaddr_in6 *, u_char *, uint32_t *);
static int nd6_need_cache(struct ifnet *);
@@ -188,7 +188,7 @@
gw.sdl_index = ifp->if_index;
gw.sdl_type = ifp->if_type;
if (evt == LLENTRY_RESOLVED)
- bcopy(&lle->ll_addr, gw.sdl_data, ifp->if_addrlen);
+ bcopy(lle->ll_addr, gw.sdl_data, ifp->if_addrlen);
rtinfo.rti_info[RTAX_DST] = (struct sockaddr *)&dst;
rtinfo.rti_info[RTAX_GATEWAY] = (struct sockaddr *)&gw;
rtinfo.rti_addrs = RTA_DST | RTA_GATEWAY;
@@ -196,6 +196,16 @@
type == RTM_ADD ? RTF_UP: 0), 0, RT_DEFAULT_FIB);
}
+/*
+ * A handler for interface link layer address change event.
+ * Hands the table selected by LLTABLE6(ifp) to lltable_update_ifaddr();
+ * the handler argument is unused.
+ * NOTE(review): presumably this refreshes the entries describing the
+ * interface's own addresses with the new link-layer address -- confirm
+ * against lltable_update_ifaddr().
+ */
+static __noinline void
+nd6_iflladdr(void *arg __unused, struct ifnet *ifp)
+{
+
+ lltable_update_ifaddr(LLTABLE6(ifp));
+}
+
void
nd6_init(void)
{
@@ -211,9 +221,12 @@
nd6_slowtimo, curvnet);
nd6_dad_init();
- if (IS_DEFAULT_VNET(curvnet))
+ if (IS_DEFAULT_VNET(curvnet)) {
lle_event_eh = EVENTHANDLER_REGISTER(lle_event, nd6_lle_event,
NULL, EVENTHANDLER_PRI_ANY);
+ iflladdr_event_eh = EVENTHANDLER_REGISTER(iflladdr_event,
+ nd6_iflladdr, NULL, EVENTHANDLER_PRI_ANY);
+ }
}
#ifdef VIMAGE
@@ -223,8 +236,10 @@
callout_drain(&V_nd6_slowtimo_ch);
callout_drain(&V_nd6_timer_ch);
- if (IS_DEFAULT_VNET(curvnet))
+ if (IS_DEFAULT_VNET(curvnet)) {
EVENTHANDLER_DEREGISTER(lle_event, lle_event_eh);
+ EVENTHANDLER_DEREGISTER(iflladdr_event, iflladdr_event_eh);
+ }
}
#endif
@@ -1704,6 +1719,9 @@
uint16_t router = 0;
struct sockaddr_in6 sin6;
struct mbuf *chain = NULL;
+ u_char linkhdr[LLE_MAX_LINKHDR];
+ size_t linkhdrsize;
+ int lladdr_off;
IF_AFDATA_UNLOCK_ASSERT(ifp);
@@ -1738,8 +1756,15 @@
* Since we already know all the data for the new entry,
* fill it before insertion.
*/
- if (lladdr != NULL)
- lltable_set_entry_addr(ifp, ln, lladdr);
+ if (lladdr != NULL) {
+ linkhdrsize = sizeof(linkhdr);
+ if (lltable_calc_llheader(ifp, AF_INET6, lladdr,
+ linkhdr, &linkhdrsize, &lladdr_off) != 0)
+ return;
+ lltable_set_entry_addr(ifp, ln, linkhdr, linkhdrsize,
+ lladdr_off);
+ }
+
IF_AFDATA_WLOCK(ifp);
LLE_WLOCK(ln);
/* Prefer any existing lle over newly-created one */
@@ -1771,7 +1796,7 @@
olladdr = (ln->la_flags & LLE_VALID) ? 1 : 0;
if (olladdr && lladdr) {
- llchange = bcmp(lladdr, &ln->ll_addr,
+ llchange = bcmp(lladdr, ln->ll_addr,
ifp->if_addrlen);
} else if (!olladdr && lladdr)
llchange = 1;
@@ -1797,7 +1822,13 @@
* Record source link-layer address
* XXX is it dependent to ifp->if_type?
*/
- lltable_set_entry_addr(ifp, ln, lladdr);
+ linkhdrsize = sizeof(linkhdr);
+ if (lltable_calc_llheader(ifp, AF_INET6, lladdr,
+ linkhdr, &linkhdrsize, &lladdr_off) != 0)
+ return;
+ lltable_set_entry_addr(ifp, ln, linkhdr, linkhdrsize,
+ lladdr_off);
+
nd6_llinfo_setstate(ln, ND6_LLINFO_STALE);
EVENTHANDLER_INVOKE(lle_event, ln, LLENTRY_RESOLVED);
@@ -1949,8 +1980,8 @@
}
/*
- * Do L2 address resolution for @sa_dst address. Stores found
- * address in @desten buffer. Copy of lle ln_flags can be also
+ * Lookup link header for @sa_dst address. Stores found
+ * data in @desten buffer. Copy of lle ln_flags can be also
* saved in @pflags if @pflags is non-NULL.
*
* If destination LLE does not exists or lle state modification
@@ -2013,13 +2044,13 @@
/* Fall back to slow processing path */
if (ln != NULL)
LLE_RUNLOCK(ln);
- return (nd6_resolve_slow(ifp, m, dst6, desten, pflags));
+ return (nd6_resolve_slow(ifp, 0, m, dst6, desten, pflags));
}
- bcopy(&ln->ll_addr, desten, ifp->if_addrlen);
+ bcopy(ln->r_linkdata, desten, ln->r_hdrlen);
if (pflags != NULL)
- *pflags = ln->la_flags;
+ *pflags = ln->la_flags & LLE_IFADDR;
LLE_RUNLOCK(ln);
return (0);
}
@@ -2037,12 +2068,13 @@
* Set noinline to be dtrace-friendly
*/
static __noinline int
-nd6_resolve_slow(struct ifnet *ifp, struct mbuf *m,
+nd6_resolve_slow(struct ifnet *ifp, int flags, struct mbuf *m,
const struct sockaddr_in6 *dst, u_char *desten, uint32_t *pflags)
{
struct llentry *lle = NULL, *lle_tmp;
struct in6_addr *psrc, src;
- int send_ns;
+ int send_ns, ll_len;
+ char *lladdr;
/*
* Address resolution or Neighbor Unreachability Detection
@@ -2114,7 +2146,14 @@
* send the packet.
*/
if (lle->ln_state > ND6_LLINFO_INCOMPLETE) {
- bcopy(&lle->ll_addr, desten, ifp->if_addrlen);
+ if (flags & LLE_ADDRONLY) {
+ lladdr = lle->ll_addr;
+ ll_len = ifp->if_addrlen;
+ } else {
+ lladdr = lle->r_linkdata;
+ ll_len = lle->r_hdrlen;
+ }
+ bcopy(lladdr, desten, ll_len);
if (pflags != NULL)
*pflags = lle->la_flags;
LLE_WUNLOCK(lle);
@@ -2174,6 +2213,27 @@
return (EWOULDBLOCK);
}
+/*
+ * Do L2 address resolution for @dst address. Stores found
+ * link-layer address in @desten buffer. Copy of lle la_flags can
+ * also be saved in @pflags if @pflags is non-NULL.
+ *
+ * LLE_ADDRONLY is always OR-ed into @flags before the request is
+ * passed to nd6_resolve_slow(), which is called with a NULL mbuf.
+ * With LLE_ADDRONLY set, nd6_resolve_slow() copies only the
+ * link-layer address (ifp->if_addrlen bytes) rather than the full
+ * precomputed link header.
+ *
+ * Return values:
+ * - 0 on success (address copied to buffer).
+ * - EWOULDBLOCK (no local error, but address is still unresolved)
+ * - other errors (alloc failure, etc)
+ */
+int
+nd6_resolve_addr(struct ifnet *ifp, int flags, const struct sockaddr *dst,
+ char *desten, uint32_t *pflags)
+{
+ int error;
+
+ flags |= LLE_ADDRONLY;
+ error = nd6_resolve_slow(ifp, flags, NULL,
+ (const struct sockaddr_in6 *)dst, desten, pflags);
+ return (error);
+}
int
nd6_flush_holdchain(struct ifnet *ifp, struct ifnet *origifp, struct mbuf *chain,
Index: sys/netinet6/nd6_nbr.c
===================================================================
--- sys/netinet6/nd6_nbr.c
+++ sys/netinet6/nd6_nbr.c
@@ -643,6 +643,9 @@
union nd_opts ndopts;
struct mbuf *chain = NULL;
struct sockaddr_in6 sin6;
+ u_char linkhdr[LLE_MAX_LINKHDR];
+ size_t linkhdrsize;
+ int lladdr_off;
char ip6bufs[INET6_ADDRSTRLEN], ip6bufd[INET6_ADDRSTRLEN];
if (ip6->ip6_hlim != 255) {
@@ -765,7 +768,13 @@
/*
* Record link-layer address, and update the state.
*/
- lltable_set_entry_addr(ifp, ln, lladdr);
+ linkhdrsize = sizeof(linkhdr);
+ if (lltable_calc_llheader(ifp, AF_INET6, lladdr,
+ linkhdr, &linkhdrsize, &lladdr_off) != 0)
+ return;
+ lltable_set_entry_addr(ifp, ln, linkhdr, linkhdrsize,
+ lladdr_off);
+
EVENTHANDLER_INVOKE(lle_event, ln, LLENTRY_RESOLVED);
if (is_solicited)
nd6_llinfo_setstate(ln, ND6_LLINFO_REACHABLE);
@@ -789,7 +798,7 @@
llchange = 0;
else {
if (ln->la_flags & LLE_VALID) {
- if (bcmp(lladdr, &ln->ll_addr, ifp->if_addrlen))
+ if (bcmp(lladdr, ln->ll_addr, ifp->if_addrlen))
llchange = 1;
else
llchange = 0;
@@ -831,7 +840,13 @@
* Update link-local address, if any.
*/
if (lladdr != NULL) {
- lltable_set_entry_addr(ifp, ln, lladdr);
+ linkhdrsize = sizeof(linkhdr);
+ if (lltable_calc_llheader(ifp, AF_INET6, lladdr,
+ linkhdr, &linkhdrsize, &lladdr_off) != 0)
+ return;
+ lltable_set_entry_addr(ifp, ln, linkhdr, linkhdrsize,
+ lladdr_off);
+
EVENTHANDLER_INVOKE(lle_event, ln,
LLENTRY_RESOLVED);
}
Index: sys/ofed/drivers/infiniband/core/addr.c
===================================================================
--- sys/ofed/drivers/infiniband/core/addr.c
+++ sys/ofed/drivers/infiniband/core/addr.c
@@ -281,8 +281,6 @@
RTFREE_LOCKED(rte);
return -EHOSTUNREACH;
}
- if (rte->rt_flags & RTF_GATEWAY)
- is_gw = 1;
/*
* If it's not multicast or broadcast and the route doesn't match the
* requested interface return unreachable. Otherwise fetch the
@@ -325,20 +323,18 @@
* Resolve the link local address.
*/
switch (dst_in->sa_family) {
-#ifdef INET
case AF_INET:
- error = arpresolve(ifp, is_gw, NULL, dst_in, edst, NULL);
+ error = arpresolve_addr(ifp, 0, dst_in, edst, NULL);
break;
-#endif
-#ifdef INET6
case AF_INET6:
- error = nd6_resolve(ifp, is_gw, NULL, dst_in, edst, NULL);
+ error = nd6_resolve_addr(ifp, 0, dst_in, edst, NULL);
break;
-#endif
default:
/* XXX: Shouldn't happen. */
error = -EINVAL;
}
+ if (error == EHOSTDOWN && (rte->rt_flags & RTF_GATEWAY))
+ error = EHOSTUNREACH;
RTFREE(rte);
if (error == 0) {
memcpy(src_in, ifa->ifa_addr, ip_addr_size(ifa->ifa_addr));
Index: sys/ofed/drivers/infiniband/ulp/ipoib/ipoib_main.c
===================================================================
--- sys/ofed/drivers/infiniband/ulp/ipoib/ipoib_main.c
+++ sys/ofed/drivers/infiniband/ulp/ipoib/ipoib_main.c
@@ -36,6 +36,7 @@
static int ipoib_resolvemulti(struct ifnet *, struct sockaddr **,
struct sockaddr *);
+static int ipoib_requestencap(struct ifnet *, struct if_encap_req *);
#include <linux/module.h>
@@ -876,6 +877,7 @@
dev->if_output = ipoib_output;
dev->if_input = ipoib_input;
dev->if_resolvemulti = ipoib_resolvemulti;
+ dev->if_requestencap = ipoib_requestencap;
dev->if_baudrate = IF_Gbps(10);
dev->if_broadcastaddr = priv->broadcastaddr;
dev->if_snd.ifq_maxlen = ipoib_sendq_size * 2;
@@ -1249,61 +1251,33 @@
destroy_workqueue(ipoib_workqueue);
}
-/*
- * Infiniband output routine.
- */
static int
-ipoib_output(struct ifnet *ifp, struct mbuf *m,
- const struct sockaddr *dst, struct route *ro)
+ipoib_requestencap(struct ifnet *ifp, struct if_encap_req *req)
{
- u_char edst[INFINIBAND_ALEN];
- struct llentry *lle = NULL;
- struct rtentry *rt0 = NULL;
- struct ipoib_header *eh;
- int error = 0, is_gw = 0;
+ struct ipoib_header *ih;
+ struct arphdr *ah;
short type;
+ const char *lladdr;
- if (ro != NULL) {
- if (!(m->m_flags & (M_BCAST | M_MCAST)))
- lle = ro->ro_lle;
- rt0 = ro->ro_rt;
- if (rt0 != NULL && (rt0->rt_flags & RTF_GATEWAY) != 0)
- is_gw = 1;
- }
-#ifdef MAC
- error = mac_ifnet_check_transmit(ifp, m);
- if (error)
- goto bad;
-#endif
+ if (req->rtype != IFENCAP_LL)
+ return (EOPNOTSUPP);
- M_PROFILE(m);
- if (ifp->if_flags & IFF_MONITOR) {
- error = ENETDOWN;
- goto bad;
- }
- if (!((ifp->if_flags & IFF_UP) &&
- (ifp->if_drv_flags & IFF_DRV_RUNNING))) {
- error = ENETDOWN;
- goto bad;
- }
+ if (req->bufsize < sizeof(struct ipoib_header))
+ return (ENOMEM);
- switch (dst->sa_family) {
-#ifdef INET
+ ih = (struct ipoib_header *)req->buf;
+ lladdr = req->lladdr;
+ req->lladdr_off = offsetof(struct ipoib_header, hwaddr);
+
+ switch (req->family) {
case AF_INET:
- if (lle != NULL && (lle->la_flags & LLE_VALID))
- memcpy(edst, &lle->ll_addr.mac8, sizeof(edst));
- else if (m->m_flags & M_MCAST)
- ip_ib_mc_map(((struct sockaddr_in *)dst)->sin_addr.s_addr, ifp->if_broadcastaddr, edst);
- else
- error = arpresolve(ifp, is_gw, m, dst, edst, NULL);
- if (error)
- return (error == EWOULDBLOCK ? 0 : error);
type = htons(ETHERTYPE_IP);
break;
+ case AF_INET6:
+ type = htons(ETHERTYPE_IPV6);
+ break;
case AF_ARP:
- {
- struct arphdr *ah;
- ah = mtod(m, struct arphdr *);
+ ah = (struct arphdr *)req->hdata;
ah->ar_hrd = htons(ARPHRD_INFINIBAND);
switch(ntohs(ah->ar_op)) {
@@ -1318,46 +1292,147 @@
break;
}
- if (m->m_flags & M_BCAST)
- bcopy(ifp->if_broadcastaddr, edst, INFINIBAND_ALEN);
- else
- bcopy(ar_tha(ah), edst, INFINIBAND_ALEN);
-
+ if (req->flags & IFENCAP_FLAG_BROADCAST)
+ lladdr = ifp->if_broadcastaddr;
+ break;
+ default:
+ return (EAFNOSUPPORT);
}
- break;
+
+ memcpy(&ih->proto , &type, sizeof(ih->proto));
+ memcpy(ih->hwaddr, lladdr, INFINIBAND_ALEN);
+ req->bufsize = sizeof(struct ipoib_header);
+
+ return (0);
+}
+
+static inline int
+ipoib_resolve_addr(struct ifnet *ifp, struct mbuf *m,
+ const struct sockaddr *dst, struct route *ro, char *phdr,
+ uint32_t *pflags)
+{
+ struct ipoib_header *ih;
+ uint32_t lleflags = 0;
+ struct rtentry *rt;
+ short type;
+ int error = 0;
+
+ ih = (struct ipoib_header *)phdr;
+
+ switch (dst->sa_family) {
+#ifdef INET
+ case AF_INET:
+ if ((m->m_flags & (M_BCAST | M_MCAST)) == 0)
+ error = arpresolve(ifp, 0, m, dst, phdr, &lleflags);
+ else {
+ if (m->m_flags & M_BCAST)
+ memcpy(&ih->hwaddr, ifp->if_broadcastaddr,
+ INFINIBAND_ALEN);
+ else {
+ const struct in_addr *a;
+ a = &(((const struct sockaddr_in *)dst)->sin_addr);
+ ip_ib_mc_map(a->s_addr, ifp->if_broadcastaddr,
+ (char *)&ih->hwaddr);
+ }
+ type = htons(ETHERTYPE_IP);
+ memcpy(&ih->proto, &type, sizeof(ih->proto));
+ }
+ break;
#endif
#ifdef INET6
case AF_INET6:
- if (lle != NULL && (lle->la_flags & LLE_VALID))
- memcpy(edst, &lle->ll_addr.mac8, sizeof(edst));
- else if (m->m_flags & M_MCAST)
- ipv6_ib_mc_map(&((struct sockaddr_in6 *)dst)->sin6_addr, ifp->if_broadcastaddr, edst);
- else
- error = nd6_resolve(ifp, is_gw, m, dst, edst, NULL);
- if (error)
- return error;
- type = htons(ETHERTYPE_IPV6);
+ if ((m->m_flags & M_MCAST) == 0)
+ error = nd6_resolve(ifp, 0, m, dst, phdr, &lleflags);
+ else {
+ const struct in6_addr *a6;
+ a6 = &(((const struct sockaddr_in6 *)dst)->sin6_addr);
+ ipv6_ib_mc_map(a6, ifp->if_broadcastaddr,
+ (char *)&ih->hwaddr);
+ type = htons(ETHERTYPE_IPV6);
+ memcpy(&ih->proto, &type, sizeof(ih->proto));
+ }
break;
#endif
-
default:
if_printf(ifp, "can't handle af%d\n", dst->sa_family);
- error = EAFNOSUPPORT;
+ if (m != NULL)
+ m_freem(m);
+ return (EAFNOSUPPORT);
+ }
+
+ if (error == EHOSTDOWN) {
+ rt = (ro != NULL) ? ro->ro_rt : NULL;
+ if (rt != NULL && (rt->rt_flags & RTF_GATEWAY) != 0)
+ error = EHOSTUNREACH;
+ }
+
+ if (error != 0)
+ return (error);
+
+ *pflags = ((!!(lleflags & LLE_IFADDR)) << RT_L2_ME_BIT) | RT_MAY_LOOP;
+
+ return (0);
+}
+
+/*
+ * Infiniband output routine.
+ */
+static int
+ipoib_output(struct ifnet *ifp, struct mbuf *m,
+ const struct sockaddr *dst, struct route *ro)
+{
+ char linkhdr[IPOIB_HEADER_LEN], *phdr;
+ struct ipoib_header *ih;
+ int hlen; /* link layer header length */
+ int error = 0;
+ uint32_t pflags;
+
+ phdr = NULL;
+ pflags = 0;
+ if (ro != NULL) {
+ phdr = ro->ro_prepend;
+ hlen = ro->ro_plen;
+ pflags = ro->ro_flags;
+ }
+#ifdef MAC
+ error = mac_ifnet_check_transmit(ifp, m);
+ if (error)
+ goto bad;
+#endif
+
+ M_PROFILE(m);
+ if (ifp->if_flags & IFF_MONITOR) {
+ error = ENETDOWN;
+ goto bad;
+ }
+ if (!((ifp->if_flags & IFF_UP) &&
+ (ifp->if_drv_flags & IFF_DRV_RUNNING))) {
+ error = ENETDOWN;
goto bad;
}
+ if (phdr == NULL) {
+ /* No prepend data supplied. Try to calculate ourselves. */
+ phdr = linkhdr;
+ hlen = IPOIB_HEADER_LEN;
+ error = ipoib_resolve_addr(ifp, m, dst, ro, phdr, &pflags);
+ if (error != 0)
+ return (error == EWOULDBLOCK ? 0 : error);
+ }
+
/*
* Add local net header. If no space in first mbuf,
* allocate another.
*/
- M_PREPEND(m, IPOIB_HEADER_LEN, M_NOWAIT);
+ M_PREPEND(m, hlen, M_NOWAIT);
if (m == NULL) {
error = ENOBUFS;
goto bad;
}
- eh = mtod(m, struct ipoib_header *);
- (void)memcpy(&eh->proto, &type, sizeof(eh->proto));
- (void)memcpy(&eh->hwaddr, edst, sizeof (edst));
+ if ((pflags & RT_HAS_HEADER) == 0) {
+ ih = mtod(m, struct ipoib_header *);
+ memcpy(ih, phdr, hlen);
+ }
/*
* Queue message on interface, update output statistics if
File Metadata
Details
Attached
Mime Type
text/plain
Expires
Wed, Feb 18, 4:25 AM (18 h, 14 m)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
28836916
Default Alt Text
D4102.id10015.diff (52 KB)
Attached To
Mode
D4102: Add link header precomputation for ethernet/infiniband. Make arp/ndp/bpf/flowtable use it.
Attached
Detach File
Event Timeline
Log In to Comment