Page Menu
Home
FreeBSD
Search
Configure Global Search
Log In
Files
F145597365
D26254.id78536.diff
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Flag For Later
Award Token
Size
57 KB
Referenced Files
None
Subscribers
None
D26254.id78536.diff
View Options
Index: sbin/ifconfig/ifconfig.8
===================================================================
--- sbin/ifconfig/ifconfig.8
+++ sbin/ifconfig/ifconfig.8
@@ -28,7 +28,7 @@
.\" From: @(#)ifconfig.8 8.3 (Berkeley) 1/5/94
.\" $FreeBSD$
.\"
-.Dd September 17, 2020
+.Dd October 21, 2020
.Dt IFCONFIG 8
.Os
.Sh NAME
@@ -2481,6 +2481,12 @@
.Pp
The following parameters are specific to lagg interfaces:
.Bl -tag -width indent
+.It Cm laggtype Ar type
+When creating a lagg interface, the type can be specified as either
+.Cm ethernet
+or
+.Cm infiniband .
+If not specified, ethernet is the default lagg type.
.It Cm laggport Ar interface
Add the interface named by
.Ar interface
Index: sbin/ifconfig/iflagg.c
===================================================================
--- sbin/ifconfig/iflagg.c
+++ sbin/ifconfig/iflagg.c
@@ -30,8 +30,12 @@
#include "ifconfig.h"
-char lacpbuf[120]; /* LACP peer '[(a,a,a),(p,p,p)]' */
+static struct iflaggparam params = {
+ .lagg_type = LAGG_TYPE_DEFAULT,
+};
+static char lacpbuf[120]; /* LACP peer '[(a,a,a),(p,p,p)]' */
+
static void
setlaggport(const char *val, int d, int s, const struct afswtch *afp)
{
@@ -301,7 +305,31 @@
}
}
+static
+DECL_CMD_FUNC(setlaggtype, arg, d)
+{
+ static const struct lagg_types lt[] = LAGG_TYPES;
+ int i;
+
+ for (i = 0; i < nitems(lt); i++) {
+ if (strcmp(arg, lt[i].lt_name) == 0) {
+ params.lagg_type = lt[i].lt_value;
+ return;
+ }
+ }
+ errx(1, "invalid lagg type: %s", arg);
+}
+
+static void
+lagg_create(int s, struct ifreq *ifr)	/* clone-create callback for "lagg" */
+{
+	ifr->ifr_data = (caddr_t) &params;	/* lagg type chosen via "laggtype" */
+	if (ioctl(s, SIOCIFCREATE2, ifr) < 0)	/* create, handing over params */
+		err(1, "SIOCIFCREATE2");
+}
+
static struct cmd lagg_cmds[] = {
+ DEF_CLONE_CMD_ARG("laggtype", setlaggtype),
DEF_CMD_ARG("laggport", setlaggport),
DEF_CMD_ARG("-laggport", unsetlaggport),
DEF_CMD_ARG("laggproto", setlaggproto),
@@ -335,4 +363,5 @@
for (i = 0; i < nitems(lagg_cmds); i++)
cmd_register(&lagg_cmds[i]);
af_register(&af_lagg);
+ clone_setdefcallback("lagg", lagg_create);
}
Index: share/man/man4/lagg.4
===================================================================
--- share/man/man4/lagg.4
+++ share/man/man4/lagg.4
@@ -16,7 +16,7 @@
.\"
.\" $FreeBSD$
.\"
-.Dd November 18, 2017
+.Dd October 21, 2020
.Dt LAGG 4
.Os
.Sh NAME
@@ -192,6 +192,15 @@
.Pp
(Note the mac address of the wireless device is forced to match the wired
device as a workaround.)
+.Pp
+The following example shows how to create an infiniband failover interface.
+.Bd -literal -offset indent
+# ifconfig ib0 up
+# ifconfig ib1 up
+# ifconfig lagg0 create laggtype infiniband
+# ifconfig lagg0 laggproto failover laggport ib0 laggport ib1 \e
+ 1.1.1.1 netmask 255.255.255.0
+.Ed
.Sh SEE ALSO
.Xr ng_one2many 4 ,
.Xr ifconfig 8 ,
Index: sys/conf/files
===================================================================
--- sys/conf/files
+++ sys/conf/files
@@ -4571,6 +4571,7 @@
compile-with "${LINUXKPI_C}"
# OpenFabrics Enterprise Distribution (Infiniband)
+net/if_infiniband.c optional ofed
ofed/drivers/infiniband/core/ib_addr.c optional ofed \
compile-with "${OFED_C}"
ofed/drivers/infiniband/core/ib_agent.c optional ofed \
Index: sys/kern/uipc_mbufhash.c
===================================================================
--- sys/kern/uipc_mbufhash.c
+++ sys/kern/uipc_mbufhash.c
@@ -28,6 +28,7 @@
#include <sys/fnv_hash.h>
#include <net/ethernet.h>
+#include <net/infiniband.h>
#if defined(INET) || defined(INET6)
#include <netinet/in.h>
@@ -42,7 +43,7 @@
#endif
static const void *
-m_ether_tcpip_hash_gethdr(const struct mbuf *m, const u_int off,
+m_common_hash_gethdr(const struct mbuf *m, const u_int off,
const u_int len, void *buf)
{
@@ -65,9 +66,18 @@
}
uint32_t
-m_ether_tcpip_hash(const uint32_t flags, const struct mbuf *m,
- const uint32_t key)
+m_infiniband_tcpip_hash_init(void)
{
+ uint32_t seed;
+
+ seed = arc4random();
+ return (fnv_32_buf(&seed, sizeof(seed), FNV1_32_INIT));
+}
+
+static inline uint32_t
+m_tcpip_hash(const uint32_t flags, const struct mbuf *m,
+ uint32_t p, int off, const uint16_t etype)
+{
union {
#ifdef INET
struct ip ip;
@@ -75,49 +85,19 @@
#ifdef INET6
struct ip6_hdr ip6;
#endif
- struct ether_vlan_header vlan;
uint32_t port;
} buf;
- const struct ether_header *eh;
- const struct ether_vlan_header *vlan;
#ifdef INET
const struct ip *ip;
#endif
#ifdef INET6
const struct ip6_hdr *ip6;
#endif
- uint32_t p;
- int off;
- uint16_t etype;
- p = key;
- off = sizeof(*eh);
- if (m->m_len < off)
- goto done;
- eh = mtod(m, struct ether_header *);
- etype = ntohs(eh->ether_type);
- if (flags & MBUF_HASHFLAG_L2) {
- p = fnv_32_buf(&eh->ether_shost, ETHER_ADDR_LEN, p);
- p = fnv_32_buf(&eh->ether_dhost, ETHER_ADDR_LEN, p);
- }
- /* Special handling for encapsulating VLAN frames */
- if ((m->m_flags & M_VLANTAG) && (flags & MBUF_HASHFLAG_L2)) {
- p = fnv_32_buf(&m->m_pkthdr.ether_vtag,
- sizeof(m->m_pkthdr.ether_vtag), p);
- } else if (etype == ETHERTYPE_VLAN) {
- vlan = m_ether_tcpip_hash_gethdr(m, off, sizeof(*vlan), &buf);
- if (vlan == NULL)
- goto done;
-
- if (flags & MBUF_HASHFLAG_L2)
- p = fnv_32_buf(&vlan->evl_tag, sizeof(vlan->evl_tag), p);
- etype = ntohs(vlan->evl_proto);
- off += sizeof(*vlan) - sizeof(*eh);
- }
switch (etype) {
#ifdef INET
case ETHERTYPE_IP:
- ip = m_ether_tcpip_hash_gethdr(m, off, sizeof(*ip), &buf);
+ ip = m_common_hash_gethdr(m, off, sizeof(*ip), &buf);
if (ip == NULL)
break;
if (flags & MBUF_HASHFLAG_L3) {
@@ -136,7 +116,7 @@
if (iphlen < sizeof(*ip))
break;
off += iphlen;
- ports = m_ether_tcpip_hash_gethdr(m,
+ ports = m_common_hash_gethdr(m,
off, sizeof(*ports), &buf);
if (ports == NULL)
break;
@@ -150,7 +130,7 @@
#endif
#ifdef INET6
case ETHERTYPE_IPV6:
- ip6 = m_ether_tcpip_hash_gethdr(m, off, sizeof(*ip6), &buf);
+ ip6 = m_common_hash_gethdr(m, off, sizeof(*ip6), &buf);
if (ip6 == NULL)
break;
if (flags & MBUF_HASHFLAG_L3) {
@@ -169,6 +149,62 @@
default:
break;
}
-done:
return (p);
}
+
+uint32_t
+m_ether_tcpip_hash(const uint32_t flags, const struct mbuf *m,
+ uint32_t p)
+{
+ union {
+ struct ether_vlan_header vlan;
+ } buf;
+ const struct ether_header *eh;
+ const struct ether_vlan_header *vlan;
+ int off;
+ uint16_t etype;
+
+ off = sizeof(*eh);
+ if (m->m_len < off)
+ return (p);
+ eh = mtod(m, struct ether_header *);
+ etype = ntohs(eh->ether_type);
+ if (flags & MBUF_HASHFLAG_L2) {
+ p = fnv_32_buf(&eh->ether_shost, ETHER_ADDR_LEN, p);
+ p = fnv_32_buf(&eh->ether_dhost, ETHER_ADDR_LEN, p);
+ }
+ /* Special handling for encapsulating VLAN frames */
+ if ((m->m_flags & M_VLANTAG) && (flags & MBUF_HASHFLAG_L2)) {
+ p = fnv_32_buf(&m->m_pkthdr.ether_vtag,
+ sizeof(m->m_pkthdr.ether_vtag), p);
+ } else if (etype == ETHERTYPE_VLAN) {
+ vlan = m_common_hash_gethdr(m, off, sizeof(*vlan), &buf);
+ if (vlan == NULL)
+ return (p);
+
+ if (flags & MBUF_HASHFLAG_L2)
+ p = fnv_32_buf(&vlan->evl_tag, sizeof(vlan->evl_tag), p);
+ etype = ntohs(vlan->evl_proto);
+ off += sizeof(*vlan) - sizeof(*eh);
+ }
+ return (m_tcpip_hash(flags, m, p, off, etype));
+}
+
+uint32_t
+m_infiniband_tcpip_hash(const uint32_t flags, const struct mbuf *m,
+ uint32_t p)
+{
+ const struct infiniband_header *ibh;
+ int off;
+ uint16_t etype;
+
+ off = sizeof(*ibh);
+ if (m->m_len < off)
+ return (p);
+ ibh = mtod(m, struct infiniband_header *);
+ etype = ntohs(ibh->ib_protocol);
+ if (flags & MBUF_HASHFLAG_L2)
+ p = fnv_32_buf(&ibh->ib_hwaddr, INFINIBAND_ADDR_LEN, p);
+
+ return (m_tcpip_hash(flags, m, p, off, etype));
+}
Index: sys/modules/Makefile
===================================================================
--- sys/modules/Makefile
+++ sys/modules/Makefile
@@ -154,6 +154,7 @@
${_if_gif} \
${_if_gre} \
${_if_me} \
+ if_infiniband \
if_lagg \
${_if_ndis} \
${_if_stf} \
Index: sys/modules/if_infiniband/Makefile
===================================================================
--- sys/modules/if_infiniband/Makefile
+++ sys/modules/if_infiniband/Makefile
@@ -0,0 +1,10 @@
+# $FreeBSD$
+
+.PATH: ${SRCTOP}/sys/net
+
+KMOD= if_infiniband
+SRCS= if_infiniband.c \
+ opt_inet.h \
+ opt_inet6.h
+
+.include <bsd.kmod.mk>
Index: sys/net/ieee8023ad_lacp.c
===================================================================
--- sys/net/ieee8023ad_lacp.c
+++ sys/net/ieee8023ad_lacp.c
@@ -54,6 +54,7 @@
#include <net/if_var.h>
#include <net/if_dl.h>
#include <net/ethernet.h>
+#include <net/infiniband.h>
#include <net/if_media.h>
#include <net/if_types.h>
Index: sys/net/if_ethersubr.c
===================================================================
--- sys/net/if_ethersubr.c
+++ sys/net/if_ethersubr.c
@@ -110,7 +110,7 @@
void (*bridge_dn_p)(struct mbuf *, struct ifnet *);
/* if_lagg(4) support */
-struct mbuf *(*lagg_input_p)(struct ifnet *, struct mbuf *);
+struct mbuf *(*lagg_input_ethernet_p)(struct ifnet *, struct mbuf *);
static const u_char etherbroadcastaddr[ETHER_ADDR_LEN] =
{ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff };
@@ -601,9 +601,9 @@
/* Handle input from a lagg(4) port */
if (ifp->if_type == IFT_IEEE8023ADLAG) {
- KASSERT(lagg_input_p != NULL,
+ KASSERT(lagg_input_ethernet_p != NULL,
("%s: if_lagg not loaded!", __func__));
- m = (*lagg_input_p)(ifp, m);
+ m = (*lagg_input_ethernet_p)(ifp, m);
if (m != NULL)
ifp = m->m_pkthdr.rcvif;
else {
Index: sys/net/if_infiniband.c
===================================================================
--- sys/net/if_infiniband.c
+++ sys/net/if_infiniband.c
@@ -0,0 +1,539 @@
+/*-
+ * Copyright (c) 2020 Mellanox Technologies. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS `AS IS' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include "opt_inet.h"
+#include "opt_inet6.h"
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/types.h>
+#include <sys/systm.h>
+#include <sys/eventhandler.h>
+#include <sys/socket.h>
+#include <sys/sysctl.h>
+#include <sys/devctl.h>
+#include <sys/module.h>
+
+#include <net/if.h>
+#include <net/if_var.h>
+#include <net/route.h>
+#include <net/ethernet.h>
+#include <net/infiniband.h>
+#include <net/bpf.h>
+#include <net/if_llatbl.h>
+#include <net/netisr.h>
+#include <net/if_dl.h>
+#include <net/if_types.h>
+#include <net/if_media.h>
+#include <net/if_lagg.h>
+
+#include <netinet/in.h>
+#include <netinet/if_ether.h>
+#include <netinet/ip6.h>
+
+#include <netinet6/in6_var.h>
+#include <netinet6/nd6.h>
+
+#include <security/mac/mac_framework.h>
+
+/* if_lagg(4) support */
+struct mbuf *(*lagg_input_infiniband_p)(struct ifnet *, struct mbuf *);
+
+#ifdef INET
+static inline void
+infiniband_ipv4_multicast_map(uint32_t addr,
+ const uint8_t *broadcast, uint8_t *buf)
+{
+ uint8_t scope;
+
+ addr = ntohl(addr);
+ scope = broadcast[5] & 0xF;
+
+ buf[0] = 0;
+ buf[1] = 0xff;
+ buf[2] = 0xff;
+ buf[3] = 0xff;
+ buf[4] = 0xff;
+ buf[5] = 0x10 | scope;
+ buf[6] = 0x40;
+ buf[7] = 0x1b;
+ buf[8] = broadcast[8];
+ buf[9] = broadcast[9];
+ buf[10] = 0;
+ buf[11] = 0;
+ buf[12] = 0;
+ buf[13] = 0;
+ buf[14] = 0;
+ buf[15] = 0;
+ buf[16] = (addr >> 24) & 0xff;
+ buf[17] = (addr >> 16) & 0xff;
+ buf[18] = (addr >> 8) & 0xff;
+ buf[19] = addr & 0xff;
+}
+#endif
+
+#ifdef INET6
+static inline void
+infiniband_ipv6_multicast_map(const struct in6_addr *addr,
+ const uint8_t *broadcast, uint8_t *buf)
+{
+ uint8_t scope;
+
+ scope = broadcast[5] & 0xF;
+
+ buf[0] = 0;
+ buf[1] = 0xff;
+ buf[2] = 0xff;
+ buf[3] = 0xff;
+ buf[4] = 0xff;
+ buf[5] = 0x10 | scope;
+ buf[6] = 0x60;
+ buf[7] = 0x1b;
+ buf[8] = broadcast[8];
+ buf[9] = broadcast[9];
+ memcpy(&buf[10], &addr->s6_addr[6], 10);
+}
+#endif
+
+/*
+ * This is for clients that have an infiniband_header in the mbuf.
+ */
+void
+infiniband_bpf_mtap(struct ifnet *ifp, struct mbuf *mb)
+{
+ struct infiniband_header *ibh;
+ struct ether_header eh;
+
+ if (mb->m_len < sizeof(*ibh))
+ return;
+
+ ibh = mtod(mb, struct infiniband_header *);
+ eh.ether_type = ibh->ib_protocol;
+ memset(eh.ether_shost, 0, ETHER_ADDR_LEN);
+ memcpy(eh.ether_dhost, ibh->ib_hwaddr + 4, ETHER_ADDR_LEN);
+ mb->m_data += sizeof(*ibh);
+ mb->m_len -= sizeof(*ibh);
+ mb->m_pkthdr.len -= sizeof(*ibh);
+ bpf_mtap2(ifp->if_bpf, &eh, sizeof(eh), mb);
+ mb->m_data -= sizeof(*ibh);
+ mb->m_len += sizeof(*ibh);
+ mb->m_pkthdr.len += sizeof(*ibh);
+}
+
+/*
+ * Infiniband output routine.
+ */
+static int
+infiniband_output(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *dst,
+ struct route *ro)
+{
+ uint8_t edst[INFINIBAND_ADDR_LEN];
+#if defined(INET) || defined(INET6)
+ struct llentry *lle = NULL;
+#endif
+ struct infiniband_header *ibh;
+ int error = 0;
+ uint16_t type;
+ bool is_gw;
+
+ NET_EPOCH_ASSERT();
+
+ is_gw = ((ro != NULL) && (ro->ro_flags & RT_HAS_GW) != 0);
+
+#ifdef MAC
+ error = mac_ifnet_check_transmit(ifp, m);
+ if (error)
+ goto bad;
+#endif
+
+ M_PROFILE(m);
+ if (ifp->if_flags & IFF_MONITOR) {
+ error = ENETDOWN;
+ goto bad;
+ }
+ if (!((ifp->if_flags & IFF_UP) &&
+ (ifp->if_drv_flags & IFF_DRV_RUNNING))) {
+ error = ENETDOWN;
+ goto bad;
+ }
+
+ switch (dst->sa_family) {
+ case AF_LINK:
+ goto output;
+#ifdef INET
+ case AF_INET:
+ if (lle != NULL && (lle->la_flags & LLE_VALID)) {
+ memcpy(edst, lle->ll_addr, sizeof(edst));
+ } else if (m->m_flags & M_MCAST) {
+ infiniband_ipv4_multicast_map(
+ ((const struct sockaddr_in *)dst)->sin_addr.s_addr,
+ ifp->if_broadcastaddr, edst);
+ } else {
+ error = arpresolve(ifp, is_gw, m, dst, edst, NULL, NULL);
+ if (error) {
+ if (error == EWOULDBLOCK)
+ error = 0;
+ m = NULL; /* mbuf is consumed by resolver */
+ goto bad;
+ }
+ }
+ type = htons(ETHERTYPE_IP);
+ break;
+ case AF_ARP: {
+ struct arphdr *ah;
+
+ if (m->m_len < sizeof(*ah)) {
+ error = EINVAL;
+ goto bad;
+ }
+
+ ah = mtod(m, struct arphdr *);
+
+ if (m->m_len < arphdr_len(ah)) {
+ error = EINVAL;
+ goto bad;
+ }
+ ah->ar_hrd = htons(ARPHRD_INFINIBAND);
+
+ switch (ntohs(ah->ar_op)) {
+ case ARPOP_REVREQUEST:
+ case ARPOP_REVREPLY:
+ type = htons(ETHERTYPE_REVARP);
+ break;
+ case ARPOP_REQUEST:
+ case ARPOP_REPLY:
+ default:
+ type = htons(ETHERTYPE_ARP);
+ break;
+ }
+
+ if (m->m_flags & M_BCAST) {
+ memcpy(edst, ifp->if_broadcastaddr, INFINIBAND_ADDR_LEN);
+ } else {
+ if (ah->ar_hln != INFINIBAND_ADDR_LEN) {
+ error = EINVAL;
+ goto bad;
+ }
+ memcpy(edst, ar_tha(ah), INFINIBAND_ADDR_LEN);
+ }
+ break;
+ }
+#endif
+#ifdef INET6
+ case AF_INET6: {
+ const struct ip6_hdr *ip6;
+
+ ip6 = mtod(m, const struct ip6_hdr *);
+ if (m->m_len < sizeof(*ip6)) {
+ error = EINVAL;
+ goto bad;
+ } else if (lle != NULL && (lle->la_flags & LLE_VALID)) {
+ memcpy(edst, lle->ll_addr, sizeof(edst));
+ } else if (m->m_flags & M_MCAST) {
+ infiniband_ipv6_multicast_map(
+ &((const struct sockaddr_in6 *)dst)->sin6_addr,
+ ifp->if_broadcastaddr, edst);
+ } else if (ip6->ip6_nxt == IPPROTO_ICMPV6) {
+ memcpy(edst, ifp->if_broadcastaddr, INFINIBAND_ADDR_LEN);
+ } else {
+ error = nd6_resolve(ifp, is_gw, m, dst, edst, NULL, NULL);
+ if (error) {
+ if (error == EWOULDBLOCK)
+ error = 0;
+ m = NULL; /* mbuf is consumed by resolver */
+ goto bad;
+ }
+ }
+ type = htons(ETHERTYPE_IPV6);
+ break;
+ }
+#endif
+ default:
+ error = EAFNOSUPPORT;
+ goto bad;
+ }
+
+ /*
+ * Add local net header. If no space in first mbuf,
+ * allocate another.
+ */
+ M_PREPEND(m, INFINIBAND_HDR_LEN, M_NOWAIT);
+ if (m == NULL) {
+ error = ENOBUFS;
+ goto bad;
+ }
+ ibh = mtod(m, struct infiniband_header *);
+
+ ibh->ib_protocol = type;
+ memcpy(ibh->ib_hwaddr, edst, sizeof(edst));
+
+ /*
+ * Queue message on interface, update output statistics if
+ * successful, and start output if interface not yet active.
+ */
+output:
+ return (ifp->if_transmit(ifp, m));
+bad:
+ if (m != NULL)
+ m_freem(m);
+ return (error);
+}
+
+/*
+ * Process a received Infiniband packet and dispatch its payload to netisr.
+ */
+static void
+infiniband_input(struct ifnet *ifp, struct mbuf *m)
+{
+ struct infiniband_header *ibh;
+ struct epoch_tracker et;
+ int isr;
+
+ CURVNET_SET_QUIET(ifp->if_vnet);
+
+ if ((ifp->if_flags & IFF_UP) == 0) {
+ if_inc_counter(ifp, IFCOUNTER_IERRORS, 1);
+ m_freem(m);
+ goto done;
+ }
+
+ ibh = mtod(m, struct infiniband_header *);
+
+ /*
+ * Reset layer specific mbuf flags to avoid confusing upper
+ * layers:
+ */
+ m->m_flags &= ~M_VLANTAG;
+ m_clrprotoflags(m);
+
+ if (INFINIBAND_IS_MULTICAST(ibh->ib_hwaddr)) {
+ if (memcmp(ibh->ib_hwaddr, ifp->if_broadcastaddr,
+ ifp->if_addrlen) == 0)
+ m->m_flags |= M_BCAST;
+ else
+ m->m_flags |= M_MCAST;
+ if_inc_counter(ifp, IFCOUNTER_IMCASTS, 1);
+ }
+
+ /* Let BPF have it before we strip the header. */
+ INFINIBAND_BPF_MTAP(ifp, m);
+
+ /* Allow monitor mode to claim this frame, after stats are updated. */
+ if (ifp->if_flags & IFF_MONITOR) {
+ m_freem(m);
+ goto done;
+ }
+
+ /* Direct packet to correct FIB based on interface config. */
+ M_SETFIB(m, ifp->if_fib);
+
+ /* Handle input from a lagg<N> port */
+ if (ifp->if_type == IFT_INFINIBANDLAG) {
+ KASSERT(lagg_input_infiniband_p != NULL,
+ ("%s: if_lagg not loaded!", __func__));
+ m = (*lagg_input_infiniband_p)(ifp, m);
+ if (__predict_false(m == NULL))
+ goto done;
+ ifp = m->m_pkthdr.rcvif;
+ }
+
+ /*
+ * Dispatch frame to upper layer.
+ */
+ switch (ibh->ib_protocol) {
+#ifdef INET
+ case htons(ETHERTYPE_IP):
+ isr = NETISR_IP;
+ break;
+
+ case htons(ETHERTYPE_ARP):
+ if (ifp->if_flags & IFF_NOARP) {
+ /* Discard packet if ARP is disabled on interface */
+ m_freem(m);
+ goto done;
+ }
+ isr = NETISR_ARP;
+ break;
+#endif
+#ifdef INET6
+ case htons(ETHERTYPE_IPV6):
+ isr = NETISR_IPV6;
+ break;
+#endif
+ default:
+ if_inc_counter(ifp, IFCOUNTER_IERRORS, 1);
+ m_freem(m);
+ goto done;
+ }
+
+ /* Strip off the Infiniband header. */
+ m_adj(m, INFINIBAND_HDR_LEN);
+
+#ifdef MAC
+ /*
+ * Tag the mbuf with an appropriate MAC label before any other
+ * consumers can get to it.
+ */
+ mac_ifnet_create_mbuf(ifp, m);
+#endif
+ /* Hand the payload to the selected netisr inside the net epoch. */
+ NET_EPOCH_ENTER(et);
+ netisr_dispatch(isr, m);
+ NET_EPOCH_EXIT(et);
+done:
+ CURVNET_RESTORE();
+}
+
+static int
+infiniband_resolvemulti(struct ifnet *ifp, struct sockaddr **llsa,
+ struct sockaddr *sa)
+{
+ struct sockaddr_dl *sdl;
+#ifdef INET
+ struct sockaddr_in *sin;
+#endif
+#ifdef INET6
+ struct sockaddr_in6 *sin6;
+#endif
+ uint8_t *e_addr;
+
+ switch (sa->sa_family) {
+ case AF_LINK:
+ /*
+ * No mapping needed. Just check that it's a valid MC address.
+ */
+ sdl = (struct sockaddr_dl *)sa;
+ e_addr = LLADDR(sdl);
+ if (!INFINIBAND_IS_MULTICAST(e_addr))
+ return (EADDRNOTAVAIL);
+ *llsa = NULL;
+ return 0;
+
+#ifdef INET
+ case AF_INET:
+ sin = (struct sockaddr_in *)sa;
+ if (!IN_MULTICAST(ntohl(sin->sin_addr.s_addr)))
+ return (EADDRNOTAVAIL);
+ sdl = link_init_sdl(ifp, *llsa, IFT_INFINIBAND);
+ sdl->sdl_alen = INFINIBAND_ADDR_LEN;
+ e_addr = LLADDR(sdl);
+ infiniband_ipv4_multicast_map(sin->sin_addr.s_addr, ifp->if_broadcastaddr,
+ e_addr);
+ *llsa = (struct sockaddr *)sdl;
+ return (0);
+#endif
+#ifdef INET6
+ case AF_INET6:
+ sin6 = (struct sockaddr_in6 *)sa;
+ /*
+ * An IP6 address of 0 means listen to all of the
+ * multicast address used for IP6. This has no meaning
+ * in infiniband.
+ */
+ if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr))
+ return (EADDRNOTAVAIL);
+ if (!IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr))
+ return (EADDRNOTAVAIL);
+ sdl = link_init_sdl(ifp, *llsa, IFT_INFINIBAND);
+ sdl->sdl_alen = INFINIBAND_ADDR_LEN;
+ e_addr = LLADDR(sdl);
+ infiniband_ipv6_multicast_map(&sin6->sin6_addr, ifp->if_broadcastaddr, e_addr);
+ *llsa = (struct sockaddr *)sdl;
+ return (0);
+#endif
+ default:
+ return (EAFNOSUPPORT);
+ }
+}
+
+void
+infiniband_ifattach(struct ifnet *ifp, const uint8_t *lla, const uint8_t *llb)
+{
+ struct sockaddr_dl *sdl;
+ struct ifaddr *ifa;
+ int i;
+
+ ifp->if_addrlen = INFINIBAND_ADDR_LEN;
+ ifp->if_hdrlen = INFINIBAND_HDR_LEN;
+ ifp->if_mtu = INFINIBAND_MTU;
+ if_attach(ifp);
+ ifp->if_output = infiniband_output;
+ ifp->if_input = infiniband_input;
+ ifp->if_resolvemulti = infiniband_resolvemulti;
+
+ if (ifp->if_baudrate == 0)
+ ifp->if_baudrate = IF_Gbps(10); /* default value */
+ if (llb != NULL)
+ ifp->if_broadcastaddr = llb;
+
+ ifa = ifp->if_addr;
+ KASSERT(ifa != NULL, ("%s: no lladdr!\n", __func__));
+ sdl = (struct sockaddr_dl *)ifa->ifa_addr;
+ sdl->sdl_type = IFT_INFINIBAND;
+ sdl->sdl_alen = ifp->if_addrlen;
+
+ if (lla != NULL) {
+ memcpy(LLADDR(sdl), lla, ifp->if_addrlen);
+
+ if (ifp->if_hw_addr != NULL)
+ memcpy(ifp->if_hw_addr, lla, ifp->if_addrlen);
+ } else {
+ lla = LLADDR(sdl);
+ }
+
+ /* Attach to BPF as an Ethernet-compatible (DLT_EN10MB) device. */
+ bpfattach(ifp, DLT_EN10MB, ETHER_HDR_LEN);
+
+ /* Announce the Infiniband link-level address if it is non-zero. */
+ for (i = 0; i < ifp->if_addrlen; i++)
+ if (lla[i] != 0)
+ break;
+ if (i != ifp->if_addrlen)
+ if_printf(ifp, "Infiniband address: %20D\n", lla, ":");
+
+ /* All necessary bits are set up; announce it now. */
+ EVENTHANDLER_INVOKE(infiniband_ifattach_event, ifp);
+
+ if (IS_DEFAULT_VNET(curvnet))
+ devctl_notify("INFINIBAND", ifp->if_xname, "IFATTACH", NULL);
+}
+
+/*
+ * Perform common duties while detaching an Infiniband interface
+ */
+void
+infiniband_ifdetach(struct ifnet *ifp)
+{
+ bpfdetach(ifp);
+ if_detach(ifp);
+}
+
+static moduledata_t infiniband_mod = {
+ .name = "if_infiniband",
+};
+
+DECLARE_MODULE(if_infiniband, infiniband_mod, SI_SUB_INIT_IF, SI_ORDER_ANY);
+MODULE_VERSION(if_infiniband, 1);
Index: sys/net/if_lagg.h
===================================================================
--- sys/net/if_lagg.h
+++ sys/net/if_lagg.h
@@ -72,7 +72,33 @@
{ "default", LAGG_PROTO_DEFAULT } \
}
+/* Supported lagg TYPEs */
+typedef enum {
+ LAGG_TYPE_ETHERNET = 0, /* ethernet (default) */
+ LAGG_TYPE_INFINIBAND, /* infiniband */
+ LAGG_TYPE_MAX,
+} lagg_type;
+
+struct lagg_types {
+ const char *lt_name;
+ lagg_type lt_value;
+};
+
+#define LAGG_TYPE_DEFAULT LAGG_TYPE_ETHERNET
+#define LAGG_TYPES { \
+ { "ethernet", LAGG_TYPE_ETHERNET }, \
+ { "infiniband", LAGG_TYPE_INFINIBAND }, \
+}
+
/*
+ * lagg create clone params
+ */
+struct iflaggparam {
+ uint8_t lagg_type; /* see LAGG_TYPE_XXX */
+ uint8_t reserved[3];
+};
+
+/*
* lagg ioctls.
*/
@@ -206,7 +232,7 @@
struct lagg_softc {
struct ifnet *sc_ifp; /* virtual interface */
- struct rmlock sc_mtx;
+ struct mtx sc_mtx; /* watchdog mutex */
struct sx sc_sx;
int sc_proto; /* lagg protocol */
u_int sc_count; /* number of ports */
@@ -230,12 +256,15 @@
u_int sc_opts;
int flowid_shift; /* shift the flowid */
struct lagg_counters detached_counters; /* detached ports sum */
+ struct callout sc_watchdog; /* watchdog timer */
};
struct lagg_port {
struct ifnet *lp_ifp; /* physical interface */
struct lagg_softc *lp_softc; /* parent lagg */
- uint8_t lp_lladdr[ETHER_ADDR_LEN];
+#define LAGG_ADDR_LEN \
+ MAX(INFINIBAND_ADDR_LEN, ETHER_ADDR_LEN)
+ uint8_t lp_lladdr[LAGG_ADDR_LEN];
u_char lp_iftype; /* interface type */
uint32_t lp_prio; /* port priority */
@@ -257,7 +286,8 @@
struct epoch_context lp_epoch_ctx;
};
-extern struct mbuf *(*lagg_input_p)(struct ifnet *, struct mbuf *);
+extern struct mbuf *(*lagg_input_ethernet_p)(struct ifnet *, struct mbuf *);
+extern struct mbuf *(*lagg_input_infiniband_p)(struct ifnet *, struct mbuf *);
extern void (*lagg_linkstate_p)(struct ifnet *, int );
int lagg_enqueue(struct ifnet *, struct mbuf *);
Index: sys/net/if_lagg.c
===================================================================
--- sys/net/if_lagg.c
+++ sys/net/if_lagg.c
@@ -55,6 +55,7 @@
#include <net/bpf.h>
#include <net/route.h>
#include <net/vnet.h>
+#include <net/infiniband.h>
#if defined(INET) || defined(INET6)
#include <netinet/in.h>
@@ -131,7 +132,8 @@
static void lagg_capabilities(struct lagg_softc *);
static int lagg_port_create(struct lagg_softc *, struct ifnet *);
static int lagg_port_destroy(struct lagg_port *, int);
-static struct mbuf *lagg_input(struct ifnet *, struct mbuf *);
+static struct mbuf *lagg_input_ethernet(struct ifnet *, struct mbuf *);
+static struct mbuf *lagg_input_infiniband(struct ifnet *, struct mbuf *);
static void lagg_linkstate(struct lagg_softc *);
static void lagg_port_state(struct ifnet *, int);
static int lagg_port_ioctl(struct ifnet *, u_long, caddr_t);
@@ -164,7 +166,8 @@
int (*func)(struct ifnet *, int));
static int lagg_setflags(struct lagg_port *, int status);
static uint64_t lagg_get_counter(struct ifnet *ifp, ift_counter cnt);
-static int lagg_transmit(struct ifnet *, struct mbuf *);
+static int lagg_transmit_ethernet(struct ifnet *, struct mbuf *);
+static int lagg_transmit_infiniband(struct ifnet *, struct mbuf *);
static void lagg_qflush(struct ifnet *);
static int lagg_media_change(struct ifnet *);
static void lagg_media_status(struct ifnet *, struct ifmediareq *);
@@ -327,7 +330,8 @@
switch (type) {
case MOD_LOAD:
- lagg_input_p = lagg_input;
+ lagg_input_ethernet_p = lagg_input_ethernet;
+ lagg_input_infiniband_p = lagg_input_infiniband;
lagg_linkstate_p = lagg_port_state;
lagg_detach_cookie = EVENTHANDLER_REGISTER(
ifnet_departure_event, lagg_port_ifdetach, NULL,
@@ -336,7 +340,8 @@
case MOD_UNLOAD:
EVENTHANDLER_DEREGISTER(ifnet_departure_event,
lagg_detach_cookie);
- lagg_input_p = NULL;
+ lagg_input_ethernet_p = NULL;
+ lagg_input_infiniband_p = NULL;
lagg_linkstate_p = NULL;
break;
default:
@@ -353,6 +358,7 @@
DECLARE_MODULE(if_lagg, lagg_mod, SI_SUB_PSEUDO, SI_ORDER_ANY);
MODULE_VERSION(if_lagg, 1);
+MODULE_DEPEND(if_lagg, if_infiniband, 1, 1, 1);
static void
lagg_proto_attach(struct lagg_softc *sc, lagg_proto pr)
@@ -504,18 +510,48 @@
static int
lagg_clone_create(struct if_clone *ifc, int unit, caddr_t params)
{
+ struct iflaggparam iflp;
struct lagg_softc *sc;
struct ifnet *ifp;
- static const u_char eaddr[6]; /* 00:00:00:00:00:00 */
+ int if_type;
+ int error;
+ static const uint8_t eaddr[LAGG_ADDR_LEN];
+ static const uint8_t ib_bcast_addr[INFINIBAND_ADDR_LEN] = {
+ 0x00, 0xff, 0xff, 0xff,
+ 0xff, 0x12, 0x40, 0x1b, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff
+ };
+ if (params != NULL) {
+ error = copyin(params, &iflp, sizeof(iflp));
+ if (error)
+ return (error);
+
+ switch (iflp.lagg_type) {
+ case LAGG_TYPE_ETHERNET:
+ if_type = IFT_ETHER;
+ break;
+ case LAGG_TYPE_INFINIBAND:
+ if_type = IFT_INFINIBAND;
+ break;
+ default:
+ return (EINVAL);
+ }
+ } else {
+ if_type = IFT_ETHER;
+ }
+
sc = malloc(sizeof(*sc), M_LAGG, M_WAITOK|M_ZERO);
- ifp = sc->sc_ifp = if_alloc(IFT_ETHER);
+ ifp = sc->sc_ifp = if_alloc(if_type);
if (ifp == NULL) {
free(sc, M_LAGG);
return (ENOSPC);
}
LAGG_SX_INIT(sc);
+ mtx_init(&sc->sc_mtx, "lagg-mtx", NULL, MTX_DEF);
+ callout_init_mtx(&sc->sc_watchdog, &sc->sc_mtx, 0);
+
LAGG_XLOCK(sc);
if (V_def_use_flowid)
sc->sc_opts |= LAGG_OPT_USE_FLOWID;
@@ -530,15 +566,25 @@
CK_SLIST_INIT(&sc->sc_ports);
- /* Initialise pseudo media types */
- ifmedia_init(&sc->sc_media, 0, lagg_media_change,
- lagg_media_status);
- ifmedia_add(&sc->sc_media, IFM_ETHER | IFM_AUTO, 0, NULL);
- ifmedia_set(&sc->sc_media, IFM_ETHER | IFM_AUTO);
+ switch (if_type) {
+ case IFT_ETHER:
+ /* Initialise pseudo media types */
+ ifmedia_init(&sc->sc_media, 0, lagg_media_change,
+ lagg_media_status);
+ ifmedia_add(&sc->sc_media, IFM_ETHER | IFM_AUTO, 0, NULL);
+ ifmedia_set(&sc->sc_media, IFM_ETHER | IFM_AUTO);
- if_initname(ifp, laggname, unit);
+ if_initname(ifp, laggname, unit);
+ ifp->if_transmit = lagg_transmit_ethernet;
+ break;
+ case IFT_INFINIBAND:
+ if_initname(ifp, laggname, unit);
+ ifp->if_transmit = lagg_transmit_infiniband;
+ break;
+ default:
+ break;
+ }
ifp->if_softc = sc;
- ifp->if_transmit = lagg_transmit;
ifp->if_qflush = lagg_qflush;
ifp->if_init = lagg_init;
ifp->if_ioctl = lagg_ioctl;
@@ -555,9 +601,18 @@
/*
* Attach as an ordinary ethernet device, children will be attached
- * as special device IFT_IEEE8023ADLAG.
+ * as special device IFT_IEEE8023ADLAG or IFT_INFINIBANDLAG.
*/
- ether_ifattach(ifp, eaddr);
+ switch (if_type) {
+ case IFT_ETHER:
+ ether_ifattach(ifp, eaddr);
+ break;
+ case IFT_INFINIBAND:
+ infiniband_ifattach(ifp, eaddr, ib_bcast_addr);
+ break;
+ default:
+ break;
+ }
sc->vlan_attach = EVENTHANDLER_REGISTER(vlan_config,
lagg_register_vlan, sc, EVENTHANDLER_PRI_FIRST);
@@ -595,14 +650,24 @@
lagg_proto_detach(sc);
LAGG_XUNLOCK(sc);
- ifmedia_removeall(&sc->sc_media);
- ether_ifdetach(ifp);
+ switch (ifp->if_type) {
+ case IFT_ETHER:
+ ifmedia_removeall(&sc->sc_media);
+ ether_ifdetach(ifp);
+ break;
+ case IFT_INFINIBAND:
+ infiniband_ifdetach(ifp);
+ break;
+ default:
+ break;
+ }
if_free(ifp);
LAGG_LIST_LOCK();
SLIST_REMOVE(&V_lagg_list, sc, lagg_softc, sc_entries);
LAGG_LIST_UNLOCK();
+ mtx_destroy(&sc->sc_mtx);
LAGG_SX_DESTROY(sc);
free(sc, M_LAGG);
}
@@ -669,6 +734,7 @@
struct lagg_port *lp, *tlp;
struct ifreq ifr;
int error, i, oldmtu;
+ int if_type;
uint64_t *pval;
LAGG_XLOCK_ASSERT(sc);
@@ -695,9 +761,22 @@
return (EBUSY);
}
- /* XXX Disallow non-ethernet interfaces (this should be any of 802) */
- if (ifp->if_type != IFT_ETHER && ifp->if_type != IFT_L2VLAN)
- return (EPROTONOSUPPORT);
+ switch (sc->sc_ifp->if_type) {
+ case IFT_ETHER:
+ /* XXX Disallow non-ethernet interfaces (this should be any of 802) */
+ if (ifp->if_type != IFT_ETHER && ifp->if_type != IFT_L2VLAN)
+ return (EPROTONOSUPPORT);
+ if_type = IFT_IEEE8023ADLAG;
+ break;
+ case IFT_INFINIBAND:
+ /* XXX Disallow non-infiniband interfaces */
+ if (ifp->if_type != IFT_INFINIBAND)
+ return (EPROTONOSUPPORT);
+ if_type = IFT_INFINIBANDLAG;
+ break;
+ default:
+ break;
+ }
/* Allow the first Ethernet member to define the MTU */
oldmtu = -1;
@@ -754,14 +833,14 @@
if_ref(ifp);
lp->lp_ifp = ifp;
- bcopy(IF_LLADDR(ifp), lp->lp_lladdr, ETHER_ADDR_LEN);
+ bcopy(IF_LLADDR(ifp), lp->lp_lladdr, ifp->if_addrlen);
lp->lp_ifcapenable = ifp->if_capenable;
if (CK_SLIST_EMPTY(&sc->sc_ports)) {
- bcopy(IF_LLADDR(ifp), IF_LLADDR(sc->sc_ifp), ETHER_ADDR_LEN);
+ bcopy(IF_LLADDR(ifp), IF_LLADDR(sc->sc_ifp), ifp->if_addrlen);
lagg_proto_lladdr(sc);
EVENTHANDLER_INVOKE(iflladdr_event, sc->sc_ifp);
} else {
- if_setlladdr(ifp, IF_LLADDR(sc->sc_ifp), ETHER_ADDR_LEN);
+ if_setlladdr(ifp, IF_LLADDR(sc->sc_ifp), ifp->if_addrlen);
}
lagg_setflags(lp, 1);
@@ -770,7 +849,7 @@
/* Change the interface type */
lp->lp_iftype = ifp->if_type;
- ifp->if_type = IFT_IEEE8023ADLAG;
+ ifp->if_type = if_type;
ifp->if_lagg = lp;
lp->lp_ioctl = ifp->if_ioctl;
ifp->if_ioctl = lagg_port_ioctl;
@@ -887,15 +966,15 @@
/* Update the primary interface */
if (lp == sc->sc_primary) {
- uint8_t lladdr[ETHER_ADDR_LEN];
+ uint8_t lladdr[LAGG_ADDR_LEN];
if ((lp0 = CK_SLIST_FIRST(&sc->sc_ports)) == NULL)
- bzero(&lladdr, ETHER_ADDR_LEN);
+ bzero(&lladdr, LAGG_ADDR_LEN);
else
- bcopy(lp0->lp_lladdr, lladdr, ETHER_ADDR_LEN);
+ bcopy(lp0->lp_lladdr, lladdr, LAGG_ADDR_LEN);
sc->sc_primary = lp0;
if (sc->sc_destroying == 0) {
- bcopy(lladdr, IF_LLADDR(sc->sc_ifp), ETHER_ADDR_LEN);
+ bcopy(lladdr, IF_LLADDR(sc->sc_ifp), sc->sc_ifp->if_addrlen);
lagg_proto_lladdr(sc);
EVENTHANDLER_INVOKE(iflladdr_event, sc->sc_ifp);
}
@@ -905,7 +984,7 @@
* as well, to switch from old lladdr to its 'real' one)
*/
CK_SLIST_FOREACH(lp_ptr, &sc->sc_ports, lp_entries)
- if_setlladdr(lp_ptr->lp_ifp, lladdr, ETHER_ADDR_LEN);
+ if_setlladdr(lp_ptr->lp_ifp, lladdr, lp_ptr->lp_ifp->if_addrlen);
}
if (lp->lp_ifflags)
@@ -914,7 +993,7 @@
if (lp->lp_detaching == 0) {
lagg_setflags(lp, 0);
lagg_setcaps(lp, lp->lp_ifcapenable);
- if_setlladdr(ifp, lp->lp_lladdr, ETHER_ADDR_LEN);
+ if_setlladdr(ifp, lp->lp_lladdr, ifp->if_addrlen);
}
/*
@@ -938,9 +1017,15 @@
int error = 0;
/* Should be checked by the caller */
- if (ifp->if_type != IFT_IEEE8023ADLAG ||
- (lp = ifp->if_lagg) == NULL || (sc = lp->lp_softc) == NULL)
+ switch (ifp->if_type) {
+ case IFT_IEEE8023ADLAG:
+ case IFT_INFINIBANDLAG:
+ if ((lp = ifp->if_lagg) == NULL || (sc = lp->lp_softc) == NULL)
+ goto fallback;
+ break;
+ default:
goto fallback;
+ }
switch (cmd) {
case SIOCGLAGGPORT:
@@ -1130,6 +1215,41 @@
}
+/*
+ * Periodic callout handler for infiniband lagg interfaces; "arg" is the
+ * lagg_softc. When the link level address of the port returned by
+ * lagg_link_active() differs from that of the lagg interface, the port's
+ * address is copied to the lagg interface and iflladdr_event is invoked.
+ * The handler always re-arms itself to run again after hz ticks.
+ */
static void
+lagg_watchdog_infiniband(void *arg)
+{
+ struct lagg_softc *sc;
+ struct lagg_port *lp;
+ struct ifnet *ifp;
+ struct ifnet *lp_ifp;
+
+ sc = arg;
+
+ /*
+ * Because infiniband nodes have a fixed MAC address, the port
+ * addresses are left alone and the link level address of the
+ * parent lagg<N> device is regularly updated instead. This
+ * operation does not have to be atomic.
+ */
+ LAGG_RLOCK();
+ lp = lagg_link_active(sc, sc->sc_primary);
+ if (lp != NULL) {
+ ifp = sc->sc_ifp;
+ lp_ifp = lp->lp_ifp;
+
+ /* Only touch the address and notify listeners when it actually changed. */
+ if (ifp != NULL && lp_ifp != NULL &&
+ memcmp(IF_LLADDR(ifp), IF_LLADDR(lp_ifp), ifp->if_addrlen) != 0) {
+ memcpy(IF_LLADDR(ifp), IF_LLADDR(lp_ifp), ifp->if_addrlen);
+ /* Run the event handlers in the lagg interface's vnet. */
+ CURVNET_SET(ifp->if_vnet);
+ EVENTHANDLER_INVOKE(iflladdr_event, ifp);
+ CURVNET_RESTORE();
+ }
+ }
+ LAGG_RUNLOCK();
+
+ /* Re-arm: schedule this handler again in hz ticks. */
+ callout_reset(&sc->sc_watchdog, hz, &lagg_watchdog_infiniband, arg);
+}
+
+static void
lagg_init(void *xsc)
{
struct lagg_softc *sc = (struct lagg_softc *)xsc;
@@ -1151,12 +1271,18 @@
*/
CK_SLIST_FOREACH(lp, &sc->sc_ports, lp_entries) {
if (memcmp(IF_LLADDR(ifp), IF_LLADDR(lp->lp_ifp),
- ETHER_ADDR_LEN) != 0)
- if_setlladdr(lp->lp_ifp, IF_LLADDR(ifp), ETHER_ADDR_LEN);
+ ifp->if_addrlen) != 0)
+ if_setlladdr(lp->lp_ifp, IF_LLADDR(ifp), ifp->if_addrlen);
}
lagg_proto_init(sc);
+ if (ifp->if_type == IFT_INFINIBAND) {
+ mtx_lock(&sc->sc_mtx);
+ lagg_watchdog_infiniband(sc);
+ mtx_unlock(&sc->sc_mtx);
+ }
+
LAGG_XUNLOCK(sc);
}
@@ -1173,6 +1299,12 @@
ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
lagg_proto_stop(sc);
+
+ mtx_lock(&sc->sc_mtx);
+ callout_stop(&sc->sc_watchdog);
+ mtx_unlock(&sc->sc_mtx);
+
+ callout_drain(&sc->sc_watchdog);
}
static int
@@ -1228,7 +1360,12 @@
error = EPROTONOSUPPORT;
break;
}
-
+ /* Infiniband only supports the failover protocol. */
+ if (ra->ra_proto != LAGG_PROTO_FAILOVER &&
+ ifp->if_type == IFT_INFINIBAND) {
+ error = EPROTONOSUPPORT;
+ break;
+ }
LAGG_XLOCK(sc);
lagg_proto_detach(sc);
LAGG_UNLOCK_ASSERT();
@@ -1546,7 +1683,10 @@
break;
case SIOCSIFMEDIA:
case SIOCGIFMEDIA:
- error = ifmedia_ioctl(ifp, ifr, &sc->sc_media, cmd);
+ if (ifp->if_type == IFT_INFINIBAND)
+ error = EINVAL;
+ else
+ error = ifmedia_ioctl(ifp, ifr, &sc->sc_media, cmd);
break;
case SIOCSIFCAP:
@@ -1855,7 +1995,7 @@
}
static int
-lagg_transmit(struct ifnet *ifp, struct mbuf *m)
+lagg_transmit_ethernet(struct ifnet *ifp, struct mbuf *m)
{
struct lagg_softc *sc = (struct lagg_softc *)ifp->if_softc;
int error;
@@ -1880,6 +2020,32 @@
return (error);
}
+/*
+ * Transmit routine for infiniband lagg interfaces (presumably installed as
+ * the interface's if_transmit; the assignment is outside this hunk).
+ *
+ * If no lagg protocol is configured or the lagg has no ports, the mbuf is
+ * freed, IFCOUNTER_OERRORS is incremented and ENXIO is returned. Otherwise
+ * the mbuf is offered to BPF and passed to lagg_proto_start(), whose
+ * return value is returned to the caller.
+ */
+static int
+lagg_transmit_infiniband(struct ifnet *ifp, struct mbuf *m)
+{
+ struct lagg_softc *sc = (struct lagg_softc *)ifp->if_softc;
+ int error;
+
+#if defined(KERN_TLS) || defined(RATELIMIT)
+ /* A send tag attached to the mbuf must belong to this lagg interface. */
+ if (m->m_pkthdr.csum_flags & CSUM_SND_TAG)
+ MPASS(m->m_pkthdr.snd_tag->ifp == ifp);
+#endif
+ LAGG_RLOCK();
+ /* We need a Tx algorithm and at least one port */
+ if (sc->sc_proto == LAGG_PROTO_NONE || sc->sc_count == 0) {
+ LAGG_RUNLOCK();
+ m_freem(m);
+ if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
+ return (ENXIO);
+ }
+
+ /* Let BPF listeners on the lagg interface see the outgoing packet. */
+ INFINIBAND_BPF_MTAP(ifp, m);
+
+ error = lagg_proto_start(sc, m);
+ LAGG_RUNLOCK();
+ return (error);
+}
+
/*
* The ifp->if_qflush entry point for lagg(4) is no-op.
*/
@@ -1889,7 +2055,7 @@
}
static struct mbuf *
-lagg_input(struct ifnet *ifp, struct mbuf *m)
+lagg_input_ethernet(struct ifnet *ifp, struct mbuf *m)
{
struct lagg_port *lp = ifp->if_lagg;
struct lagg_softc *sc = lp->lp_softc;
@@ -1916,6 +2082,34 @@
return (m);
}
+/*
+ * Input routine for packets received on an infiniband lagg member port.
+ * "ifp" is the member port; its lagg_port is taken from ifp->if_lagg.
+ *
+ * The mbuf is freed and NULL is returned when the lagg interface is not
+ * running, the port is detaching, or no lagg protocol is configured.
+ * Otherwise the packet is offered to BPF on the lagg interface and handed
+ * to lagg_proto_input(). If the lagg interface has IFF_MONITOR set, any
+ * mbuf returned by the protocol is freed as well.
+ *
+ * Returns the mbuf produced by lagg_proto_input(), or NULL if the packet
+ * was consumed or dropped.
+ */
+static struct mbuf *
+lagg_input_infiniband(struct ifnet *ifp, struct mbuf *m)
+{
+ struct lagg_port *lp = ifp->if_lagg;
+ struct lagg_softc *sc = lp->lp_softc;
+ struct ifnet *scifp = sc->sc_ifp;
+
+ LAGG_RLOCK();
+ if ((scifp->if_drv_flags & IFF_DRV_RUNNING) == 0 ||
+ lp->lp_detaching != 0 ||
+ sc->sc_proto == LAGG_PROTO_NONE) {
+ LAGG_RUNLOCK();
+ m_freem(m);
+ return (NULL);
+ }
+
+ /* Tap on the lagg interface, not on the member port. */
+ INFINIBAND_BPF_MTAP(scifp, m);
+
+ m = lagg_proto_input(sc, lp, m);
+ /* In monitor mode the packet is dropped after BPF has seen it. */
+ if (m != NULL && (scifp->if_flags & IFF_MONITOR) != 0) {
+ m_freem(m);
+ m = NULL;
+ }
+
+ LAGG_RUNLOCK();
+ return (m);
+}
+
static int
lagg_media_change(struct ifnet *ifp)
{
@@ -2236,7 +2430,10 @@
LAGG_XLOCK_ASSERT(sc);
lb = malloc(sizeof(struct lagg_lb), M_LAGG, M_WAITOK | M_ZERO);
- lb->lb_key = m_ether_tcpip_hash_init();
+ if (sc->sc_ifp->if_type == IFT_INFINIBAND)
+ lb->lb_key = m_infiniband_tcpip_hash_init();
+ else
+ lb->lb_key = m_ether_tcpip_hash_init();
sc->sc_psc = lb;
CK_SLIST_FOREACH(lp, &sc->sc_ports, lp_entries)
@@ -2303,6 +2500,8 @@
if ((sc->sc_opts & LAGG_OPT_USE_FLOWID) &&
M_HASHTYPE_GET(m) != M_HASHTYPE_NONE)
p = m->m_pkthdr.flowid >> sc->flowid_shift;
+ else if (sc->sc_ifp->if_type == IFT_INFINIBAND)
+ p = m_infiniband_tcpip_hash(sc->sc_flags, m, lb->lb_key);
else
p = m_ether_tcpip_hash(sc->sc_flags, m, lb->lb_key);
p %= sc->sc_count;
Index: sys/net/if_types.h
===================================================================
--- sys/net/if_types.h
+++ sys/net/if_types.h
@@ -242,6 +242,7 @@
IFT_OPTICALCHANNEL = 0xc3, /* Optical Channel */
IFT_OPTICALTRANSPORT = 0xc4, /* Optical Transport */
IFT_INFINIBAND = 0xc7, /* Infiniband */
+ IFT_INFINIBANDLAG = 0xc8, /* Infiniband Link Aggregate */
IFT_BRIDGE = 0xd1, /* Transparent bridge interface */
IFT_STF = 0xd7, /* 6to4 interface */
Index: sys/net/infiniband.h
===================================================================
--- sys/net/infiniband.h
+++ sys/net/infiniband.h
@@ -0,0 +1,80 @@
+/*-
+ * Copyright (c) 2020 Mellanox Technologies. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS `AS IS' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#ifndef __INFINIBAND_H__
+#define __INFINIBAND_H__
+
+#include <sys/cdefs.h>
+#include <sys/stdint.h>
+
+#define INFINIBAND_ADDR_LEN 20 /* bytes */
+#define INFINIBAND_MTU 1500 /* bytes - default value */
+
+#define INFINIBAND_ENC_LEN 4 /* bytes */
+/* Total header length: link-level address plus encapsulation bytes. */
+#define INFINIBAND_HDR_LEN \
+ (INFINIBAND_ADDR_LEN + INFINIBAND_ENC_LEN)
+
+/* True when byte 4 of the given link-level address is 0xff. */
+#define INFINIBAND_IS_MULTICAST(addr) \
+ ((addr)[4] == 0xff)
+
+/*
+ * Pass the mbuf to infiniband_bpf_mtap() only when bpf_peers_present()
+ * reports a listener on the interface; otherwise do nothing.
+ */
+#define INFINIBAND_BPF_MTAP(_ifp, _m) \
+do { \
+ if (bpf_peers_present((_ifp)->if_bpf)) { \
+ M_ASSERTVALID(_m); \
+ infiniband_bpf_mtap(_ifp, _m); \
+ } \
+} while (0)
+
+/*
+ * Packed link-level header: INFINIBAND_ADDR_LEN address bytes followed by
+ * a 16-bit protocol and a 16-bit reserved field, i.e. INFINIBAND_HDR_LEN
+ * bytes in total.
+ */
+struct infiniband_header {
+ uint8_t ib_hwaddr[INFINIBAND_ADDR_LEN];
+ uint16_t ib_protocol; /* big endian */
+ uint16_t ib_reserved; /* zero */
+} __packed;
+
+/* A bare INFINIBAND_ADDR_LEN-byte link-level address. */
+struct infiniband_address {
+ uint8_t octet[INFINIBAND_ADDR_LEN];
+} __packed;
+
+#ifdef _KERNEL
+
+#include <sys/_eventhandler.h>
+
+struct ifnet;
+struct mbuf;
+
+extern void infiniband_ifattach(struct ifnet *, const uint8_t *hwaddr, const uint8_t *bcaddr);
+extern void infiniband_ifdetach(struct ifnet *);
+extern void infiniband_bpf_mtap(struct ifnet *, struct mbuf *);
+
+/* new infiniband interface attached event */
+typedef void (*infiniband_ifattach_event_handler_t)(void *, struct ifnet *);
+
+EVENTHANDLER_DECLARE(infiniband_ifattach_event, infiniband_ifattach_event_handler_t);
+
+#endif
+
+#endif /* __INFINIBAND_H__ */
Index: sys/ofed/drivers/infiniband/ulp/ipoib/ipoib.h
===================================================================
--- sys/ofed/drivers/infiniband/ulp/ipoib/ipoib.h
+++ sys/ofed/drivers/infiniband/ulp/ipoib/ipoib.h
@@ -438,16 +438,7 @@
extern struct workqueue_struct *ipoib_workqueue;
-#define IPOIB_MTAP_PROTO(_ifp, _m, _proto) \
-do { \
- if (bpf_peers_present((_ifp)->if_bpf)) { \
- M_ASSERTVALID(_m); \
- ipoib_mtap_proto((_ifp), (_m), (_proto)); \
- } \
-} while (0)
-
/* functions */
-void ipoib_mtap_proto(struct ifnet *ifp, struct mbuf *mb, uint16_t proto);
void ipoib_ib_completion(struct ib_cq *cq, void *dev_ptr);
void ipoib_send_comp_handler(struct ib_cq *cq, void *dev_ptr);
@@ -463,8 +454,6 @@
int ipoib_add_pkey_attr(struct ipoib_dev_priv *priv);
int ipoib_add_umcast_attr(struct ipoib_dev_priv *priv);
-void ipoib_demux(struct ifnet *ifp, struct mbuf *m, u_short proto);
-
void ipoib_send(struct ipoib_dev_priv *priv, struct mbuf *mb,
struct ipoib_ah *address, u32 qpn);
void ipoib_reap_ah(struct work_struct *work);
@@ -540,7 +529,7 @@
void ipoib_dma_unmap_rx(struct ipoib_dev_priv *priv, struct ipoib_rx_buf *rx_req);
void ipoib_dma_mb(struct ipoib_dev_priv *priv, struct mbuf *mb, unsigned int length);
-struct mbuf *ipoib_alloc_map_mb(struct ipoib_dev_priv *priv, struct ipoib_rx_buf *rx_req, int size);
+struct mbuf *ipoib_alloc_map_mb(struct ipoib_dev_priv *priv, struct ipoib_rx_buf *rx_req, int align, int size);
void ipoib_set_ethtool_ops(struct ifnet *dev);
Index: sys/ofed/drivers/infiniband/ulp/ipoib/ipoib_cm.c
===================================================================
--- sys/ofed/drivers/infiniband/ulp/ipoib/ipoib_cm.c
+++ sys/ofed/drivers/infiniband/ulp/ipoib/ipoib_cm.c
@@ -153,7 +153,7 @@
ipoib_cm_alloc_rx_mb(struct ipoib_dev_priv *priv, struct ipoib_cm_rx_buf *rx_req)
{
return ipoib_alloc_map_mb(priv, (struct ipoib_rx_buf *)rx_req,
- priv->cm.max_cm_mtu);
+ sizeof(struct ipoib_pseudoheader), priv->cm.max_cm_mtu);
}
static void ipoib_cm_free_rx_ring(struct ipoib_dev_priv *priv,
@@ -484,10 +484,7 @@
struct mbuf *mb, *newmb;
struct ipoib_cm_rx *p;
int has_srq;
- u_short proto;
- CURVNET_SET_QUIET(dev->if_vnet);
-
ipoib_dbg_data(priv, "cm recv completion: id %d, status: %d\n",
wr_id, wc->status);
@@ -561,16 +558,24 @@
ipoib_dma_mb(priv, mb, wc->byte_len);
- if_inc_counter(dev, IFCOUNTER_IPACKETS, 1);
- if_inc_counter(dev, IFCOUNTER_IBYTES, mb->m_pkthdr.len);
-
mb->m_pkthdr.rcvif = dev;
- proto = *mtod(mb, uint16_t *);
- m_adj(mb, IPOIB_ENCAP_LEN);
- IPOIB_MTAP_PROTO(dev, mb, proto);
- ipoib_demux(dev, mb, ntohs(proto));
+ M_PREPEND(mb, sizeof(struct ipoib_pseudoheader), M_NOWAIT);
+ if (likely(mb != NULL)) {
+ struct ipoib_header *ibh;
+ if_inc_counter(dev, IFCOUNTER_IPACKETS, 1);
+ if_inc_counter(dev, IFCOUNTER_IBYTES, mb->m_pkthdr.len);
+
+ /* fixup destination infiniband address */
+ ibh = mtod(mb, struct ipoib_header *);
+ memset(ibh->hwaddr, 0, 4);
+ memcpy(ibh->hwaddr + 4, priv->local_gid.raw, sizeof(union ib_gid));
+
+ dev->if_input(dev, mb);
+ } else {
+ if_inc_counter(dev, IFCOUNTER_IERRORS, 1);
+ }
repost:
if (has_srq) {
if (unlikely(ipoib_cm_post_receive_srq(priv, wr_id)))
@@ -587,7 +592,6 @@
}
}
done:
- CURVNET_RESTORE();
return;
}
Index: sys/ofed/drivers/infiniband/ulp/ipoib/ipoib_ib.c
===================================================================
--- sys/ofed/drivers/infiniband/ulp/ipoib/ipoib_ib.c
+++ sys/ofed/drivers/infiniband/ulp/ipoib/ipoib_ib.c
@@ -112,17 +112,19 @@
struct mbuf *
ipoib_alloc_map_mb(struct ipoib_dev_priv *priv, struct ipoib_rx_buf *rx_req,
- int size)
+ int align, int size)
{
struct mbuf *mb, *m;
int i, j;
rx_req->mb = NULL;
- mb = m_getm2(NULL, size, M_NOWAIT, MT_DATA, M_PKTHDR);
+ mb = m_getm2(NULL, align + size, M_NOWAIT, MT_DATA, M_PKTHDR);
if (mb == NULL)
return (NULL);
for (i = 0, m = mb; m != NULL; m = m->m_next, i++) {
- m->m_len = M_SIZE(m);
+ m->m_len = M_SIZE(m) - align;
+ m->m_data += align;
+ align = 0;
mb->m_pkthdr.len += m->m_len;
rx_req->mapping[i] = ib_dma_map_single(priv->ca,
mtod(m, void *), m->m_len, DMA_FROM_DEVICE);
@@ -174,7 +176,7 @@
{
return ipoib_alloc_map_mb(priv, &priv->rx_ring[id],
- priv->max_ib_mtu + IB_GRH_BYTES);
+ 0, priv->max_ib_mtu + IB_GRH_BYTES);
}
static int ipoib_ib_post_receives(struct ipoib_dev_priv *priv)
Index: sys/ofed/drivers/infiniband/ulp/ipoib/ipoib_main.c
===================================================================
--- sys/ofed/drivers/infiniband/ulp/ipoib/ipoib_main.c
+++ sys/ofed/drivers/infiniband/ulp/ipoib/ipoib_main.c
@@ -40,21 +40,16 @@
#include "ipoib.h"
#include <sys/eventhandler.h>
-static int ipoib_resolvemulti(struct ifnet *, struct sockaddr **,
- struct sockaddr *);
-
-
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/kernel.h>
#include <linux/vmalloc.h>
-#include <linux/if_arp.h> /* For ARPHRD_xxx */
#include <linux/if_vlan.h>
-#include <net/ip.h>
-#include <net/ipv6.h>
+#include <net/infiniband.h>
+
#include <rdma/ib_cache.h>
MODULE_AUTHOR("Roland Dreier");
@@ -98,19 +93,8 @@
const union ib_gid *gid, const struct sockaddr *addr,
void *client_data);
static void ipoib_start(struct ifnet *dev);
-static int ipoib_output(struct ifnet *ifp, struct mbuf *m,
- const struct sockaddr *dst, struct route *ro);
static int ipoib_ioctl(struct ifnet *ifp, u_long command, caddr_t data);
-static void ipoib_input(struct ifnet *ifp, struct mbuf *m);
-#define IPOIB_MTAP(_ifp, _m) \
-do { \
- if (bpf_peers_present((_ifp)->if_bpf)) { \
- M_ASSERTVALID(_m); \
- ipoib_mtap_mb((_ifp), (_m)); \
- } \
-} while (0)
-
static struct unrhdr *ipoib_unrhdr;
static void
@@ -136,37 +120,6 @@
}
SYSUNINIT(ipoib_unrhdr_uninit, SI_SUB_KLD - 1, SI_ORDER_ANY, ipoib_unrhdr_uninit, NULL);
-/*
- * This is for clients that have an ipoib_header in the mbuf.
- */
-static void
-ipoib_mtap_mb(struct ifnet *ifp, struct mbuf *mb)
-{
- struct ipoib_header *ih;
- struct ether_header eh;
-
- ih = mtod(mb, struct ipoib_header *);
- eh.ether_type = ih->proto;
- bcopy(ih->hwaddr, &eh.ether_dhost, ETHER_ADDR_LEN);
- bzero(&eh.ether_shost, ETHER_ADDR_LEN);
- mb->m_data += sizeof(struct ipoib_header);
- mb->m_len -= sizeof(struct ipoib_header);
- bpf_mtap2(ifp->if_bpf, &eh, sizeof(eh), mb);
- mb->m_data -= sizeof(struct ipoib_header);
- mb->m_len += sizeof(struct ipoib_header);
-}
-
-void
-ipoib_mtap_proto(struct ifnet *ifp, struct mbuf *mb, uint16_t proto)
-{
- struct ether_header eh;
-
- eh.ether_type = proto;
- bzero(&eh.ether_shost, ETHER_ADDR_LEN);
- bzero(&eh.ether_dhost, ETHER_ADDR_LEN);
- bpf_mtap2(ifp->if_bpf, &eh, sizeof(eh), mb);
-}
-
static struct ib_client ipoib_client = {
.name = "ipoib",
.add = ipoib_add_one,
@@ -787,7 +740,7 @@
IFQ_DRV_DEQUEUE(&dev->if_snd, mb);
if (mb == NULL)
break;
- IPOIB_MTAP(dev, mb);
+ INFINIBAND_BPF_MTAP(dev, mb);
ipoib_send_one(priv, mb);
}
}
@@ -875,8 +828,7 @@
dev = priv->dev;
if (!test_bit(IPOIB_FLAG_SUBINTERFACE, &priv->flags)) {
priv->gone = 1;
- bpfdetach(dev);
- if_detach(dev);
+ infiniband_ifdetach(dev);
if_free(dev);
free_unr(ipoib_unrhdr, priv->unit);
} else
@@ -935,7 +887,6 @@
ipoib_intf_alloc(const char *name)
{
struct ipoib_dev_priv *priv;
- struct sockaddr_dl *sdl;
struct ifnet *dev;
priv = ipoib_priv_alloc();
@@ -953,24 +904,17 @@
}
if_initname(dev, name, priv->unit);
dev->if_flags = IFF_BROADCAST | IFF_MULTICAST;
- dev->if_addrlen = INFINIBAND_ALEN;
- dev->if_hdrlen = IPOIB_HEADER_LEN;
- if_attach(dev);
+
+ infiniband_ifattach(dev, NULL, priv->broadcastaddr);
+
dev->if_init = ipoib_init;
dev->if_ioctl = ipoib_ioctl;
dev->if_start = ipoib_start;
- dev->if_output = ipoib_output;
- dev->if_input = ipoib_input;
- dev->if_resolvemulti = ipoib_resolvemulti;
- dev->if_baudrate = IF_Gbps(10);
- dev->if_broadcastaddr = priv->broadcastaddr;
+
dev->if_snd.ifq_maxlen = ipoib_sendq_size * 2;
- sdl = (struct sockaddr_dl *)dev->if_addr->ifa_addr;
- sdl->sdl_type = IFT_INFINIBAND;
- sdl->sdl_alen = dev->if_addrlen;
+
priv->dev = dev;
if_link_state_change(dev, LINK_STATE_DOWN);
- bpfattach(dev, DLT_EN10MB, ETHER_HDR_LEN);
return dev->if_softc;
}
@@ -1165,7 +1109,6 @@
struct ifaddr *ifa;
int retval = 0;
- CURVNET_SET(dev->if_vnet);
NET_EPOCH_ENTER(et);
CK_STAILQ_FOREACH(ifa, &dev->if_addrhead, ifa_link) {
if (ifa->ifa_addr == NULL ||
@@ -1179,7 +1122,6 @@
}
}
NET_EPOCH_EXIT(et);
- CURVNET_RESTORE();
return (retval);
}
@@ -1475,286 +1417,6 @@
ib_sa_unregister_client(&ipoib_sa_client);
destroy_workqueue(ipoib_workqueue);
}
-
-/*
- * Infiniband output routine.
- */
-static int
-ipoib_output(struct ifnet *ifp, struct mbuf *m,
- const struct sockaddr *dst, struct route *ro)
-{
- u_char edst[INFINIBAND_ALEN];
-#if defined(INET) || defined(INET6)
- struct llentry *lle = NULL;
-#endif
- struct ipoib_header *eh;
- int error = 0, is_gw = 0;
- short type;
-
- NET_EPOCH_ASSERT();
-
- if (ro != NULL)
- is_gw = (ro->ro_flags & RT_HAS_GW) != 0;
-#ifdef MAC
- error = mac_ifnet_check_transmit(ifp, m);
- if (error)
- goto bad;
-#endif
-
- M_PROFILE(m);
- if (ifp->if_flags & IFF_MONITOR) {
- error = ENETDOWN;
- goto bad;
- }
- if (!((ifp->if_flags & IFF_UP) &&
- (ifp->if_drv_flags & IFF_DRV_RUNNING))) {
- error = ENETDOWN;
- goto bad;
- }
-
- switch (dst->sa_family) {
-#ifdef INET
- case AF_INET:
- if (lle != NULL && (lle->la_flags & LLE_VALID))
- memcpy(edst, lle->ll_addr, sizeof(edst));
- else if (m->m_flags & M_MCAST)
- ip_ib_mc_map(((struct sockaddr_in *)dst)->sin_addr.s_addr, ifp->if_broadcastaddr, edst);
- else
- error = arpresolve(ifp, is_gw, m, dst, edst, NULL, NULL);
- if (error)
- return (error == EWOULDBLOCK ? 0 : error);
- type = htons(ETHERTYPE_IP);
- break;
- case AF_ARP:
- {
- struct arphdr *ah;
- ah = mtod(m, struct arphdr *);
- ah->ar_hrd = htons(ARPHRD_INFINIBAND);
-
- switch(ntohs(ah->ar_op)) {
- case ARPOP_REVREQUEST:
- case ARPOP_REVREPLY:
- type = htons(ETHERTYPE_REVARP);
- break;
- case ARPOP_REQUEST:
- case ARPOP_REPLY:
- default:
- type = htons(ETHERTYPE_ARP);
- break;
- }
-
- if (m->m_flags & M_BCAST)
- bcopy(ifp->if_broadcastaddr, edst, INFINIBAND_ALEN);
- else
- bcopy(ar_tha(ah), edst, INFINIBAND_ALEN);
-
- }
- break;
-#endif
-#ifdef INET6
- case AF_INET6:
- if (lle != NULL && (lle->la_flags & LLE_VALID))
- memcpy(edst, lle->ll_addr, sizeof(edst));
- else if (m->m_flags & M_MCAST)
- ipv6_ib_mc_map(&((struct sockaddr_in6 *)dst)->sin6_addr, ifp->if_broadcastaddr, edst);
- else
- error = nd6_resolve(ifp, is_gw, m, dst, edst, NULL, NULL);
- if (error)
- return error;
- type = htons(ETHERTYPE_IPV6);
- break;
-#endif
-
- default:
- if_printf(ifp, "can't handle af%d\n", dst->sa_family);
- error = EAFNOSUPPORT;
- goto bad;
- }
-
- /*
- * Add local net header. If no space in first mbuf,
- * allocate another.
- */
- M_PREPEND(m, IPOIB_HEADER_LEN, M_NOWAIT);
- if (m == NULL) {
- error = ENOBUFS;
- goto bad;
- }
- eh = mtod(m, struct ipoib_header *);
- (void)memcpy(&eh->proto, &type, sizeof(eh->proto));
- (void)memcpy(&eh->hwaddr, edst, sizeof (edst));
-
- /*
- * Queue message on interface, update output statistics if
- * successful, and start output if interface not yet active.
- */
- return ((ifp->if_transmit)(ifp, m));
-bad:
- if (m != NULL)
- m_freem(m);
- return (error);
-}
-
-/*
- * Upper layer processing for a received Infiniband packet.
- */
-void
-ipoib_demux(struct ifnet *ifp, struct mbuf *m, u_short proto)
-{
- struct epoch_tracker et;
- int isr;
-
-#ifdef MAC
- /*
- * Tag the mbuf with an appropriate MAC label before any other
- * consumers can get to it.
- */
- mac_ifnet_create_mbuf(ifp, m);
-#endif
- /* Allow monitor mode to claim this frame, after stats are updated. */
- if (ifp->if_flags & IFF_MONITOR) {
- if_printf(ifp, "discard frame at IFF_MONITOR\n");
- m_freem(m);
- return;
- }
- /* Direct packet to correct FIB based on interface config */
- M_SETFIB(m, ifp->if_fib);
- /*
- * Dispatch frame to upper layer.
- */
- switch (proto) {
-#ifdef INET
- case ETHERTYPE_IP:
- isr = NETISR_IP;
- break;
-
- case ETHERTYPE_ARP:
- if (ifp->if_flags & IFF_NOARP) {
- /* Discard packet if ARP is disabled on interface */
- m_freem(m);
- return;
- }
- isr = NETISR_ARP;
- break;
-#endif
-#ifdef INET6
- case ETHERTYPE_IPV6:
- isr = NETISR_IPV6;
- break;
-#endif
- default:
- goto discard;
- }
- NET_EPOCH_ENTER(et);
- netisr_dispatch(isr, m);
- NET_EPOCH_EXIT(et);
- return;
-
-discard:
- m_freem(m);
-}
-
-/*
- * Process a received Infiniband packet.
- */
-static void
-ipoib_input(struct ifnet *ifp, struct mbuf *m)
-{
- struct ipoib_header *eh;
-
- if ((ifp->if_flags & IFF_UP) == 0) {
- m_freem(m);
- return;
- }
- CURVNET_SET_QUIET(ifp->if_vnet);
-
- /* Let BPF have it before we strip the header. */
- IPOIB_MTAP(ifp, m);
- eh = mtod(m, struct ipoib_header *);
- /*
- * Reset layer specific mbuf flags to avoid confusing upper layers.
- * Strip off Infiniband header.
- */
- m->m_flags &= ~M_VLANTAG;
- m_clrprotoflags(m);
- m_adj(m, IPOIB_HEADER_LEN);
-
- if (IPOIB_IS_MULTICAST(eh->hwaddr)) {
- if (memcmp(eh->hwaddr, ifp->if_broadcastaddr,
- ifp->if_addrlen) == 0)
- m->m_flags |= M_BCAST;
- else
- m->m_flags |= M_MCAST;
- if_inc_counter(ifp, IFCOUNTER_IMCASTS, 1);
- }
-
- ipoib_demux(ifp, m, ntohs(eh->proto));
- CURVNET_RESTORE();
-}
-
-static int
-ipoib_resolvemulti(struct ifnet *ifp, struct sockaddr **llsa,
- struct sockaddr *sa)
-{
- struct sockaddr_dl *sdl;
-#ifdef INET
- struct sockaddr_in *sin;
-#endif
-#ifdef INET6
- struct sockaddr_in6 *sin6;
-#endif
- u_char *e_addr;
-
- switch(sa->sa_family) {
- case AF_LINK:
- /*
- * No mapping needed. Just check that it's a valid MC address.
- */
- sdl = (struct sockaddr_dl *)sa;
- e_addr = LLADDR(sdl);
- if (!IPOIB_IS_MULTICAST(e_addr))
- return EADDRNOTAVAIL;
- *llsa = NULL;
- return 0;
-
-#ifdef INET
- case AF_INET:
- sin = (struct sockaddr_in *)sa;
- if (!IN_MULTICAST(ntohl(sin->sin_addr.s_addr)))
- return EADDRNOTAVAIL;
- sdl = link_init_sdl(ifp, *llsa, IFT_INFINIBAND);
- sdl->sdl_alen = INFINIBAND_ALEN;
- e_addr = LLADDR(sdl);
- ip_ib_mc_map(sin->sin_addr.s_addr, ifp->if_broadcastaddr,
- e_addr);
- *llsa = (struct sockaddr *)sdl;
- return 0;
-#endif
-#ifdef INET6
- case AF_INET6:
- sin6 = (struct sockaddr_in6 *)sa;
- /*
- * An IP6 address of 0 means listen to all
- * of the multicast address used for IP6.
- * This has no meaning in ipoib.
- */
- if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr))
- return EADDRNOTAVAIL;
- if (!IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr))
- return EADDRNOTAVAIL;
- sdl = link_init_sdl(ifp, *llsa, IFT_INFINIBAND);
- sdl->sdl_alen = INFINIBAND_ALEN;
- e_addr = LLADDR(sdl);
- ipv6_ib_mc_map(&sin6->sin6_addr, ifp->if_broadcastaddr, e_addr);
- *llsa = (struct sockaddr *)sdl;
- return 0;
-#endif
-
- default:
- return EAFNOSUPPORT;
- }
-}
-
module_init_order(ipoib_init_module, SI_ORDER_FIFTH);
module_exit_order(ipoib_cleanup_module, SI_ORDER_FIFTH);
@@ -1771,4 +1433,5 @@
DECLARE_MODULE(ipoib, ipoib_mod, SI_SUB_LAST, SI_ORDER_ANY);
MODULE_DEPEND(ipoib, ibcore, 1, 1, 1);
+MODULE_DEPEND(ipoib, if_infiniband, 1, 1, 1);
MODULE_DEPEND(ipoib, linuxkpi, 1, 1, 1);
Index: sys/sys/mbuf.h
===================================================================
--- sys/sys/mbuf.h
+++ sys/sys/mbuf.h
@@ -1455,14 +1455,16 @@
((_m)->m_pkthdr.fibnum) = (_fib); \
} while (0)
-/* flags passed as first argument for "m_ether_tcpip_hash()" */
+/* flags passed as first argument for "m_xxx_tcpip_hash()" */
#define MBUF_HASHFLAG_L2 (1 << 2)
#define MBUF_HASHFLAG_L3 (1 << 3)
#define MBUF_HASHFLAG_L4 (1 << 4)
/* mbuf hashing helper routines */
uint32_t m_ether_tcpip_hash_init(void);
-uint32_t m_ether_tcpip_hash(const uint32_t, const struct mbuf *, const uint32_t);
+uint32_t m_ether_tcpip_hash(const uint32_t, const struct mbuf *, uint32_t);
+uint32_t m_infiniband_tcpip_hash_init(void);
+uint32_t m_infiniband_tcpip_hash(const uint32_t, const struct mbuf *, uint32_t);
#ifdef MBUF_PROFILING
void m_profile(struct mbuf *m);
File Metadata
Details
Attached
Mime Type
text/plain
Expires
Sun, Feb 22, 11:58 PM (9 h, 11 m)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
28946414
Default Alt Text
D26254.id78536.diff (57 KB)
Attached To
Mode
D26254: Add support for IPoIB lagg devices in FreeBSD
Attached
Detach File
Event Timeline
Log In to Comment