Page MenuHomeFreeBSD

D26254.id78536.diff
No OneTemporary

D26254.id78536.diff

Index: sbin/ifconfig/ifconfig.8
===================================================================
--- sbin/ifconfig/ifconfig.8
+++ sbin/ifconfig/ifconfig.8
@@ -28,7 +28,7 @@
.\" From: @(#)ifconfig.8 8.3 (Berkeley) 1/5/94
.\" $FreeBSD$
.\"
-.Dd September 17, 2020
+.Dd October 21, 2020
.Dt IFCONFIG 8
.Os
.Sh NAME
@@ -2481,6 +2481,12 @@
.Pp
The following parameters are specific to lagg interfaces:
.Bl -tag -width indent
+.It Cm laggtype Ar type
+When creating a lagg interface the type can be specified as either
+.Cm ethernet
+or
+.Cm infiniband .
+If not specified ethernet is the default lagg type.
.It Cm laggport Ar interface
Add the interface named by
.Ar interface
Index: sbin/ifconfig/iflagg.c
===================================================================
--- sbin/ifconfig/iflagg.c
+++ sbin/ifconfig/iflagg.c
@@ -30,8 +30,12 @@
#include "ifconfig.h"
-char lacpbuf[120]; /* LACP peer '[(a,a,a),(p,p,p)]' */
+static struct iflaggparam params = {
+ .lagg_type = LAGG_TYPE_DEFAULT,
+};
+static char lacpbuf[120]; /* LACP peer '[(a,a,a),(p,p,p)]' */
+
static void
setlaggport(const char *val, int d, int s, const struct afswtch *afp)
{
@@ -301,7 +305,31 @@
}
}
+static
+DECL_CMD_FUNC(setlaggtype, arg, d)
+{
+ static const struct lagg_types lt[] = LAGG_TYPES;
+ int i;
+
+ for (i = 0; i < nitems(lt); i++) {
+ if (strcmp(arg, lt[i].lt_name) == 0) {
+ params.lagg_type = lt[i].lt_value;
+ return;
+ }
+ }
+ errx(1, "invalid lagg type: %s", arg);
+}
+
+static void
+lagg_create(int s, struct ifreq *ifr)
+{
+ ifr->ifr_data = (caddr_t) &params;
+ if (ioctl(s, SIOCIFCREATE2, ifr) < 0)
+ err(1, "SIOCIFCREATE2");
+}
+
static struct cmd lagg_cmds[] = {
+ DEF_CLONE_CMD_ARG("laggtype", setlaggtype),
DEF_CMD_ARG("laggport", setlaggport),
DEF_CMD_ARG("-laggport", unsetlaggport),
DEF_CMD_ARG("laggproto", setlaggproto),
@@ -335,4 +363,5 @@
for (i = 0; i < nitems(lagg_cmds); i++)
cmd_register(&lagg_cmds[i]);
af_register(&af_lagg);
+ clone_setdefcallback("lagg", lagg_create);
}
Index: share/man/man4/lagg.4
===================================================================
--- share/man/man4/lagg.4
+++ share/man/man4/lagg.4
@@ -16,7 +16,7 @@
.\"
.\" $FreeBSD$
.\"
-.Dd November 18, 2017
+.Dd October 21, 2020
.Dt LAGG 4
.Os
.Sh NAME
@@ -192,6 +192,15 @@
.Pp
(Note the mac address of the wireless device is forced to match the wired
device as a workaround.)
+.Pp
+The following example shows how to create an infiniband failover interface.
+.Bd -literal -offset indent
+# ifconfig ib0 up
+# ifconfig ib1 up
+# ifconfig lagg0 create laggtype infiniband
+# ifconfig lagg0 laggproto failover laggport ib0 laggport ib1 \e
+ 1.1.1.1 netmask 255.255.255.0
+.Ed
.Sh SEE ALSO
.Xr ng_one2many 4 ,
.Xr ifconfig 8 ,
Index: sys/conf/files
===================================================================
--- sys/conf/files
+++ sys/conf/files
@@ -4571,6 +4571,7 @@
compile-with "${LINUXKPI_C}"
# OpenFabrics Enterprise Distribution (Infiniband)
+net/if_infiniband.c optional ofed
ofed/drivers/infiniband/core/ib_addr.c optional ofed \
compile-with "${OFED_C}"
ofed/drivers/infiniband/core/ib_agent.c optional ofed \
Index: sys/kern/uipc_mbufhash.c
===================================================================
--- sys/kern/uipc_mbufhash.c
+++ sys/kern/uipc_mbufhash.c
@@ -28,6 +28,7 @@
#include <sys/fnv_hash.h>
#include <net/ethernet.h>
+#include <net/infiniband.h>
#if defined(INET) || defined(INET6)
#include <netinet/in.h>
@@ -42,7 +43,7 @@
#endif
static const void *
-m_ether_tcpip_hash_gethdr(const struct mbuf *m, const u_int off,
+m_common_hash_gethdr(const struct mbuf *m, const u_int off,
const u_int len, void *buf)
{
@@ -65,9 +66,18 @@
}
uint32_t
-m_ether_tcpip_hash(const uint32_t flags, const struct mbuf *m,
- const uint32_t key)
+m_infiniband_tcpip_hash_init(void)
{
+ uint32_t seed;
+
+ seed = arc4random();
+ return (fnv_32_buf(&seed, sizeof(seed), FNV1_32_INIT));
+}
+
+static inline uint32_t
+m_tcpip_hash(const uint32_t flags, const struct mbuf *m,
+ uint32_t p, int off, const uint16_t etype)
+{
union {
#ifdef INET
struct ip ip;
@@ -75,49 +85,19 @@
#ifdef INET6
struct ip6_hdr ip6;
#endif
- struct ether_vlan_header vlan;
uint32_t port;
} buf;
- const struct ether_header *eh;
- const struct ether_vlan_header *vlan;
#ifdef INET
const struct ip *ip;
#endif
#ifdef INET6
const struct ip6_hdr *ip6;
#endif
- uint32_t p;
- int off;
- uint16_t etype;
- p = key;
- off = sizeof(*eh);
- if (m->m_len < off)
- goto done;
- eh = mtod(m, struct ether_header *);
- etype = ntohs(eh->ether_type);
- if (flags & MBUF_HASHFLAG_L2) {
- p = fnv_32_buf(&eh->ether_shost, ETHER_ADDR_LEN, p);
- p = fnv_32_buf(&eh->ether_dhost, ETHER_ADDR_LEN, p);
- }
- /* Special handling for encapsulating VLAN frames */
- if ((m->m_flags & M_VLANTAG) && (flags & MBUF_HASHFLAG_L2)) {
- p = fnv_32_buf(&m->m_pkthdr.ether_vtag,
- sizeof(m->m_pkthdr.ether_vtag), p);
- } else if (etype == ETHERTYPE_VLAN) {
- vlan = m_ether_tcpip_hash_gethdr(m, off, sizeof(*vlan), &buf);
- if (vlan == NULL)
- goto done;
-
- if (flags & MBUF_HASHFLAG_L2)
- p = fnv_32_buf(&vlan->evl_tag, sizeof(vlan->evl_tag), p);
- etype = ntohs(vlan->evl_proto);
- off += sizeof(*vlan) - sizeof(*eh);
- }
switch (etype) {
#ifdef INET
case ETHERTYPE_IP:
- ip = m_ether_tcpip_hash_gethdr(m, off, sizeof(*ip), &buf);
+ ip = m_common_hash_gethdr(m, off, sizeof(*ip), &buf);
if (ip == NULL)
break;
if (flags & MBUF_HASHFLAG_L3) {
@@ -136,7 +116,7 @@
if (iphlen < sizeof(*ip))
break;
off += iphlen;
- ports = m_ether_tcpip_hash_gethdr(m,
+ ports = m_common_hash_gethdr(m,
off, sizeof(*ports), &buf);
if (ports == NULL)
break;
@@ -150,7 +130,7 @@
#endif
#ifdef INET6
case ETHERTYPE_IPV6:
- ip6 = m_ether_tcpip_hash_gethdr(m, off, sizeof(*ip6), &buf);
+ ip6 = m_common_hash_gethdr(m, off, sizeof(*ip6), &buf);
if (ip6 == NULL)
break;
if (flags & MBUF_HASHFLAG_L3) {
@@ -169,6 +149,62 @@
default:
break;
}
-done:
return (p);
}
+
+uint32_t
+m_ether_tcpip_hash(const uint32_t flags, const struct mbuf *m,
+ uint32_t p)
+{
+ union {
+ struct ether_vlan_header vlan;
+ } buf;
+ const struct ether_header *eh;
+ const struct ether_vlan_header *vlan;
+ int off;
+ uint16_t etype;
+
+ off = sizeof(*eh);
+ if (m->m_len < off)
+ return (p);
+ eh = mtod(m, struct ether_header *);
+ etype = ntohs(eh->ether_type);
+ if (flags & MBUF_HASHFLAG_L2) {
+ p = fnv_32_buf(&eh->ether_shost, ETHER_ADDR_LEN, p);
+ p = fnv_32_buf(&eh->ether_dhost, ETHER_ADDR_LEN, p);
+ }
+ /* Special handling for encapsulating VLAN frames */
+ if ((m->m_flags & M_VLANTAG) && (flags & MBUF_HASHFLAG_L2)) {
+ p = fnv_32_buf(&m->m_pkthdr.ether_vtag,
+ sizeof(m->m_pkthdr.ether_vtag), p);
+ } else if (etype == ETHERTYPE_VLAN) {
+ vlan = m_common_hash_gethdr(m, off, sizeof(*vlan), &buf);
+ if (vlan == NULL)
+ return (p);
+
+ if (flags & MBUF_HASHFLAG_L2)
+ p = fnv_32_buf(&vlan->evl_tag, sizeof(vlan->evl_tag), p);
+ etype = ntohs(vlan->evl_proto);
+ off += sizeof(*vlan) - sizeof(*eh);
+ }
+ return (m_tcpip_hash(flags, m, p, off, etype));
+}
+
+uint32_t
+m_infiniband_tcpip_hash(const uint32_t flags, const struct mbuf *m,
+ uint32_t p)
+{
+ const struct infiniband_header *ibh;
+ int off;
+ uint16_t etype;
+
+ off = sizeof(*ibh);
+ if (m->m_len < off)
+ return (p);
+ ibh = mtod(m, struct infiniband_header *);
+ etype = ntohs(ibh->ib_protocol);
+ if (flags & MBUF_HASHFLAG_L2)
+ p = fnv_32_buf(&ibh->ib_hwaddr, INFINIBAND_ADDR_LEN, p);
+
+ return (m_tcpip_hash(flags, m, p, off, etype));
+}
Index: sys/modules/Makefile
===================================================================
--- sys/modules/Makefile
+++ sys/modules/Makefile
@@ -154,6 +154,7 @@
${_if_gif} \
${_if_gre} \
${_if_me} \
+ if_infiniband \
if_lagg \
${_if_ndis} \
${_if_stf} \
Index: sys/modules/if_infiniband/Makefile
===================================================================
--- sys/modules/if_infiniband/Makefile
+++ sys/modules/if_infiniband/Makefile
@@ -0,0 +1,10 @@
+# $FreeBSD$
+
+.PATH: ${SRCTOP}/sys/net
+
+KMOD= if_infiniband
+SRCS= if_infiniband.c \
+ opt_inet.h \
+ opt_inet6.h
+
+.include <bsd.kmod.mk>
Index: sys/net/ieee8023ad_lacp.c
===================================================================
--- sys/net/ieee8023ad_lacp.c
+++ sys/net/ieee8023ad_lacp.c
@@ -54,6 +54,7 @@
#include <net/if_var.h>
#include <net/if_dl.h>
#include <net/ethernet.h>
+#include <net/infiniband.h>
#include <net/if_media.h>
#include <net/if_types.h>
Index: sys/net/if_ethersubr.c
===================================================================
--- sys/net/if_ethersubr.c
+++ sys/net/if_ethersubr.c
@@ -110,7 +110,7 @@
void (*bridge_dn_p)(struct mbuf *, struct ifnet *);
/* if_lagg(4) support */
-struct mbuf *(*lagg_input_p)(struct ifnet *, struct mbuf *);
+struct mbuf *(*lagg_input_ethernet_p)(struct ifnet *, struct mbuf *);
static const u_char etherbroadcastaddr[ETHER_ADDR_LEN] =
{ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff };
@@ -601,9 +601,9 @@
/* Handle input from a lagg(4) port */
if (ifp->if_type == IFT_IEEE8023ADLAG) {
- KASSERT(lagg_input_p != NULL,
+ KASSERT(lagg_input_ethernet_p != NULL,
("%s: if_lagg not loaded!", __func__));
- m = (*lagg_input_p)(ifp, m);
+ m = (*lagg_input_ethernet_p)(ifp, m);
if (m != NULL)
ifp = m->m_pkthdr.rcvif;
else {
Index: sys/net/if_infiniband.c
===================================================================
--- sys/net/if_infiniband.c
+++ sys/net/if_infiniband.c
@@ -0,0 +1,539 @@
+/*-
+ * Copyright (c) 2020 Mellanox Technologies. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS `AS IS' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include "opt_inet.h"
+#include "opt_inet6.h"
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD:");
+
+#include <sys/types.h>
+#include <sys/systm.h>
+#include <sys/eventhandler.h>
+#include <sys/socket.h>
+#include <sys/sysctl.h>
+#include <sys/devctl.h>
+#include <sys/module.h>
+
+#include <net/if.h>
+#include <net/if_var.h>
+#include <net/route.h>
+#include <net/ethernet.h>
+#include <net/infiniband.h>
+#include <net/bpf.h>
+#include <net/if_llatbl.h>
+#include <net/netisr.h>
+#include <net/if_dl.h>
+#include <net/if_types.h>
+#include <net/if_media.h>
+#include <net/if_lagg.h>
+
+#include <netinet/in.h>
+#include <netinet/if_ether.h>
+#include <netinet/ip6.h>
+
+#include <netinet6/in6_var.h>
+#include <netinet6/nd6.h>
+
+#include <security/mac/mac_framework.h>
+
+/* if_lagg(4) support */
+struct mbuf *(*lagg_input_infiniband_p)(struct ifnet *, struct mbuf *);
+
+#ifdef INET
+static inline void
+infiniband_ipv4_multicast_map(uint32_t addr,
+ const uint8_t *broadcast, uint8_t *buf)
+{
+ uint8_t scope;
+
+ addr = ntohl(addr);
+ scope = broadcast[5] & 0xF;
+
+ buf[0] = 0;
+ buf[1] = 0xff;
+ buf[2] = 0xff;
+ buf[3] = 0xff;
+ buf[4] = 0xff;
+ buf[5] = 0x10 | scope;
+ buf[6] = 0x40;
+ buf[7] = 0x1b;
+ buf[8] = broadcast[8];
+ buf[9] = broadcast[9];
+ buf[10] = 0;
+ buf[11] = 0;
+ buf[12] = 0;
+ buf[13] = 0;
+ buf[14] = 0;
+ buf[15] = 0;
+ buf[16] = (addr >> 24) & 0xff;
+ buf[17] = (addr >> 16) & 0xff;
+ buf[18] = (addr >> 8) & 0xff;
+ buf[19] = addr & 0xff;
+}
+#endif
+
+#ifdef INET6
+static inline void
+infiniband_ipv6_multicast_map(const struct in6_addr *addr,
+ const uint8_t *broadcast, uint8_t *buf)
+{
+ uint8_t scope;
+
+ scope = broadcast[5] & 0xF;
+
+ buf[0] = 0;
+ buf[1] = 0xff;
+ buf[2] = 0xff;
+ buf[3] = 0xff;
+ buf[4] = 0xff;
+ buf[5] = 0x10 | scope;
+ buf[6] = 0x60;
+ buf[7] = 0x1b;
+ buf[8] = broadcast[8];
+ buf[9] = broadcast[9];
+ memcpy(&buf[10], &addr->s6_addr[6], 10);
+}
+#endif
+
+/*
+ * This is for clients that have an infiniband_header in the mbuf.
+ */
+void
+infiniband_bpf_mtap(struct ifnet *ifp, struct mbuf *mb)
+{
+ struct infiniband_header *ibh;
+ struct ether_header eh;
+
+ if (mb->m_len < sizeof(*ibh))
+ return;
+
+ ibh = mtod(mb, struct infiniband_header *);
+ eh.ether_type = ibh->ib_protocol;
+ memset(eh.ether_shost, 0, ETHER_ADDR_LEN);
+ memcpy(eh.ether_dhost, ibh->ib_hwaddr + 4, ETHER_ADDR_LEN);
+ mb->m_data += sizeof(*ibh);
+ mb->m_len -= sizeof(*ibh);
+ mb->m_pkthdr.len -= sizeof(*ibh);
+ bpf_mtap2(ifp->if_bpf, &eh, sizeof(eh), mb);
+ mb->m_data -= sizeof(*ibh);
+ mb->m_len += sizeof(*ibh);
+ mb->m_pkthdr.len += sizeof(*ibh);
+}
+
+/*
+ * Infiniband output routine.
+ */
+static int
+infiniband_output(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *dst,
+ struct route *ro)
+{
+ uint8_t edst[INFINIBAND_ADDR_LEN];
+#if defined(INET) || defined(INET6)
+ struct llentry *lle = NULL;
+#endif
+ struct infiniband_header *ibh;
+ int error = 0;
+ uint16_t type;
+ bool is_gw;
+
+ NET_EPOCH_ASSERT();
+
+ is_gw = ((ro != NULL) && (ro->ro_flags & RT_HAS_GW) != 0);
+
+#ifdef MAC
+ error = mac_ifnet_check_transmit(ifp, m);
+ if (error)
+ goto bad;
+#endif
+
+ M_PROFILE(m);
+ if (ifp->if_flags & IFF_MONITOR) {
+ error = ENETDOWN;
+ goto bad;
+ }
+ if (!((ifp->if_flags & IFF_UP) &&
+ (ifp->if_drv_flags & IFF_DRV_RUNNING))) {
+ error = ENETDOWN;
+ goto bad;
+ }
+
+ switch (dst->sa_family) {
+ case AF_LINK:
+ goto output;
+#ifdef INET
+ case AF_INET:
+ if (lle != NULL && (lle->la_flags & LLE_VALID)) {
+ memcpy(edst, lle->ll_addr, sizeof(edst));
+ } else if (m->m_flags & M_MCAST) {
+ infiniband_ipv4_multicast_map(
+ ((const struct sockaddr_in *)dst)->sin_addr.s_addr,
+ ifp->if_broadcastaddr, edst);
+ } else {
+ error = arpresolve(ifp, is_gw, m, dst, edst, NULL, NULL);
+ if (error) {
+ if (error == EWOULDBLOCK)
+ error = 0;
+ m = NULL; /* mbuf is consumed by resolver */
+ goto bad;
+ }
+ }
+ type = htons(ETHERTYPE_IP);
+ break;
+ case AF_ARP: {
+ struct arphdr *ah;
+
+ if (m->m_len < sizeof(*ah)) {
+ error = EINVAL;
+ goto bad;
+ }
+
+ ah = mtod(m, struct arphdr *);
+
+ if (m->m_len < arphdr_len(ah)) {
+ error = EINVAL;
+ goto bad;
+ }
+ ah->ar_hrd = htons(ARPHRD_INFINIBAND);
+
+ switch (ntohs(ah->ar_op)) {
+ case ARPOP_REVREQUEST:
+ case ARPOP_REVREPLY:
+ type = htons(ETHERTYPE_REVARP);
+ break;
+ case ARPOP_REQUEST:
+ case ARPOP_REPLY:
+ default:
+ type = htons(ETHERTYPE_ARP);
+ break;
+ }
+
+ if (m->m_flags & M_BCAST) {
+ memcpy(edst, ifp->if_broadcastaddr, INFINIBAND_ADDR_LEN);
+ } else {
+ if (ah->ar_hln != INFINIBAND_ADDR_LEN) {
+ error = EINVAL;
+ goto bad;
+ }
+ memcpy(edst, ar_tha(ah), INFINIBAND_ADDR_LEN);
+ }
+ break;
+ }
+#endif
+#ifdef INET6
+ case AF_INET6: {
+ const struct ip6_hdr *ip6;
+
+ ip6 = mtod(m, const struct ip6_hdr *);
+ if (m->m_len < sizeof(*ip6)) {
+ error = EINVAL;
+ goto bad;
+ } else if (lle != NULL && (lle->la_flags & LLE_VALID)) {
+ memcpy(edst, lle->ll_addr, sizeof(edst));
+ } else if (m->m_flags & M_MCAST) {
+ infiniband_ipv6_multicast_map(
+ &((const struct sockaddr_in6 *)dst)->sin6_addr,
+ ifp->if_broadcastaddr, edst);
+ } else if (ip6->ip6_nxt == IPPROTO_ICMPV6) {
+ memcpy(edst, ifp->if_broadcastaddr, INFINIBAND_ADDR_LEN);
+ } else {
+ error = nd6_resolve(ifp, is_gw, m, dst, edst, NULL, NULL);
+ if (error) {
+ if (error == EWOULDBLOCK)
+ error = 0;
+ m = NULL; /* mbuf is consumed by resolver */
+ goto bad;
+ }
+ }
+ type = htons(ETHERTYPE_IPV6);
+ break;
+ }
+#endif
+ default:
+ error = EAFNOSUPPORT;
+ goto bad;
+ }
+
+ /*
+ * Add local net header. If no space in first mbuf,
+ * allocate another.
+ */
+ M_PREPEND(m, INFINIBAND_HDR_LEN, M_NOWAIT);
+ if (m == NULL) {
+ error = ENOBUFS;
+ goto bad;
+ }
+ ibh = mtod(m, struct infiniband_header *);
+
+ ibh->ib_protocol = type;
+ memcpy(ibh->ib_hwaddr, edst, sizeof(edst));
+
+ /*
+ * Queue message on interface, update output statistics if
+ * successful, and start output if interface not yet active.
+ */
+output:
+ return (ifp->if_transmit(ifp, m));
+bad:
+ if (m != NULL)
+ m_freem(m);
+ return (error);
+}
+
+/*
+ * Process a received Infiniband packet.
+ */
+static void
+infiniband_input(struct ifnet *ifp, struct mbuf *m)
+{
+ struct infiniband_header *ibh;
+ struct epoch_tracker et;
+ int isr;
+
+ CURVNET_SET_QUIET(ifp->if_vnet);
+
+ if ((ifp->if_flags & IFF_UP) == 0) {
+ if_inc_counter(ifp, IFCOUNTER_IERRORS, 1);
+ m_freem(m);
+ goto done;
+ }
+
+ ibh = mtod(m, struct infiniband_header *);
+
+ /*
+ * Reset layer specific mbuf flags to avoid confusing upper
+ * layers:
+ */
+ m->m_flags &= ~M_VLANTAG;
+ m_clrprotoflags(m);
+
+ if (INFINIBAND_IS_MULTICAST(ibh->ib_hwaddr)) {
+ if (memcmp(ibh->ib_hwaddr, ifp->if_broadcastaddr,
+ ifp->if_addrlen) == 0)
+ m->m_flags |= M_BCAST;
+ else
+ m->m_flags |= M_MCAST;
+ if_inc_counter(ifp, IFCOUNTER_IMCASTS, 1);
+ }
+
+ /* Let BPF have it before we strip the header. */
+ INFINIBAND_BPF_MTAP(ifp, m);
+
+ /* Allow monitor mode to claim this frame, after stats are updated. */
+ if (ifp->if_flags & IFF_MONITOR) {
+ m_freem(m);
+ goto done;
+ }
+
+ /* Direct packet to correct FIB based on interface config. */
+ M_SETFIB(m, ifp->if_fib);
+
+ /* Handle input from a lagg<N> port */
+ if (ifp->if_type == IFT_INFINIBANDLAG) {
+ KASSERT(lagg_input_infiniband_p != NULL,
+ ("%s: if_lagg not loaded!", __func__));
+ m = (*lagg_input_infiniband_p)(ifp, m);
+ if (__predict_false(m == NULL))
+ goto done;
+ ifp = m->m_pkthdr.rcvif;
+ }
+
+ /*
+ * Dispatch frame to upper layer.
+ */
+ switch (ibh->ib_protocol) {
+#ifdef INET
+ case htons(ETHERTYPE_IP):
+ isr = NETISR_IP;
+ break;
+
+ case htons(ETHERTYPE_ARP):
+ if (ifp->if_flags & IFF_NOARP) {
+ /* Discard packet if ARP is disabled on interface */
+ m_freem(m);
+ goto done;
+ }
+ isr = NETISR_ARP;
+ break;
+#endif
+#ifdef INET6
+ case htons(ETHERTYPE_IPV6):
+ isr = NETISR_IPV6;
+ break;
+#endif
+ default:
+ if_inc_counter(ifp, IFCOUNTER_IERRORS, 1);
+ m_freem(m);
+ goto done;
+ }
+
+ /* Strip off the Infiniband header. */
+ m_adj(m, INFINIBAND_HDR_LEN);
+
+#ifdef MAC
+ /*
+ * Tag the mbuf with an appropriate MAC label before any other
+ * consumers can get to it.
+ */
+ mac_ifnet_create_mbuf(ifp, m);
+#endif
+ /* Allow monitor mode to claim this frame, after stats are updated. */
+ NET_EPOCH_ENTER(et);
+ netisr_dispatch(isr, m);
+ NET_EPOCH_EXIT(et);
+done:
+ CURVNET_RESTORE();
+}
+
+static int
+infiniband_resolvemulti(struct ifnet *ifp, struct sockaddr **llsa,
+ struct sockaddr *sa)
+{
+ struct sockaddr_dl *sdl;
+#ifdef INET
+ struct sockaddr_in *sin;
+#endif
+#ifdef INET6
+ struct sockaddr_in6 *sin6;
+#endif
+ uint8_t *e_addr;
+
+ switch (sa->sa_family) {
+ case AF_LINK:
+ /*
+ * No mapping needed. Just check that it's a valid MC address.
+ */
+ sdl = (struct sockaddr_dl *)sa;
+ e_addr = LLADDR(sdl);
+ if (!INFINIBAND_IS_MULTICAST(e_addr))
+ return (EADDRNOTAVAIL);
+ *llsa = NULL;
+ return 0;
+
+#ifdef INET
+ case AF_INET:
+ sin = (struct sockaddr_in *)sa;
+ if (!IN_MULTICAST(ntohl(sin->sin_addr.s_addr)))
+ return (EADDRNOTAVAIL);
+ sdl = link_init_sdl(ifp, *llsa, IFT_INFINIBAND);
+ sdl->sdl_alen = INFINIBAND_ADDR_LEN;
+ e_addr = LLADDR(sdl);
+ infiniband_ipv4_multicast_map(sin->sin_addr.s_addr, ifp->if_broadcastaddr,
+ e_addr);
+ *llsa = (struct sockaddr *)sdl;
+ return (0);
+#endif
+#ifdef INET6
+ case AF_INET6:
+ sin6 = (struct sockaddr_in6 *)sa;
+ /*
+ * An IP6 address of 0 means listen to all of the
+ * multicast address used for IP6. This has no meaning
+ * in infiniband.
+ */
+ if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr))
+ return (EADDRNOTAVAIL);
+ if (!IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr))
+ return (EADDRNOTAVAIL);
+ sdl = link_init_sdl(ifp, *llsa, IFT_INFINIBAND);
+ sdl->sdl_alen = INFINIBAND_ADDR_LEN;
+ e_addr = LLADDR(sdl);
+ infiniband_ipv6_multicast_map(&sin6->sin6_addr, ifp->if_broadcastaddr, e_addr);
+ *llsa = (struct sockaddr *)sdl;
+ return (0);
+#endif
+ default:
+ return (EAFNOSUPPORT);
+ }
+}
+
+void
+infiniband_ifattach(struct ifnet *ifp, const uint8_t *lla, const uint8_t *llb)
+{
+ struct sockaddr_dl *sdl;
+ struct ifaddr *ifa;
+ int i;
+
+ ifp->if_addrlen = INFINIBAND_ADDR_LEN;
+ ifp->if_hdrlen = INFINIBAND_HDR_LEN;
+ ifp->if_mtu = INFINIBAND_MTU;
+ if_attach(ifp);
+ ifp->if_output = infiniband_output;
+ ifp->if_input = infiniband_input;
+ ifp->if_resolvemulti = infiniband_resolvemulti;
+
+ if (ifp->if_baudrate == 0)
+ ifp->if_baudrate = IF_Gbps(10); /* default value */
+ if (llb != NULL)
+ ifp->if_broadcastaddr = llb;
+
+ ifa = ifp->if_addr;
+ KASSERT(ifa != NULL, ("%s: no lladdr!\n", __func__));
+ sdl = (struct sockaddr_dl *)ifa->ifa_addr;
+ sdl->sdl_type = IFT_INFINIBAND;
+ sdl->sdl_alen = ifp->if_addrlen;
+
+ if (lla != NULL) {
+ memcpy(LLADDR(sdl), lla, ifp->if_addrlen);
+
+ if (ifp->if_hw_addr != NULL)
+ memcpy(ifp->if_hw_addr, lla, ifp->if_addrlen);
+ } else {
+ lla = LLADDR(sdl);
+ }
+
+ /* Attach ethernet compatible network device */
+ bpfattach(ifp, DLT_EN10MB, ETHER_HDR_LEN);
+
+ /* Announce Infiniband MAC address if non-zero. */
+ for (i = 0; i < ifp->if_addrlen; i++)
+ if (lla[i] != 0)
+ break;
+ if (i != ifp->if_addrlen)
+ if_printf(ifp, "Infiniband address: %20D\n", lla, ":");
+
+ /* Add necessary bits are setup; announce it now. */
+ EVENTHANDLER_INVOKE(infiniband_ifattach_event, ifp);
+
+ if (IS_DEFAULT_VNET(curvnet))
+ devctl_notify("INFINIBAND", ifp->if_xname, "IFATTACH", NULL);
+}
+
+/*
+ * Perform common duties while detaching an Infiniband interface
+ */
+void
+infiniband_ifdetach(struct ifnet *ifp)
+{
+ bpfdetach(ifp);
+ if_detach(ifp);
+}
+
+static moduledata_t infiniband_mod = {
+ .name = "if_infiniband",
+};
+
+DECLARE_MODULE(if_infiniband, infiniband_mod, SI_SUB_INIT_IF, SI_ORDER_ANY);
+MODULE_VERSION(if_infiniband, 1);
Index: sys/net/if_lagg.h
===================================================================
--- sys/net/if_lagg.h
+++ sys/net/if_lagg.h
@@ -72,7 +72,33 @@
{ "default", LAGG_PROTO_DEFAULT } \
}
+/* Supported lagg TYPEs */
+typedef enum {
+ LAGG_TYPE_ETHERNET = 0, /* ethernet (default) */
+ LAGG_TYPE_INFINIBAND, /* infiniband */
+ LAGG_TYPE_MAX,
+} lagg_type;
+
+struct lagg_types {
+ const char *lt_name;
+ lagg_type lt_value;
+};
+
+#define LAGG_TYPE_DEFAULT LAGG_TYPE_ETHERNET
+#define LAGG_TYPES { \
+ { "ethernet", LAGG_TYPE_ETHERNET }, \
+ { "infiniband", LAGG_TYPE_INFINIBAND }, \
+}
+
/*
+ * lagg create clone params
+ */
+struct iflaggparam {
+ uint8_t lagg_type; /* see LAGG_TYPE_XXX */
+ uint8_t reserved[3];
+};
+
+/*
* lagg ioctls.
*/
@@ -206,7 +232,7 @@
struct lagg_softc {
struct ifnet *sc_ifp; /* virtual interface */
- struct rmlock sc_mtx;
+ struct mtx sc_mtx; /* watchdog mutex */
struct sx sc_sx;
int sc_proto; /* lagg protocol */
u_int sc_count; /* number of ports */
@@ -230,12 +256,15 @@
u_int sc_opts;
int flowid_shift; /* shift the flowid */
struct lagg_counters detached_counters; /* detached ports sum */
+ struct callout sc_watchdog; /* watchdog timer */
};
struct lagg_port {
struct ifnet *lp_ifp; /* physical interface */
struct lagg_softc *lp_softc; /* parent lagg */
- uint8_t lp_lladdr[ETHER_ADDR_LEN];
+#define LAGG_ADDR_LEN \
+ MAX(INFINIBAND_ADDR_LEN, ETHER_ADDR_LEN)
+ uint8_t lp_lladdr[LAGG_ADDR_LEN];
u_char lp_iftype; /* interface type */
uint32_t lp_prio; /* port priority */
@@ -257,7 +286,8 @@
struct epoch_context lp_epoch_ctx;
};
-extern struct mbuf *(*lagg_input_p)(struct ifnet *, struct mbuf *);
+extern struct mbuf *(*lagg_input_ethernet_p)(struct ifnet *, struct mbuf *);
+extern struct mbuf *(*lagg_input_infiniband_p)(struct ifnet *, struct mbuf *);
extern void (*lagg_linkstate_p)(struct ifnet *, int );
int lagg_enqueue(struct ifnet *, struct mbuf *);
Index: sys/net/if_lagg.c
===================================================================
--- sys/net/if_lagg.c
+++ sys/net/if_lagg.c
@@ -55,6 +55,7 @@
#include <net/bpf.h>
#include <net/route.h>
#include <net/vnet.h>
+#include <net/infiniband.h>
#if defined(INET) || defined(INET6)
#include <netinet/in.h>
@@ -131,7 +132,8 @@
static void lagg_capabilities(struct lagg_softc *);
static int lagg_port_create(struct lagg_softc *, struct ifnet *);
static int lagg_port_destroy(struct lagg_port *, int);
-static struct mbuf *lagg_input(struct ifnet *, struct mbuf *);
+static struct mbuf *lagg_input_ethernet(struct ifnet *, struct mbuf *);
+static struct mbuf *lagg_input_infiniband(struct ifnet *, struct mbuf *);
static void lagg_linkstate(struct lagg_softc *);
static void lagg_port_state(struct ifnet *, int);
static int lagg_port_ioctl(struct ifnet *, u_long, caddr_t);
@@ -164,7 +166,8 @@
int (*func)(struct ifnet *, int));
static int lagg_setflags(struct lagg_port *, int status);
static uint64_t lagg_get_counter(struct ifnet *ifp, ift_counter cnt);
-static int lagg_transmit(struct ifnet *, struct mbuf *);
+static int lagg_transmit_ethernet(struct ifnet *, struct mbuf *);
+static int lagg_transmit_infiniband(struct ifnet *, struct mbuf *);
static void lagg_qflush(struct ifnet *);
static int lagg_media_change(struct ifnet *);
static void lagg_media_status(struct ifnet *, struct ifmediareq *);
@@ -327,7 +330,8 @@
switch (type) {
case MOD_LOAD:
- lagg_input_p = lagg_input;
+ lagg_input_ethernet_p = lagg_input_ethernet;
+ lagg_input_infiniband_p = lagg_input_infiniband;
lagg_linkstate_p = lagg_port_state;
lagg_detach_cookie = EVENTHANDLER_REGISTER(
ifnet_departure_event, lagg_port_ifdetach, NULL,
@@ -336,7 +340,8 @@
case MOD_UNLOAD:
EVENTHANDLER_DEREGISTER(ifnet_departure_event,
lagg_detach_cookie);
- lagg_input_p = NULL;
+ lagg_input_ethernet_p = NULL;
+ lagg_input_infiniband_p = NULL;
lagg_linkstate_p = NULL;
break;
default:
@@ -353,6 +358,7 @@
DECLARE_MODULE(if_lagg, lagg_mod, SI_SUB_PSEUDO, SI_ORDER_ANY);
MODULE_VERSION(if_lagg, 1);
+MODULE_DEPEND(if_lagg, if_infiniband, 1, 1, 1);
static void
lagg_proto_attach(struct lagg_softc *sc, lagg_proto pr)
@@ -504,18 +510,48 @@
static int
lagg_clone_create(struct if_clone *ifc, int unit, caddr_t params)
{
+ struct iflaggparam iflp;
struct lagg_softc *sc;
struct ifnet *ifp;
- static const u_char eaddr[6]; /* 00:00:00:00:00:00 */
+ int if_type;
+ int error;
+ static const uint8_t eaddr[LAGG_ADDR_LEN];
+ static const uint8_t ib_bcast_addr[INFINIBAND_ADDR_LEN] = {
+ 0x00, 0xff, 0xff, 0xff,
+ 0xff, 0x12, 0x40, 0x1b, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff
+ };
+ if (params != NULL) {
+ error = copyin(params, &iflp, sizeof(iflp));
+ if (error)
+ return (error);
+
+ switch (iflp.lagg_type) {
+ case LAGG_TYPE_ETHERNET:
+ if_type = IFT_ETHER;
+ break;
+ case LAGG_TYPE_INFINIBAND:
+ if_type = IFT_INFINIBAND;
+ break;
+ default:
+ return (EINVAL);
+ }
+ } else {
+ if_type = IFT_ETHER;
+ }
+
sc = malloc(sizeof(*sc), M_LAGG, M_WAITOK|M_ZERO);
- ifp = sc->sc_ifp = if_alloc(IFT_ETHER);
+ ifp = sc->sc_ifp = if_alloc(if_type);
if (ifp == NULL) {
free(sc, M_LAGG);
return (ENOSPC);
}
LAGG_SX_INIT(sc);
+ mtx_init(&sc->sc_mtx, "lagg-mtx", NULL, MTX_DEF);
+ callout_init_mtx(&sc->sc_watchdog, &sc->sc_mtx, 0);
+
LAGG_XLOCK(sc);
if (V_def_use_flowid)
sc->sc_opts |= LAGG_OPT_USE_FLOWID;
@@ -530,15 +566,25 @@
CK_SLIST_INIT(&sc->sc_ports);
- /* Initialise pseudo media types */
- ifmedia_init(&sc->sc_media, 0, lagg_media_change,
- lagg_media_status);
- ifmedia_add(&sc->sc_media, IFM_ETHER | IFM_AUTO, 0, NULL);
- ifmedia_set(&sc->sc_media, IFM_ETHER | IFM_AUTO);
+ switch (if_type) {
+ case IFT_ETHER:
+ /* Initialise pseudo media types */
+ ifmedia_init(&sc->sc_media, 0, lagg_media_change,
+ lagg_media_status);
+ ifmedia_add(&sc->sc_media, IFM_ETHER | IFM_AUTO, 0, NULL);
+ ifmedia_set(&sc->sc_media, IFM_ETHER | IFM_AUTO);
- if_initname(ifp, laggname, unit);
+ if_initname(ifp, laggname, unit);
+ ifp->if_transmit = lagg_transmit_ethernet;
+ break;
+ case IFT_INFINIBAND:
+ if_initname(ifp, laggname, unit);
+ ifp->if_transmit = lagg_transmit_infiniband;
+ break;
+ default:
+ break;
+ }
ifp->if_softc = sc;
- ifp->if_transmit = lagg_transmit;
ifp->if_qflush = lagg_qflush;
ifp->if_init = lagg_init;
ifp->if_ioctl = lagg_ioctl;
@@ -555,9 +601,18 @@
/*
* Attach as an ordinary ethernet device, children will be attached
- * as special device IFT_IEEE8023ADLAG.
+ * as special device IFT_IEEE8023ADLAG or IFT_INFINIBANDLAG.
*/
- ether_ifattach(ifp, eaddr);
+ switch (if_type) {
+ case IFT_ETHER:
+ ether_ifattach(ifp, eaddr);
+ break;
+ case IFT_INFINIBAND:
+ infiniband_ifattach(ifp, eaddr, ib_bcast_addr);
+ break;
+ default:
+ break;
+ }
sc->vlan_attach = EVENTHANDLER_REGISTER(vlan_config,
lagg_register_vlan, sc, EVENTHANDLER_PRI_FIRST);
@@ -595,14 +650,24 @@
lagg_proto_detach(sc);
LAGG_XUNLOCK(sc);
- ifmedia_removeall(&sc->sc_media);
- ether_ifdetach(ifp);
+ switch (ifp->if_type) {
+ case IFT_ETHER:
+ ifmedia_removeall(&sc->sc_media);
+ ether_ifdetach(ifp);
+ break;
+ case IFT_INFINIBAND:
+ infiniband_ifdetach(ifp);
+ break;
+ default:
+ break;
+ }
if_free(ifp);
LAGG_LIST_LOCK();
SLIST_REMOVE(&V_lagg_list, sc, lagg_softc, sc_entries);
LAGG_LIST_UNLOCK();
+ mtx_destroy(&sc->sc_mtx);
LAGG_SX_DESTROY(sc);
free(sc, M_LAGG);
}
@@ -669,6 +734,7 @@
struct lagg_port *lp, *tlp;
struct ifreq ifr;
int error, i, oldmtu;
+ int if_type;
uint64_t *pval;
LAGG_XLOCK_ASSERT(sc);
@@ -695,9 +761,22 @@
return (EBUSY);
}
- /* XXX Disallow non-ethernet interfaces (this should be any of 802) */
- if (ifp->if_type != IFT_ETHER && ifp->if_type != IFT_L2VLAN)
- return (EPROTONOSUPPORT);
+ switch (sc->sc_ifp->if_type) {
+ case IFT_ETHER:
+ /* XXX Disallow non-ethernet interfaces (this should be any of 802) */
+ if (ifp->if_type != IFT_ETHER && ifp->if_type != IFT_L2VLAN)
+ return (EPROTONOSUPPORT);
+ if_type = IFT_IEEE8023ADLAG;
+ break;
+ case IFT_INFINIBAND:
+ /* XXX Disallow non-infiniband interfaces */
+ if (ifp->if_type != IFT_INFINIBAND)
+ return (EPROTONOSUPPORT);
+ if_type = IFT_INFINIBANDLAG;
+ break;
+ default:
+ break;
+ }
/* Allow the first Ethernet member to define the MTU */
oldmtu = -1;
@@ -754,14 +833,14 @@
if_ref(ifp);
lp->lp_ifp = ifp;
- bcopy(IF_LLADDR(ifp), lp->lp_lladdr, ETHER_ADDR_LEN);
+ bcopy(IF_LLADDR(ifp), lp->lp_lladdr, ifp->if_addrlen);
lp->lp_ifcapenable = ifp->if_capenable;
if (CK_SLIST_EMPTY(&sc->sc_ports)) {
- bcopy(IF_LLADDR(ifp), IF_LLADDR(sc->sc_ifp), ETHER_ADDR_LEN);
+ bcopy(IF_LLADDR(ifp), IF_LLADDR(sc->sc_ifp), ifp->if_addrlen);
lagg_proto_lladdr(sc);
EVENTHANDLER_INVOKE(iflladdr_event, sc->sc_ifp);
} else {
- if_setlladdr(ifp, IF_LLADDR(sc->sc_ifp), ETHER_ADDR_LEN);
+ if_setlladdr(ifp, IF_LLADDR(sc->sc_ifp), ifp->if_addrlen);
}
lagg_setflags(lp, 1);
@@ -770,7 +849,7 @@
/* Change the interface type */
lp->lp_iftype = ifp->if_type;
- ifp->if_type = IFT_IEEE8023ADLAG;
+ ifp->if_type = if_type;
ifp->if_lagg = lp;
lp->lp_ioctl = ifp->if_ioctl;
ifp->if_ioctl = lagg_port_ioctl;
@@ -887,15 +966,15 @@
/* Update the primary interface */
if (lp == sc->sc_primary) {
- uint8_t lladdr[ETHER_ADDR_LEN];
+ uint8_t lladdr[LAGG_ADDR_LEN];
if ((lp0 = CK_SLIST_FIRST(&sc->sc_ports)) == NULL)
- bzero(&lladdr, ETHER_ADDR_LEN);
+ bzero(&lladdr, LAGG_ADDR_LEN);
else
- bcopy(lp0->lp_lladdr, lladdr, ETHER_ADDR_LEN);
+ bcopy(lp0->lp_lladdr, lladdr, LAGG_ADDR_LEN);
sc->sc_primary = lp0;
if (sc->sc_destroying == 0) {
- bcopy(lladdr, IF_LLADDR(sc->sc_ifp), ETHER_ADDR_LEN);
+ bcopy(lladdr, IF_LLADDR(sc->sc_ifp), sc->sc_ifp->if_addrlen);
lagg_proto_lladdr(sc);
EVENTHANDLER_INVOKE(iflladdr_event, sc->sc_ifp);
}
@@ -905,7 +984,7 @@
* as well, to switch from old lladdr to its 'real' one)
*/
CK_SLIST_FOREACH(lp_ptr, &sc->sc_ports, lp_entries)
- if_setlladdr(lp_ptr->lp_ifp, lladdr, ETHER_ADDR_LEN);
+ if_setlladdr(lp_ptr->lp_ifp, lladdr, lp_ptr->lp_ifp->if_addrlen);
}
if (lp->lp_ifflags)
@@ -914,7 +993,7 @@
if (lp->lp_detaching == 0) {
lagg_setflags(lp, 0);
lagg_setcaps(lp, lp->lp_ifcapenable);
- if_setlladdr(ifp, lp->lp_lladdr, ETHER_ADDR_LEN);
+ if_setlladdr(ifp, lp->lp_lladdr, ifp->if_addrlen);
}
/*
@@ -938,9 +1017,15 @@
int error = 0;
/* Should be checked by the caller */
- if (ifp->if_type != IFT_IEEE8023ADLAG ||
- (lp = ifp->if_lagg) == NULL || (sc = lp->lp_softc) == NULL)
+ switch (ifp->if_type) {
+ case IFT_IEEE8023ADLAG:
+ case IFT_INFINIBANDLAG:
+ if ((lp = ifp->if_lagg) == NULL || (sc = lp->lp_softc) == NULL)
+ goto fallback;
+ break;
+ default:
goto fallback;
+ }
switch (cmd) {
case SIOCGLAGGPORT:
@@ -1130,6 +1215,41 @@
}
static void
+lagg_watchdog_infiniband(void *arg)
+{
+ struct lagg_softc *sc;
+ struct lagg_port *lp;
+ struct ifnet *ifp;
+ struct ifnet *lp_ifp;
+
+ sc = arg;
+
+ /*
+ * Because infiniband nodes have a fixed mac address, we need
+ * to regularly update the link level address of the parent
+ * lagg<N> device instead. This operation does not have to be
+ * atomic.
+ */
+ LAGG_RLOCK();
+ lp = lagg_link_active(sc, sc->sc_primary);
+ if (lp != NULL) {
+ ifp = sc->sc_ifp;
+ lp_ifp = lp->lp_ifp;
+
+ if (ifp != NULL && lp_ifp != NULL &&
+ memcmp(IF_LLADDR(ifp), IF_LLADDR(lp_ifp), ifp->if_addrlen) != 0) {
+ memcpy(IF_LLADDR(ifp), IF_LLADDR(lp_ifp), ifp->if_addrlen);
+ CURVNET_SET(ifp->if_vnet);
+ EVENTHANDLER_INVOKE(iflladdr_event, ifp);
+ CURVNET_RESTORE();
+ }
+ }
+ LAGG_RUNLOCK();
+
+ callout_reset(&sc->sc_watchdog, hz, &lagg_watchdog_infiniband, arg);
+}
+
+static void
lagg_init(void *xsc)
{
struct lagg_softc *sc = (struct lagg_softc *)xsc;
@@ -1151,12 +1271,18 @@
*/
CK_SLIST_FOREACH(lp, &sc->sc_ports, lp_entries) {
if (memcmp(IF_LLADDR(ifp), IF_LLADDR(lp->lp_ifp),
- ETHER_ADDR_LEN) != 0)
- if_setlladdr(lp->lp_ifp, IF_LLADDR(ifp), ETHER_ADDR_LEN);
+ ifp->if_addrlen) != 0)
+ if_setlladdr(lp->lp_ifp, IF_LLADDR(ifp), ifp->if_addrlen);
}
lagg_proto_init(sc);
+ if (ifp->if_type == IFT_INFINIBAND) {
+ mtx_lock(&sc->sc_mtx);
+ lagg_watchdog_infiniband(sc);
+ mtx_unlock(&sc->sc_mtx);
+ }
+
LAGG_XUNLOCK(sc);
}
@@ -1173,6 +1299,12 @@
ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
lagg_proto_stop(sc);
+
+ mtx_lock(&sc->sc_mtx);
+ callout_stop(&sc->sc_watchdog);
+ mtx_unlock(&sc->sc_mtx);
+
+ callout_drain(&sc->sc_watchdog);
}
static int
@@ -1228,7 +1360,12 @@
error = EPROTONOSUPPORT;
break;
}
-
+ /* Infiniband only supports the failover protocol. */
+ if (ra->ra_proto != LAGG_PROTO_FAILOVER &&
+ ifp->if_type == IFT_INFINIBAND) {
+ error = EPROTONOSUPPORT;
+ break;
+ }
LAGG_XLOCK(sc);
lagg_proto_detach(sc);
LAGG_UNLOCK_ASSERT();
@@ -1546,7 +1683,10 @@
break;
case SIOCSIFMEDIA:
case SIOCGIFMEDIA:
- error = ifmedia_ioctl(ifp, ifr, &sc->sc_media, cmd);
+ if (ifp->if_type == IFT_INFINIBAND)
+ error = EINVAL;
+ else
+ error = ifmedia_ioctl(ifp, ifr, &sc->sc_media, cmd);
break;
case SIOCSIFCAP:
@@ -1855,7 +1995,7 @@
}
static int
-lagg_transmit(struct ifnet *ifp, struct mbuf *m)
+lagg_transmit_ethernet(struct ifnet *ifp, struct mbuf *m)
{
struct lagg_softc *sc = (struct lagg_softc *)ifp->if_softc;
int error;
@@ -1880,6 +2020,32 @@
return (error);
}
+static int
+lagg_transmit_infiniband(struct ifnet *ifp, struct mbuf *m)
+{
+ struct lagg_softc *sc = (struct lagg_softc *)ifp->if_softc;
+ int error;
+
+#if defined(KERN_TLS) || defined(RATELIMIT)
+ if (m->m_pkthdr.csum_flags & CSUM_SND_TAG)
+ MPASS(m->m_pkthdr.snd_tag->ifp == ifp);
+#endif
+ LAGG_RLOCK();
+ /* We need a Tx algorithm and at least one port */
+ if (sc->sc_proto == LAGG_PROTO_NONE || sc->sc_count == 0) {
+ LAGG_RUNLOCK();
+ m_freem(m);
+ if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
+ return (ENXIO);
+ }
+
+ INFINIBAND_BPF_MTAP(ifp, m);
+
+ error = lagg_proto_start(sc, m);
+ LAGG_RUNLOCK();
+ return (error);
+}
+
/*
* The ifp->if_qflush entry point for lagg(4) is no-op.
*/
@@ -1889,7 +2055,7 @@
}
static struct mbuf *
-lagg_input(struct ifnet *ifp, struct mbuf *m)
+lagg_input_ethernet(struct ifnet *ifp, struct mbuf *m)
{
struct lagg_port *lp = ifp->if_lagg;
struct lagg_softc *sc = lp->lp_softc;
@@ -1916,6 +2082,34 @@
return (m);
}
+static struct mbuf *
+lagg_input_infiniband(struct ifnet *ifp, struct mbuf *m)
+{
+ struct lagg_port *lp = ifp->if_lagg;
+ struct lagg_softc *sc = lp->lp_softc;
+ struct ifnet *scifp = sc->sc_ifp;
+
+ LAGG_RLOCK();
+ if ((scifp->if_drv_flags & IFF_DRV_RUNNING) == 0 ||
+ lp->lp_detaching != 0 ||
+ sc->sc_proto == LAGG_PROTO_NONE) {
+ LAGG_RUNLOCK();
+ m_freem(m);
+ return (NULL);
+ }
+
+ INFINIBAND_BPF_MTAP(scifp, m);
+
+ m = lagg_proto_input(sc, lp, m);
+ if (m != NULL && (scifp->if_flags & IFF_MONITOR) != 0) {
+ m_freem(m);
+ m = NULL;
+ }
+
+ LAGG_RUNLOCK();
+ return (m);
+}
+
static int
lagg_media_change(struct ifnet *ifp)
{
@@ -2236,7 +2430,10 @@
LAGG_XLOCK_ASSERT(sc);
lb = malloc(sizeof(struct lagg_lb), M_LAGG, M_WAITOK | M_ZERO);
- lb->lb_key = m_ether_tcpip_hash_init();
+ if (sc->sc_ifp->if_type == IFT_INFINIBAND)
+ lb->lb_key = m_infiniband_tcpip_hash_init();
+ else
+ lb->lb_key = m_ether_tcpip_hash_init();
sc->sc_psc = lb;
CK_SLIST_FOREACH(lp, &sc->sc_ports, lp_entries)
@@ -2303,6 +2500,8 @@
if ((sc->sc_opts & LAGG_OPT_USE_FLOWID) &&
M_HASHTYPE_GET(m) != M_HASHTYPE_NONE)
p = m->m_pkthdr.flowid >> sc->flowid_shift;
+ else if (sc->sc_ifp->if_type == IFT_INFINIBAND)
+ p = m_infiniband_tcpip_hash(sc->sc_flags, m, lb->lb_key);
else
p = m_ether_tcpip_hash(sc->sc_flags, m, lb->lb_key);
p %= sc->sc_count;
Index: sys/net/if_types.h
===================================================================
--- sys/net/if_types.h
+++ sys/net/if_types.h
@@ -242,6 +242,7 @@
IFT_OPTICALCHANNEL = 0xc3, /* Optical Channel */
IFT_OPTICALTRANSPORT = 0xc4, /* Optical Transport */
IFT_INFINIBAND = 0xc7, /* Infiniband */
+ IFT_INFINIBANDLAG = 0xc8, /* Infiniband Link Aggregate */
IFT_BRIDGE = 0xd1, /* Transparent bridge interface */
IFT_STF = 0xd7, /* 6to4 interface */
Index: sys/net/infiniband.h
===================================================================
--- sys/net/infiniband.h
+++ sys/net/infiniband.h
@@ -0,0 +1,80 @@
+/*-
+ * Copyright (c) 2020 Mellanox Technologies. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS `AS IS' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#ifndef __INFINIBAND_H__
+#define __INFINIBAND_H__
+
+#include <sys/cdefs.h>
+#include <sys/stdint.h>
+
+#define INFINIBAND_ADDR_LEN 20 /* bytes */
+#define INFINIBAND_MTU 1500 /* bytes - default value */
+
+#define INFINIBAND_ENC_LEN 4 /* bytes */
+#define INFINIBAND_HDR_LEN \
+ (INFINIBAND_ADDR_LEN + INFINIBAND_ENC_LEN)
+
+#define INFINIBAND_IS_MULTICAST(addr) \
+ ((addr)[4] == 0xff)
+
+#define INFINIBAND_BPF_MTAP(_ifp, _m) \
+do { \
+ if (bpf_peers_present((_ifp)->if_bpf)) { \
+ M_ASSERTVALID(_m); \
+ infiniband_bpf_mtap(_ifp, _m); \
+ } \
+} while (0)
+
+struct infiniband_header {
+ uint8_t ib_hwaddr[INFINIBAND_ADDR_LEN];
+ uint16_t ib_protocol; /* big endian */
+ uint16_t ib_reserved; /* zero */
+} __packed;
+
+struct infiniband_address {
+ uint8_t octet[INFINIBAND_ADDR_LEN];
+} __packed;
+
+#ifdef _KERNEL
+
+#include <sys/_eventhandler.h>
+
+struct ifnet;
+struct mbuf;
+
+extern void infiniband_ifattach(struct ifnet *, const uint8_t *hwaddr, const uint8_t *bcaddr);
+extern void infiniband_ifdetach(struct ifnet *);
+extern void infiniband_bpf_mtap(struct ifnet *, struct mbuf *);
+
+/* new infiniband interface attached event */
+typedef void (*infiniband_ifattach_event_handler_t)(void *, struct ifnet *);
+
+EVENTHANDLER_DECLARE(infiniband_ifattach_event, infiniband_ifattach_event_handler_t);
+
+#endif
+
+#endif /* __INFINIBAND_H__ */
Index: sys/ofed/drivers/infiniband/ulp/ipoib/ipoib.h
===================================================================
--- sys/ofed/drivers/infiniband/ulp/ipoib/ipoib.h
+++ sys/ofed/drivers/infiniband/ulp/ipoib/ipoib.h
@@ -438,16 +438,7 @@
extern struct workqueue_struct *ipoib_workqueue;
-#define IPOIB_MTAP_PROTO(_ifp, _m, _proto) \
-do { \
- if (bpf_peers_present((_ifp)->if_bpf)) { \
- M_ASSERTVALID(_m); \
- ipoib_mtap_proto((_ifp), (_m), (_proto)); \
- } \
-} while (0)
-
/* functions */
-void ipoib_mtap_proto(struct ifnet *ifp, struct mbuf *mb, uint16_t proto);
void ipoib_ib_completion(struct ib_cq *cq, void *dev_ptr);
void ipoib_send_comp_handler(struct ib_cq *cq, void *dev_ptr);
@@ -463,8 +454,6 @@
int ipoib_add_pkey_attr(struct ipoib_dev_priv *priv);
int ipoib_add_umcast_attr(struct ipoib_dev_priv *priv);
-void ipoib_demux(struct ifnet *ifp, struct mbuf *m, u_short proto);
-
void ipoib_send(struct ipoib_dev_priv *priv, struct mbuf *mb,
struct ipoib_ah *address, u32 qpn);
void ipoib_reap_ah(struct work_struct *work);
@@ -540,7 +529,7 @@
void ipoib_dma_unmap_rx(struct ipoib_dev_priv *priv, struct ipoib_rx_buf *rx_req);
void ipoib_dma_mb(struct ipoib_dev_priv *priv, struct mbuf *mb, unsigned int length);
-struct mbuf *ipoib_alloc_map_mb(struct ipoib_dev_priv *priv, struct ipoib_rx_buf *rx_req, int size);
+struct mbuf *ipoib_alloc_map_mb(struct ipoib_dev_priv *priv, struct ipoib_rx_buf *rx_req, int align, int size);
void ipoib_set_ethtool_ops(struct ifnet *dev);
Index: sys/ofed/drivers/infiniband/ulp/ipoib/ipoib_cm.c
===================================================================
--- sys/ofed/drivers/infiniband/ulp/ipoib/ipoib_cm.c
+++ sys/ofed/drivers/infiniband/ulp/ipoib/ipoib_cm.c
@@ -153,7 +153,7 @@
ipoib_cm_alloc_rx_mb(struct ipoib_dev_priv *priv, struct ipoib_cm_rx_buf *rx_req)
{
return ipoib_alloc_map_mb(priv, (struct ipoib_rx_buf *)rx_req,
- priv->cm.max_cm_mtu);
+ sizeof(struct ipoib_pseudoheader), priv->cm.max_cm_mtu);
}
static void ipoib_cm_free_rx_ring(struct ipoib_dev_priv *priv,
@@ -484,10 +484,7 @@
struct mbuf *mb, *newmb;
struct ipoib_cm_rx *p;
int has_srq;
- u_short proto;
- CURVNET_SET_QUIET(dev->if_vnet);
-
ipoib_dbg_data(priv, "cm recv completion: id %d, status: %d\n",
wr_id, wc->status);
@@ -561,16 +558,24 @@
ipoib_dma_mb(priv, mb, wc->byte_len);
- if_inc_counter(dev, IFCOUNTER_IPACKETS, 1);
- if_inc_counter(dev, IFCOUNTER_IBYTES, mb->m_pkthdr.len);
-
mb->m_pkthdr.rcvif = dev;
- proto = *mtod(mb, uint16_t *);
- m_adj(mb, IPOIB_ENCAP_LEN);
- IPOIB_MTAP_PROTO(dev, mb, proto);
- ipoib_demux(dev, mb, ntohs(proto));
+ M_PREPEND(mb, sizeof(struct ipoib_pseudoheader), M_NOWAIT);
+ if (likely(mb != NULL)) {
+ struct ipoib_header *ibh;
+ if_inc_counter(dev, IFCOUNTER_IPACKETS, 1);
+ if_inc_counter(dev, IFCOUNTER_IBYTES, mb->m_pkthdr.len);
+
+ /* fixup destination infiniband address */
+ ibh = mtod(mb, struct ipoib_header *);
+ memset(ibh->hwaddr, 0, 4);
+ memcpy(ibh->hwaddr + 4, priv->local_gid.raw, sizeof(union ib_gid));
+
+ dev->if_input(dev, mb);
+ } else {
+ if_inc_counter(dev, IFCOUNTER_IERRORS, 1);
+ }
repost:
if (has_srq) {
if (unlikely(ipoib_cm_post_receive_srq(priv, wr_id)))
@@ -587,7 +592,6 @@
}
}
done:
- CURVNET_RESTORE();
return;
}
Index: sys/ofed/drivers/infiniband/ulp/ipoib/ipoib_ib.c
===================================================================
--- sys/ofed/drivers/infiniband/ulp/ipoib/ipoib_ib.c
+++ sys/ofed/drivers/infiniband/ulp/ipoib/ipoib_ib.c
@@ -112,17 +112,19 @@
struct mbuf *
ipoib_alloc_map_mb(struct ipoib_dev_priv *priv, struct ipoib_rx_buf *rx_req,
- int size)
+ int align, int size)
{
struct mbuf *mb, *m;
int i, j;
rx_req->mb = NULL;
- mb = m_getm2(NULL, size, M_NOWAIT, MT_DATA, M_PKTHDR);
+ mb = m_getm2(NULL, align + size, M_NOWAIT, MT_DATA, M_PKTHDR);
if (mb == NULL)
return (NULL);
for (i = 0, m = mb; m != NULL; m = m->m_next, i++) {
- m->m_len = M_SIZE(m);
+ m->m_len = M_SIZE(m) - align;
+ m->m_data += align;
+ align = 0;
mb->m_pkthdr.len += m->m_len;
rx_req->mapping[i] = ib_dma_map_single(priv->ca,
mtod(m, void *), m->m_len, DMA_FROM_DEVICE);
@@ -174,7 +176,7 @@
{
return ipoib_alloc_map_mb(priv, &priv->rx_ring[id],
- priv->max_ib_mtu + IB_GRH_BYTES);
+ 0, priv->max_ib_mtu + IB_GRH_BYTES);
}
static int ipoib_ib_post_receives(struct ipoib_dev_priv *priv)
Index: sys/ofed/drivers/infiniband/ulp/ipoib/ipoib_main.c
===================================================================
--- sys/ofed/drivers/infiniband/ulp/ipoib/ipoib_main.c
+++ sys/ofed/drivers/infiniband/ulp/ipoib/ipoib_main.c
@@ -40,21 +40,16 @@
#include "ipoib.h"
#include <sys/eventhandler.h>
-static int ipoib_resolvemulti(struct ifnet *, struct sockaddr **,
- struct sockaddr *);
-
-
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/kernel.h>
#include <linux/vmalloc.h>
-#include <linux/if_arp.h> /* For ARPHRD_xxx */
#include <linux/if_vlan.h>
-#include <net/ip.h>
-#include <net/ipv6.h>
+#include <net/infiniband.h>
+
#include <rdma/ib_cache.h>
MODULE_AUTHOR("Roland Dreier");
@@ -98,19 +93,8 @@
const union ib_gid *gid, const struct sockaddr *addr,
void *client_data);
static void ipoib_start(struct ifnet *dev);
-static int ipoib_output(struct ifnet *ifp, struct mbuf *m,
- const struct sockaddr *dst, struct route *ro);
static int ipoib_ioctl(struct ifnet *ifp, u_long command, caddr_t data);
-static void ipoib_input(struct ifnet *ifp, struct mbuf *m);
-#define IPOIB_MTAP(_ifp, _m) \
-do { \
- if (bpf_peers_present((_ifp)->if_bpf)) { \
- M_ASSERTVALID(_m); \
- ipoib_mtap_mb((_ifp), (_m)); \
- } \
-} while (0)
-
static struct unrhdr *ipoib_unrhdr;
static void
@@ -136,37 +120,6 @@
}
SYSUNINIT(ipoib_unrhdr_uninit, SI_SUB_KLD - 1, SI_ORDER_ANY, ipoib_unrhdr_uninit, NULL);
-/*
- * This is for clients that have an ipoib_header in the mbuf.
- */
-static void
-ipoib_mtap_mb(struct ifnet *ifp, struct mbuf *mb)
-{
- struct ipoib_header *ih;
- struct ether_header eh;
-
- ih = mtod(mb, struct ipoib_header *);
- eh.ether_type = ih->proto;
- bcopy(ih->hwaddr, &eh.ether_dhost, ETHER_ADDR_LEN);
- bzero(&eh.ether_shost, ETHER_ADDR_LEN);
- mb->m_data += sizeof(struct ipoib_header);
- mb->m_len -= sizeof(struct ipoib_header);
- bpf_mtap2(ifp->if_bpf, &eh, sizeof(eh), mb);
- mb->m_data -= sizeof(struct ipoib_header);
- mb->m_len += sizeof(struct ipoib_header);
-}
-
-void
-ipoib_mtap_proto(struct ifnet *ifp, struct mbuf *mb, uint16_t proto)
-{
- struct ether_header eh;
-
- eh.ether_type = proto;
- bzero(&eh.ether_shost, ETHER_ADDR_LEN);
- bzero(&eh.ether_dhost, ETHER_ADDR_LEN);
- bpf_mtap2(ifp->if_bpf, &eh, sizeof(eh), mb);
-}
-
static struct ib_client ipoib_client = {
.name = "ipoib",
.add = ipoib_add_one,
@@ -787,7 +740,7 @@
IFQ_DRV_DEQUEUE(&dev->if_snd, mb);
if (mb == NULL)
break;
- IPOIB_MTAP(dev, mb);
+ INFINIBAND_BPF_MTAP(dev, mb);
ipoib_send_one(priv, mb);
}
}
@@ -875,8 +828,7 @@
dev = priv->dev;
if (!test_bit(IPOIB_FLAG_SUBINTERFACE, &priv->flags)) {
priv->gone = 1;
- bpfdetach(dev);
- if_detach(dev);
+ infiniband_ifdetach(dev);
if_free(dev);
free_unr(ipoib_unrhdr, priv->unit);
} else
@@ -935,7 +887,6 @@
ipoib_intf_alloc(const char *name)
{
struct ipoib_dev_priv *priv;
- struct sockaddr_dl *sdl;
struct ifnet *dev;
priv = ipoib_priv_alloc();
@@ -953,24 +904,17 @@
}
if_initname(dev, name, priv->unit);
dev->if_flags = IFF_BROADCAST | IFF_MULTICAST;
- dev->if_addrlen = INFINIBAND_ALEN;
- dev->if_hdrlen = IPOIB_HEADER_LEN;
- if_attach(dev);
+
+ infiniband_ifattach(dev, NULL, priv->broadcastaddr);
+
dev->if_init = ipoib_init;
dev->if_ioctl = ipoib_ioctl;
dev->if_start = ipoib_start;
- dev->if_output = ipoib_output;
- dev->if_input = ipoib_input;
- dev->if_resolvemulti = ipoib_resolvemulti;
- dev->if_baudrate = IF_Gbps(10);
- dev->if_broadcastaddr = priv->broadcastaddr;
+
dev->if_snd.ifq_maxlen = ipoib_sendq_size * 2;
- sdl = (struct sockaddr_dl *)dev->if_addr->ifa_addr;
- sdl->sdl_type = IFT_INFINIBAND;
- sdl->sdl_alen = dev->if_addrlen;
+
priv->dev = dev;
if_link_state_change(dev, LINK_STATE_DOWN);
- bpfattach(dev, DLT_EN10MB, ETHER_HDR_LEN);
return dev->if_softc;
}
@@ -1165,7 +1109,6 @@
struct ifaddr *ifa;
int retval = 0;
- CURVNET_SET(dev->if_vnet);
NET_EPOCH_ENTER(et);
CK_STAILQ_FOREACH(ifa, &dev->if_addrhead, ifa_link) {
if (ifa->ifa_addr == NULL ||
@@ -1179,7 +1122,6 @@
}
}
NET_EPOCH_EXIT(et);
- CURVNET_RESTORE();
return (retval);
}
@@ -1475,286 +1417,6 @@
ib_sa_unregister_client(&ipoib_sa_client);
destroy_workqueue(ipoib_workqueue);
}
-
-/*
- * Infiniband output routine.
- */
-static int
-ipoib_output(struct ifnet *ifp, struct mbuf *m,
- const struct sockaddr *dst, struct route *ro)
-{
- u_char edst[INFINIBAND_ALEN];
-#if defined(INET) || defined(INET6)
- struct llentry *lle = NULL;
-#endif
- struct ipoib_header *eh;
- int error = 0, is_gw = 0;
- short type;
-
- NET_EPOCH_ASSERT();
-
- if (ro != NULL)
- is_gw = (ro->ro_flags & RT_HAS_GW) != 0;
-#ifdef MAC
- error = mac_ifnet_check_transmit(ifp, m);
- if (error)
- goto bad;
-#endif
-
- M_PROFILE(m);
- if (ifp->if_flags & IFF_MONITOR) {
- error = ENETDOWN;
- goto bad;
- }
- if (!((ifp->if_flags & IFF_UP) &&
- (ifp->if_drv_flags & IFF_DRV_RUNNING))) {
- error = ENETDOWN;
- goto bad;
- }
-
- switch (dst->sa_family) {
-#ifdef INET
- case AF_INET:
- if (lle != NULL && (lle->la_flags & LLE_VALID))
- memcpy(edst, lle->ll_addr, sizeof(edst));
- else if (m->m_flags & M_MCAST)
- ip_ib_mc_map(((struct sockaddr_in *)dst)->sin_addr.s_addr, ifp->if_broadcastaddr, edst);
- else
- error = arpresolve(ifp, is_gw, m, dst, edst, NULL, NULL);
- if (error)
- return (error == EWOULDBLOCK ? 0 : error);
- type = htons(ETHERTYPE_IP);
- break;
- case AF_ARP:
- {
- struct arphdr *ah;
- ah = mtod(m, struct arphdr *);
- ah->ar_hrd = htons(ARPHRD_INFINIBAND);
-
- switch(ntohs(ah->ar_op)) {
- case ARPOP_REVREQUEST:
- case ARPOP_REVREPLY:
- type = htons(ETHERTYPE_REVARP);
- break;
- case ARPOP_REQUEST:
- case ARPOP_REPLY:
- default:
- type = htons(ETHERTYPE_ARP);
- break;
- }
-
- if (m->m_flags & M_BCAST)
- bcopy(ifp->if_broadcastaddr, edst, INFINIBAND_ALEN);
- else
- bcopy(ar_tha(ah), edst, INFINIBAND_ALEN);
-
- }
- break;
-#endif
-#ifdef INET6
- case AF_INET6:
- if (lle != NULL && (lle->la_flags & LLE_VALID))
- memcpy(edst, lle->ll_addr, sizeof(edst));
- else if (m->m_flags & M_MCAST)
- ipv6_ib_mc_map(&((struct sockaddr_in6 *)dst)->sin6_addr, ifp->if_broadcastaddr, edst);
- else
- error = nd6_resolve(ifp, is_gw, m, dst, edst, NULL, NULL);
- if (error)
- return error;
- type = htons(ETHERTYPE_IPV6);
- break;
-#endif
-
- default:
- if_printf(ifp, "can't handle af%d\n", dst->sa_family);
- error = EAFNOSUPPORT;
- goto bad;
- }
-
- /*
- * Add local net header. If no space in first mbuf,
- * allocate another.
- */
- M_PREPEND(m, IPOIB_HEADER_LEN, M_NOWAIT);
- if (m == NULL) {
- error = ENOBUFS;
- goto bad;
- }
- eh = mtod(m, struct ipoib_header *);
- (void)memcpy(&eh->proto, &type, sizeof(eh->proto));
- (void)memcpy(&eh->hwaddr, edst, sizeof (edst));
-
- /*
- * Queue message on interface, update output statistics if
- * successful, and start output if interface not yet active.
- */
- return ((ifp->if_transmit)(ifp, m));
-bad:
- if (m != NULL)
- m_freem(m);
- return (error);
-}
-
-/*
- * Upper layer processing for a received Infiniband packet.
- */
-void
-ipoib_demux(struct ifnet *ifp, struct mbuf *m, u_short proto)
-{
- struct epoch_tracker et;
- int isr;
-
-#ifdef MAC
- /*
- * Tag the mbuf with an appropriate MAC label before any other
- * consumers can get to it.
- */
- mac_ifnet_create_mbuf(ifp, m);
-#endif
- /* Allow monitor mode to claim this frame, after stats are updated. */
- if (ifp->if_flags & IFF_MONITOR) {
- if_printf(ifp, "discard frame at IFF_MONITOR\n");
- m_freem(m);
- return;
- }
- /* Direct packet to correct FIB based on interface config */
- M_SETFIB(m, ifp->if_fib);
- /*
- * Dispatch frame to upper layer.
- */
- switch (proto) {
-#ifdef INET
- case ETHERTYPE_IP:
- isr = NETISR_IP;
- break;
-
- case ETHERTYPE_ARP:
- if (ifp->if_flags & IFF_NOARP) {
- /* Discard packet if ARP is disabled on interface */
- m_freem(m);
- return;
- }
- isr = NETISR_ARP;
- break;
-#endif
-#ifdef INET6
- case ETHERTYPE_IPV6:
- isr = NETISR_IPV6;
- break;
-#endif
- default:
- goto discard;
- }
- NET_EPOCH_ENTER(et);
- netisr_dispatch(isr, m);
- NET_EPOCH_EXIT(et);
- return;
-
-discard:
- m_freem(m);
-}
-
-/*
- * Process a received Infiniband packet.
- */
-static void
-ipoib_input(struct ifnet *ifp, struct mbuf *m)
-{
- struct ipoib_header *eh;
-
- if ((ifp->if_flags & IFF_UP) == 0) {
- m_freem(m);
- return;
- }
- CURVNET_SET_QUIET(ifp->if_vnet);
-
- /* Let BPF have it before we strip the header. */
- IPOIB_MTAP(ifp, m);
- eh = mtod(m, struct ipoib_header *);
- /*
- * Reset layer specific mbuf flags to avoid confusing upper layers.
- * Strip off Infiniband header.
- */
- m->m_flags &= ~M_VLANTAG;
- m_clrprotoflags(m);
- m_adj(m, IPOIB_HEADER_LEN);
-
- if (IPOIB_IS_MULTICAST(eh->hwaddr)) {
- if (memcmp(eh->hwaddr, ifp->if_broadcastaddr,
- ifp->if_addrlen) == 0)
- m->m_flags |= M_BCAST;
- else
- m->m_flags |= M_MCAST;
- if_inc_counter(ifp, IFCOUNTER_IMCASTS, 1);
- }
-
- ipoib_demux(ifp, m, ntohs(eh->proto));
- CURVNET_RESTORE();
-}
-
-static int
-ipoib_resolvemulti(struct ifnet *ifp, struct sockaddr **llsa,
- struct sockaddr *sa)
-{
- struct sockaddr_dl *sdl;
-#ifdef INET
- struct sockaddr_in *sin;
-#endif
-#ifdef INET6
- struct sockaddr_in6 *sin6;
-#endif
- u_char *e_addr;
-
- switch(sa->sa_family) {
- case AF_LINK:
- /*
- * No mapping needed. Just check that it's a valid MC address.
- */
- sdl = (struct sockaddr_dl *)sa;
- e_addr = LLADDR(sdl);
- if (!IPOIB_IS_MULTICAST(e_addr))
- return EADDRNOTAVAIL;
- *llsa = NULL;
- return 0;
-
-#ifdef INET
- case AF_INET:
- sin = (struct sockaddr_in *)sa;
- if (!IN_MULTICAST(ntohl(sin->sin_addr.s_addr)))
- return EADDRNOTAVAIL;
- sdl = link_init_sdl(ifp, *llsa, IFT_INFINIBAND);
- sdl->sdl_alen = INFINIBAND_ALEN;
- e_addr = LLADDR(sdl);
- ip_ib_mc_map(sin->sin_addr.s_addr, ifp->if_broadcastaddr,
- e_addr);
- *llsa = (struct sockaddr *)sdl;
- return 0;
-#endif
-#ifdef INET6
- case AF_INET6:
- sin6 = (struct sockaddr_in6 *)sa;
- /*
- * An IP6 address of 0 means listen to all
- * of the multicast address used for IP6.
- * This has no meaning in ipoib.
- */
- if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr))
- return EADDRNOTAVAIL;
- if (!IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr))
- return EADDRNOTAVAIL;
- sdl = link_init_sdl(ifp, *llsa, IFT_INFINIBAND);
- sdl->sdl_alen = INFINIBAND_ALEN;
- e_addr = LLADDR(sdl);
- ipv6_ib_mc_map(&sin6->sin6_addr, ifp->if_broadcastaddr, e_addr);
- *llsa = (struct sockaddr *)sdl;
- return 0;
-#endif
-
- default:
- return EAFNOSUPPORT;
- }
-}
-
module_init_order(ipoib_init_module, SI_ORDER_FIFTH);
module_exit_order(ipoib_cleanup_module, SI_ORDER_FIFTH);
@@ -1771,4 +1433,5 @@
DECLARE_MODULE(ipoib, ipoib_mod, SI_SUB_LAST, SI_ORDER_ANY);
MODULE_DEPEND(ipoib, ibcore, 1, 1, 1);
+MODULE_DEPEND(ipoib, if_infiniband, 1, 1, 1);
MODULE_DEPEND(ipoib, linuxkpi, 1, 1, 1);
Index: sys/sys/mbuf.h
===================================================================
--- sys/sys/mbuf.h
+++ sys/sys/mbuf.h
@@ -1455,14 +1455,16 @@
((_m)->m_pkthdr.fibnum) = (_fib); \
} while (0)
-/* flags passed as first argument for "m_ether_tcpip_hash()" */
+/* flags passed as first argument for "m_xxx_tcpip_hash()" */
#define MBUF_HASHFLAG_L2 (1 << 2)
#define MBUF_HASHFLAG_L3 (1 << 3)
#define MBUF_HASHFLAG_L4 (1 << 4)
/* mbuf hashing helper routines */
uint32_t m_ether_tcpip_hash_init(void);
-uint32_t m_ether_tcpip_hash(const uint32_t, const struct mbuf *, const uint32_t);
+uint32_t m_ether_tcpip_hash(const uint32_t, const struct mbuf *, uint32_t);
+uint32_t m_infiniband_tcpip_hash_init(void);
+uint32_t m_infiniband_tcpip_hash(const uint32_t, const struct mbuf *, uint32_t);
#ifdef MBUF_PROFILING
void m_profile(struct mbuf *m);

File Metadata

Mime Type
text/plain
Expires
Sun, Feb 22, 11:58 PM (9 h, 11 m)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
28946414
Default Alt Text
D26254.id78536.diff (57 KB)

Event Timeline