Index: sys/net/if.c =================================================================== --- sys/net/if.c +++ sys/net/if.c @@ -78,6 +78,7 @@ #include #include #include +#include #include #include #include @@ -2271,7 +2272,111 @@ int (*vlan_setcookie_p)(struct ifnet *, void *); void *(*vlan_cookie_p)(struct ifnet *); +static bool +if_is_lagg(if_t ifp) +{ + + return (ifp->if_dname != NULL && strcmp(ifp->if_dname, "lagg") == 0); +} + /* + * Figure out if a network device is a child of another network + * device, typically for use with LAGG and VLAN network interfaces. + * Returns true if child sits (transitively) on top of parent, else false. + */ +bool +if_is_child_of(if_t child, if_t parent) +{ + struct lagg_softc *lagg_sc; + struct lagg_port *lagg_lp; + if_t trunk; + + NET_EPOCH_ASSERT(); + + if (child == NULL || + parent == NULL || + child == parent) + return (false); + + switch (child->if_type) { + case IFT_L2VLAN: + trunk = VLAN_TRUNKDEV(child); + if (trunk == parent || + if_is_child_of(trunk, parent)) + return (true); + break; + case IFT_ETHER: + if (if_is_lagg(child) == false) + break; + lagg_sc = child->if_softc; + if (lagg_sc == NULL) + break; + CK_SLIST_FOREACH(lagg_lp, &lagg_sc->sc_ports, lp_entries) { + trunk = lagg_lp->lp_ifp; + if (trunk == parent || + if_is_child_of(trunk, parent)) + return (true); + } + break; + default: + break; + } + return (false); /* no match */ +} + +/* + * Figure out the active trunk device given any network device. + * Typically for use with LAGG and VLAN network interfaces. + * Returns NULL if trunk is not active.
+ */ +if_t +if_get_active_trunk(if_t ifp) +{ + struct lagg_softc *lagg_sc; + struct lagg_port *lagg_lp; + + NET_EPOCH_ASSERT(); +retry: + /* check if network interface is down */ + if (ifp->if_link_state != LINK_STATE_UP || + (ifp->if_flags & IFF_UP) == 0) { + ifp = NULL; + } else switch (ifp->if_type) { + case IFT_L2VLAN: + ifp = VLAN_TRUNKDEV(ifp); + if (ifp != NULL) + goto retry; + break; + case IFT_ETHER: + if (if_is_lagg(ifp) == false) + break; + lagg_sc = ifp->if_softc; + if (lagg_sc == NULL || lagg_sc->sc_proto != LAGG_PROTO_FAILOVER) { + ifp = NULL; + break; + } + lagg_lp = lagg_sc->sc_primary; + if (lagg_lp == NULL || LAGG_PORTACTIVE(lagg_lp) == 0) { + CK_SLIST_FOREACH(lagg_lp, &lagg_sc->sc_ports, lp_entries) { + if (LAGG_PORTACTIVE(lagg_lp)) + break; + } + if (lagg_lp == NULL) { + ifp = NULL; + break; + } + } + ifp = lagg_lp->lp_ifp; + if (ifp != NULL) + goto retry; + break; + default: + break; + } + return (ifp); +} + +/* * Handle a change in the interface link state. To avoid LORs * between driver lock and upper layer locks, as well as possible * recursions, we post event to taskqueue, and all job Index: sys/net/if_lagg.c =================================================================== --- sys/net/if_lagg.c +++ sys/net/if_lagg.c @@ -605,6 +605,9 @@ LAGG_SX_DESTROY(sc); free(sc, M_LAGG); + + /* notify kernel about lagg port changes */ + EVENTHANDLER_INVOKE(ifnet_event, ifp, IFNET_EVENT_LAGG_PORT); } static void @@ -1480,6 +1483,9 @@ } VLAN_CAPABILITIES(ifp); + + /* notify kernel about new lagg port */ + EVENTHANDLER_INVOKE(ifnet_event, ifp, IFNET_EVENT_LAGG_PORT); break; case SIOCSLAGGDELPORT: error = priv_check(td, PRIV_NET_LAGG); @@ -1504,6 +1510,9 @@ LAGG_XUNLOCK(sc); if_rele(tpif); VLAN_CAPABILITIES(ifp); + + /* notify kernel about deleted lagg port */ + EVENTHANDLER_INVOKE(ifnet_event, ifp, IFNET_EVENT_LAGG_PORT); break; case SIOCSIFFLAGS: /* Set flags on ports too */ Index: sys/net/if_var.h 
=================================================================== --- sys/net/if_var.h +++ sys/net/if_var.h @@ -471,6 +471,7 @@ #define IFNET_EVENT_UP 0 #define IFNET_EVENT_DOWN 1 #define IFNET_EVENT_PCP 2 /* priority code point, PCP */ +#define IFNET_EVENT_LAGG_PORT 3 /* lagg port modified */ typedef void (*ifnet_event_fn)(void *, struct ifnet *ifp, int event); EVENTHANDLER_DECLARE(ifnet_event, ifnet_event_fn); @@ -750,6 +751,8 @@ void if_bpfmtap(if_t ifp, struct mbuf *m); void if_etherbpfmtap(if_t ifp, struct mbuf *m); void if_vlancap(if_t ifp); +bool if_is_child_of(if_t child, if_t parent); +if_t if_get_active_trunk(if_t ifp); /* * Traversing through interface address lists. Index: sys/ofed/drivers/infiniband/core/ib_cache.c =================================================================== --- sys/ofed/drivers/infiniband/core/ib_cache.c +++ sys/ofed/drivers/infiniband/core/ib_cache.c @@ -70,6 +70,7 @@ GID_ATTR_FIND_MASK_NETDEV = 1UL << 1, GID_ATTR_FIND_MASK_DEFAULT = 1UL << 2, GID_ATTR_FIND_MASK_GID_TYPE = 1UL << 3, + GID_ATTR_FIND_MASK_ACTIVE_CHECK = 1UL << 4, }; enum gid_table_entry_props { @@ -259,12 +260,14 @@ /* rwlock should be read locked */ static int find_gid(struct ib_gid_table *table, const union ib_gid *gid, const struct ib_gid_attr *val, bool default_gid, - unsigned long mask, int *pempty) + unsigned long mask, int *pempty, struct net_device *idev) { + struct epoch_tracker et; int i = 0; int found = -1; int empty = pempty ? 
-1 : 0; + NET_EPOCH_ENTER(et); while (i < table->sz && (found < 0 || empty < 0)) { struct ib_gid_table_entry *data = &table->data_vec[i]; struct ib_gid_attr *attr = &data->attr; @@ -301,8 +304,12 @@ default_gid) continue; + if (idev != NULL && if_get_active_trunk(attr->ndev) != idev) + continue; + found = curr_index; } + NET_EPOCH_EXIT(et); if (pempty) *pempty = empty; @@ -349,7 +356,7 @@ ix = find_gid(table, gid, attr, false, GID_ATTR_FIND_MASK_GID | GID_ATTR_FIND_MASK_GID_TYPE | - GID_ATTR_FIND_MASK_NETDEV, &empty); + GID_ATTR_FIND_MASK_NETDEV, &empty, NULL); if (ix >= 0) goto out_unlock; @@ -385,7 +392,7 @@ GID_ATTR_FIND_MASK_GID_TYPE | GID_ATTR_FIND_MASK_NETDEV | GID_ATTR_FIND_MASK_DEFAULT, - NULL); + NULL, NULL); if (ix < 0) goto out_unlock; @@ -469,9 +476,18 @@ unsigned long flags; for (p = 0; p < ib_dev->phys_port_cnt; p++) { + struct net_device *idev; + + if ((mask & GID_ATTR_FIND_MASK_ACTIVE_CHECK) != 0 && + ib_dev->get_netdev != NULL) { + idev = ib_dev->get_netdev( + ib_dev, p + rdma_start_port(ib_dev)); + } else { + idev = NULL; + } table = ports_table[p]; read_lock_irqsave(&table->rwlock, flags); - local_index = find_gid(table, gid, val, false, mask, NULL); + local_index = find_gid(table, gid, val, false, mask, NULL, idev); if (local_index >= 0) { if (index) *index = local_index; @@ -486,11 +502,12 @@ return -ENOENT; } -static int ib_cache_gid_find(struct ib_device *ib_dev, - const union ib_gid *gid, - enum ib_gid_type gid_type, - struct net_device *ndev, u8 *port, - u16 *index) +int ib_find_cached_gid(struct ib_device *ib_dev, + const union ib_gid *gid, + enum ib_gid_type gid_type, + struct net_device *ndev, u8 *port, + u16 *index, + bool active_check) { unsigned long mask = GID_ATTR_FIND_MASK_GID | GID_ATTR_FIND_MASK_GID_TYPE; @@ -498,10 +515,13 @@ if (ndev) mask |= GID_ATTR_FIND_MASK_NETDEV; + if (active_check) + mask |= GID_ATTR_FIND_MASK_ACTIVE_CHECK; return _ib_cache_gid_table_find(ib_dev, gid, &gid_attr_val, mask, port, index); } 
+EXPORT_SYMBOL(ib_find_cached_gid); int ib_find_cached_gid_by_port(struct ib_device *ib_dev, const union ib_gid *gid, @@ -527,7 +547,7 @@ mask |= GID_ATTR_FIND_MASK_NETDEV; read_lock_irqsave(&table->rwlock, flags); - local_index = find_gid(table, gid, &val, false, mask, NULL); + local_index = find_gid(table, gid, &val, false, mask, NULL, NULL); if (local_index >= 0) { if (index) *index = local_index; @@ -713,7 +733,7 @@ ix = find_gid(table, NULL, &gid_attr, true, GID_ATTR_FIND_MASK_GID_TYPE | GID_ATTR_FIND_MASK_DEFAULT, - NULL); + NULL, NULL); /* Coudn't find default GID location */ if (WARN_ON(ix < 0)) @@ -895,17 +915,6 @@ } EXPORT_SYMBOL(ib_get_cached_gid); -int ib_find_cached_gid(struct ib_device *device, - const union ib_gid *gid, - enum ib_gid_type gid_type, - struct net_device *ndev, - u8 *port_num, - u16 *index) -{ - return ib_cache_gid_find(device, gid, gid_type, ndev, port_num, index); -} -EXPORT_SYMBOL(ib_find_cached_gid); - int ib_find_gid_by_filter(struct ib_device *device, const union ib_gid *gid, u8 port_num, Index: sys/ofed/drivers/infiniband/core/ib_cm.c =================================================================== --- sys/ofed/drivers/infiniband/core/ib_cm.c +++ sys/ofed/drivers/infiniband/core/ib_cm.c @@ -421,7 +421,8 @@ } static int cm_init_av_by_path(struct ib_sa_path_rec *path, struct cm_av *av, - struct cm_id_private *cm_id_priv) + struct cm_id_private *cm_id_priv, + bool active_check) { struct cm_device *cm_dev; struct cm_port *port = NULL; @@ -433,7 +434,8 @@ read_lock_irqsave(&cm.device_lock, flags); list_for_each_entry(cm_dev, &cm.device_list, list) { if (!ib_find_cached_gid(cm_dev->ib_device, &path->sgid, - path->gid_type, ndev, &p, NULL)) { + path->gid_type, ndev, &p, NULL, + active_check)) { port = cm_dev->port[p-1]; break; } @@ -1286,12 +1288,12 @@ } ret = cm_init_av_by_path(param->primary_path, &cm_id_priv->av, - cm_id_priv); + cm_id_priv, true); if (ret) goto error1; if (param->alternate_path) { ret = 
cm_init_av_by_path(param->alternate_path, - &cm_id_priv->alt_av, cm_id_priv); + &cm_id_priv->alt_av, cm_id_priv, true); if (ret) goto error1; } @@ -1743,7 +1745,7 @@ } work->path[0].gid_type = gid_attr.gid_type; ret = cm_init_av_by_path(&work->path[0], &cm_id_priv->av, - cm_id_priv); + cm_id_priv, false); } if (ret) { int err = ib_get_cached_gid(work->port->cm_dev->ib_device, @@ -1763,7 +1765,7 @@ } if (req_msg->alt_local_lid) { ret = cm_init_av_by_path(&work->path[1], &cm_id_priv->alt_av, - cm_id_priv); + cm_id_priv, false); if (ret) { ib_send_cm_rej(cm_id, IB_CM_REJ_INVALID_ALT_GID, &work->path[0].sgid, @@ -2821,7 +2823,7 @@ } ret = cm_init_av_by_path(alternate_path, &cm_id_priv->alt_av, - cm_id_priv); + cm_id_priv, true); if (ret) goto out; cm_id_priv->alt_av.timeout = @@ -2938,7 +2940,7 @@ if (ret) goto unlock; ret = cm_init_av_by_path(param->alternate_path, &cm_id_priv->alt_av, - cm_id_priv); + cm_id_priv, false); if (ret) goto unlock; ret = atomic_inc_and_test(&cm_id_priv->work_count); @@ -3132,7 +3134,7 @@ return -EINVAL; cm_id_priv = container_of(cm_id, struct cm_id_private, id); - ret = cm_init_av_by_path(param->path, &cm_id_priv->av, cm_id_priv); + ret = cm_init_av_by_path(param->path, &cm_id_priv->av, cm_id_priv, true); if (ret) goto out; Index: sys/ofed/drivers/infiniband/core/ib_roce_gid_mgmt.c =================================================================== --- sys/ofed/drivers/infiniband/core/ib_roce_gid_mgmt.c +++ sys/ofed/drivers/infiniband/core/ib_roce_gid_mgmt.c @@ -1,7 +1,7 @@ /*- * SPDX-License-Identifier: BSD-2-Clause OR GPL-2.0 * - * Copyright (c) 2015-2017, Mellanox Technologies inc. All rights reserved. + * Copyright (c) 2015-2020, Mellanox Technologies inc. All rights reserved. * * This software is available to you under a choice of one of two * licenses. 
You may choose to be licensed under the terms of the GNU @@ -122,9 +122,17 @@ struct net_device *idev, void *cookie) { struct net_device *ndev = (struct net_device *)cookie; + struct epoch_tracker et; + int retval; + if (idev == NULL) return (0); - return (ndev == idev); + + NET_EPOCH_ENTER(et); + retval = (ndev == idev || if_is_child_of(ndev, idev) == true); + NET_EPOCH_EXIT(et); + + return (retval); } static int @@ -169,6 +177,7 @@ struct ifaddr *ifa; #endif VNET_ITERATOR_DECL(vnet_iter); + struct epoch_tracker et; struct ib_gid_attr gid_attr; union ib_gid gid; int default_gids; @@ -186,19 +195,15 @@ VNET_FOREACH(vnet_iter) { CURVNET_SET(vnet_iter); IFNET_RLOCK(); + NET_EPOCH_ENTER(et); CK_STAILQ_FOREACH(idev, &V_ifnet, if_link) { - struct epoch_tracker et; - if (idev != ndev) { - if (idev->if_type != IFT_L2VLAN) - continue; - if (ndev != rdma_vlan_dev_real_dev(idev)) - continue; - } - - /* clone address information for IPv4 and IPv6 */ - NET_EPOCH_ENTER(et); + /* check if network device does not belong to us */ + if (idev != ndev && + if_is_child_of(idev, ndev) == false) + continue; #if defined(INET) + /* clone address information for IPv4 */ CK_STAILQ_FOREACH(ifa, &idev->if_addrhead, ifa_link) { if (ifa->ifa_addr == NULL || ifa->ifa_addr->sa_family != AF_INET) @@ -215,6 +220,7 @@ } #endif #if defined(INET6) + /* clone address information for IPv6 */ CK_STAILQ_FOREACH(ifa, &idev->if_addrhead, ifa_link) { if (ifa->ifa_addr == NULL || ifa->ifa_addr->sa_family != AF_INET6) @@ -235,8 +241,8 @@ STAILQ_INSERT_TAIL(&ipx_head, entry, entry); } #endif - NET_EPOCH_EXIT(et); } + NET_EPOCH_EXIT(et); IFNET_RUNLOCK(); CURVNET_RESTORE(); } @@ -319,15 +325,12 @@ { struct roce_netdev_event_work *work; -retry: + /* filter out network device types early on */ switch (ndev->if_type) { case IFT_ETHER: + case IFT_L2VLAN: + case IFT_IEEE8023ADLAG: break; - case IFT_L2VLAN: - ndev = rdma_vlan_dev_real_dev(ndev); - if (ndev != NULL) - goto retry; - /* FALLTHROUGH */ default: return; } 
@@ -406,12 +409,21 @@ static void roce_ifnet_event(void *arg, struct ifnet *ifp, int event) { - if (event != IFNET_EVENT_PCP || is_vlan_dev(ifp)) - return; - - /* make sure GID table is reloaded */ - roce_gid_delete_all_event(ifp); - roce_gid_queue_scan_event(ifp); + switch (event) { + case IFNET_EVENT_PCP: + if (is_vlan_dev(ifp)) + break; + /* make sure GID table is reloaded */ + roce_gid_delete_all_event(ifp); + roce_gid_queue_scan_event(ifp); + break; + case IFNET_EVENT_LAGG_PORT: + /* make sure GID table is updated */ + roce_gid_queue_scan_event(ifp); + break; + default: + break; + } } static void Index: sys/ofed/drivers/infiniband/core/ib_sa_query.c =================================================================== --- sys/ofed/drivers/infiniband/core/ib_sa_query.c +++ sys/ofed/drivers/infiniband/core/ib_sa_query.c @@ -663,6 +663,7 @@ use_roce = rdma_cap_eth_ah(device, port_num); if (use_roce) { + struct epoch_tracker et; struct net_device *idev; struct net_device *resolved_dev; struct rdma_dev_addr dev_addr = {.bound_dev_if = rec->ifindex, @@ -702,10 +703,16 @@ return -ENODEV; } ndev = ib_get_ndev_from_path(rec); - if ((ndev && ndev != resolved_dev) || + /* + * Check if path has incorrect net device or if the + * net device is not a child of the parent device: + */ + NET_EPOCH_ENTER(et); + if ((ndev != NULL && ndev != resolved_dev) || (resolved_dev != idev && - rdma_vlan_dev_real_dev(resolved_dev) != idev)) + if_is_child_of(resolved_dev, idev) == false)) ret = -EHOSTUNREACH; + NET_EPOCH_EXIT(et); dev_put(idev); dev_put(resolved_dev); if (ret) { Index: sys/ofed/include/rdma/ib_addr.h =================================================================== --- sys/ofed/include/rdma/ib_addr.h +++ sys/ofed/include/rdma/ib_addr.h @@ -352,15 +352,4 @@ return vid < 0x1000 ? 
vid : 0xffff; } -static inline struct net_device *rdma_vlan_dev_real_dev(struct net_device *dev) -{ - struct epoch_tracker et; - - NET_EPOCH_ENTER(et); - if (dev->if_type != IFT_ETHER || dev->if_pcp == IFNET_PCP_NONE) - dev = VLAN_TRUNKDEV(dev); /* non prio-tagged traffic */ - NET_EPOCH_EXIT(et); - return (dev); -} - #endif /* IB_ADDR_H */ Index: sys/ofed/include/rdma/ib_cache.h =================================================================== --- sys/ofed/include/rdma/ib_cache.h +++ sys/ofed/include/rdma/ib_cache.h @@ -69,6 +69,7 @@ * @port_num: The port number of the device where the GID value was found. * @index: The index into the cached GID table where the GID was found. This * parameter may be NULL. + * @active_check: Set if net device should be checked for active state. * * ib_find_cached_gid() searches for the specified GID value in * the local software cache. @@ -78,7 +79,8 @@ enum ib_gid_type gid_type, struct net_device *ndev, u8 *port_num, - u16 *index); + u16 *index, + bool active_check); /** * ib_find_cached_gid_by_port - Returns the GID table index where a specified