Index: sbin/ifconfig/ifconfig.8 =================================================================== --- sbin/ifconfig/ifconfig.8 +++ sbin/ifconfig/ifconfig.8 @@ -2497,6 +2497,10 @@ Set a shift parameter for RSS local hash computation. Hash is calculated by using flowid bits in a packet header mbuf which are shifted by the number of this parameter. +.It Cm use_numa +Enable selection of egress ports based on the native NUMA domain for the packets being transmitted. This is currently only implemented for lacp mode. This works only on NUMA hardware, running a kernel compiled with the NUMA option, and when interfaces from multiple NUMA domains are ports of the aggregation interface. +.It Cm -use_numa +Disable selection of egress ports based on the native NUMA domain for the packets being transmitted. .It Cm lacp_fast_timeout Enable lacp fast-timeout on the interface. .It Cm -lacp_fast_timeout Index: sbin/ifconfig/iflagg.c =================================================================== --- sbin/ifconfig/iflagg.c +++ sbin/ifconfig/iflagg.c @@ -130,6 +130,8 @@ switch (ro.ro_opts) { case LAGG_OPT_USE_FLOWID: case -LAGG_OPT_USE_FLOWID: + case LAGG_OPT_USE_NUMA: + case -LAGG_OPT_USE_NUMA: case LAGG_OPT_LACP_STRICT: case -LAGG_OPT_LACP_STRICT: case LAGG_OPT_LACP_TXTEST: @@ -303,6 +305,8 @@ DEF_CMD_ARG("lagghash", setlagghash), DEF_CMD("use_flowid", LAGG_OPT_USE_FLOWID, setlaggsetopt), DEF_CMD("-use_flowid", -LAGG_OPT_USE_FLOWID, setlaggsetopt), + DEF_CMD("use_numa", LAGG_OPT_USE_NUMA, setlaggsetopt), + DEF_CMD("-use_numa", -LAGG_OPT_USE_NUMA, setlaggsetopt), DEF_CMD("lacp_strict", LAGG_OPT_LACP_STRICT, setlaggsetopt), DEF_CMD("-lacp_strict", -LAGG_OPT_LACP_STRICT, setlaggsetopt), DEF_CMD("lacp_txtest", LAGG_OPT_LACP_TXTEST, setlaggsetopt), Index: sys/net/ieee8023ad_lacp.h =================================================================== --- sys/net/ieee8023ad_lacp.h +++ sys/net/ieee8023ad_lacp.h @@ -197,8 +197,15 @@ #define LACP_MAX_PORTS 32 +struct lacp_numa { + int count; + struct lacp_port *map[LACP_MAX_PORTS]; +}; + struct lacp_portmap { int pm_count; + int pm_num_dom; + struct lacp_numa pm_numa[MAXMEMDOM]; struct lacp_port *pm_map[LACP_MAX_PORTS]; }; Index: sys/net/ieee8023ad_lacp.c =================================================================== --- sys/net/ieee8023ad_lacp.c +++ sys/net/ieee8023ad_lacp.c @@ -835,7 +835,9 @@ struct lacp_softc *lsc = LACP_SOFTC(sc); struct lacp_portmap *pm; struct lacp_port *lp; + struct lacp_port **map; uint32_t hash; + int count; if (__predict_false(lsc->lsc_suppress_distributing)) { LACP_DPRINTF((NULL, "%s: waiting transit\n", __func__)); @@ -848,13 +850,32 @@ return (NULL); } +#ifdef NUMA + if ((sc->sc_opts & LAGG_OPT_USE_NUMA) && + pm->pm_num_dom > 1 && m->m_pkthdr.numa_domain < MAXMEMDOM) { + count = pm->pm_numa[m->m_pkthdr.numa_domain].count; + if (count > 0) { + map = pm->pm_numa[m->m_pkthdr.numa_domain].map; + } else { + /* No ports on this domain; use global hash. */ + map = pm->pm_map; + count = pm->pm_count; + } + } else { +#endif + map = pm->pm_map; + count = pm->pm_count; +#ifdef NUMA + } +#endif if ((sc->sc_opts & LAGG_OPT_USE_FLOWID) && M_HASHTYPE_GET(m) != M_HASHTYPE_NONE) hash = m->m_pkthdr.flowid >> sc->flowid_shift; else hash = m_ether_tcpip_hash(sc->sc_flags, m, lsc->lsc_hashkey); - hash %= pm->pm_count; - lp = pm->pm_map[hash]; + + hash %= count; + lp = map[hash]; KASSERT((lp->lp_state & LACP_STATE_DISTRIBUTING) != 0, ("aggregated port is not distributing")); @@ -1044,6 +1065,10 @@ uint64_t speed; u_int newmap; int i; +#ifdef NUMA + int count; + uint8_t domain; +#endif newmap = lsc->lsc_activemap == 0 ? 1 : 0; p = &lsc->lsc_pmap[newmap]; @@ -1054,9 +1079,25 @@ if (la != NULL && la->la_nports > 0) { p->pm_count = la->la_nports; i = 0; - TAILQ_FOREACH(lp, &la->la_ports, lp_dist_q) + TAILQ_FOREACH(lp, &la->la_ports, lp_dist_q) { p->pm_map[i++] = lp; +#ifdef NUMA + domain = lp->lp_ifp->if_numa_domain; + if (domain >= MAXMEMDOM) + continue; + count = p->pm_numa[domain].count; + p->pm_numa[domain].map[count] = lp; + p->pm_numa[domain].count++; +#endif + } KASSERT(i == p->pm_count, ("Invalid port count")); + +#ifdef NUMA + for (i = 0; i < MAXMEMDOM; i++) { + if (p->pm_numa[i].count != 0) + p->pm_num_dom++; + } +#endif speed = lacp_aggregator_bandwidth(la); } sc->sc_ifp->if_baudrate = speed; Index: sys/net/if_lagg.h =================================================================== --- sys/net/if_lagg.h +++ sys/net/if_lagg.h @@ -143,6 +143,7 @@ #define LAGG_OPT_USE_FLOWID 0x01 /* enable use of flowid */ /* Pseudo flags which are used in ro_opts but not stored into sc_opts. */ #define LAGG_OPT_FLOWIDSHIFT 0x02 /* set flowid shift */ +#define LAGG_OPT_USE_NUMA 0x04 /* enable use of numa */ #define LAGG_OPT_FLOWIDSHIFT_MASK 0x1f /* flowid is uint32_t */ #define LAGG_OPT_LACP_STRICT 0x10 /* LACP strict mode */ #define LAGG_OPT_LACP_TXTEST 0x20 /* LACP debug: txtest */ Index: sys/net/if_lagg.c =================================================================== --- sys/net/if_lagg.c +++ sys/net/if_lagg.c @@ -264,6 +264,13 @@ &VNET_NAME(def_use_flowid), 0, "Default setting for using flow id for load sharing"); +/* Default value for using numa */ +VNET_DEFINE_STATIC(int, def_use_numa) = 1; +#define V_def_use_numa VNET(def_use_numa) +SYSCTL_INT(_net_link_lagg, OID_AUTO, default_use_numa, CTLFLAG_RWTUN, + &VNET_NAME(def_use_numa), 1, + "Use numa to steer flows"); + /* Default value for flowid shift */ VNET_DEFINE_STATIC(int, def_flowid_shift) = 16; #define V_def_flowid_shift VNET(def_flowid_shift) @@ -491,6 +498,8 @@ LAGG_XLOCK(sc); if (V_def_use_flowid) sc->sc_opts |= LAGG_OPT_USE_FLOWID; + if (V_def_use_numa) + sc->sc_opts |= LAGG_OPT_USE_NUMA; sc->flowid_shift = V_def_flowid_shift; /* Hash all layers by default */ @@ -1247,6 +1256,8 @@ switch (ro->ro_opts) { case LAGG_OPT_USE_FLOWID: case -LAGG_OPT_USE_FLOWID: + case LAGG_OPT_USE_NUMA: + case -LAGG_OPT_USE_NUMA: case LAGG_OPT_FLOWIDSHIFT: valid = 1; lacp = 0;