Index: head/sbin/ifconfig/ifconfig.8 =================================================================== --- head/sbin/ifconfig/ifconfig.8 +++ head/sbin/ifconfig/ifconfig.8 @@ -28,7 +28,7 @@ .\" From: @(#)ifconfig.8 8.3 (Berkeley) 1/5/94 .\" $FreeBSD$ .\" -.Dd June 27, 2018 +.Dd May 3, 2019 .Dt IFCONFIG 8 .Os .Sh NAME @@ -2497,6 +2497,22 @@ Set a shift parameter for RSS local hash computation. Hash is calculated by using flowid bits in a packet header mbuf which are shifted by the number of this parameter. +.It Cm use_numa +Enable selection of egress ports based on the native +.Xr NUMA 4 +domain for the packets being transmitted. +This is currently only implemented for lacp mode. +This works only on +.Xr NUMA 4 +hardware, running a kernel compiled with the +.Xr NUMA 4 +option, and when interfaces from multiple +.Xr NUMA 4 +domains are ports of the aggregation interface. +.It Cm -use_numa +Disable selection of egress ports based on the native +.Xr NUMA 4 +domain for the packets being transmitted. .It Cm lacp_fast_timeout Enable lacp fast-timeout on the interface. .It Cm -lacp_fast_timeout Index: head/sbin/ifconfig/iflagg.c =================================================================== --- head/sbin/ifconfig/iflagg.c +++ head/sbin/ifconfig/iflagg.c @@ -130,6 +130,8 @@ switch (ro.ro_opts) { case LAGG_OPT_USE_FLOWID: case -LAGG_OPT_USE_FLOWID: + case LAGG_OPT_USE_NUMA: + case -LAGG_OPT_USE_NUMA: case LAGG_OPT_LACP_STRICT: case -LAGG_OPT_LACP_STRICT: case LAGG_OPT_LACP_TXTEST: @@ -303,6 +305,8 @@ DEF_CMD_ARG("lagghash", setlagghash), DEF_CMD("use_flowid", LAGG_OPT_USE_FLOWID, setlaggsetopt), DEF_CMD("-use_flowid", -LAGG_OPT_USE_FLOWID, setlaggsetopt), + DEF_CMD("use_numa", LAGG_OPT_USE_NUMA, setlaggsetopt), + DEF_CMD("-use_numa", -LAGG_OPT_USE_NUMA, setlaggsetopt), DEF_CMD("lacp_strict", LAGG_OPT_LACP_STRICT, setlaggsetopt), DEF_CMD("-lacp_strict", -LAGG_OPT_LACP_STRICT, setlaggsetopt), DEF_CMD("lacp_txtest", LAGG_OPT_LACP_TXTEST, setlaggsetopt), Index: head/sys/net/ieee8023ad_lacp.h =================================================================== --- head/sys/net/ieee8023ad_lacp.h +++ head/sys/net/ieee8023ad_lacp.h @@ -197,8 +197,15 @@ #define LACP_MAX_PORTS 32 +struct lacp_numa { + int count; + struct lacp_port *map[LACP_MAX_PORTS]; +}; + struct lacp_portmap { int pm_count; + int pm_num_dom; + struct lacp_numa pm_numa[MAXMEMDOM]; struct lacp_port *pm_map[LACP_MAX_PORTS]; }; Index: head/sys/net/ieee8023ad_lacp.c =================================================================== --- head/sys/net/ieee8023ad_lacp.c +++ head/sys/net/ieee8023ad_lacp.c @@ -835,7 +835,9 @@ struct lacp_softc *lsc = LACP_SOFTC(sc); struct lacp_portmap *pm; struct lacp_port *lp; + struct lacp_port **map; uint32_t hash; + int count; if (__predict_false(lsc->lsc_suppress_distributing)) { LACP_DPRINTF((NULL, "%s: waiting transit\n", __func__)); @@ -848,14 +850,32 @@ return (NULL); } +#ifdef NUMA + if ((sc->sc_opts & LAGG_OPT_USE_NUMA) && + pm->pm_num_dom > 1 && m->m_pkthdr.numa_domain < MAXMEMDOM) { + count = pm->pm_numa[m->m_pkthdr.numa_domain].count; + if (count > 0) { + map = pm->pm_numa[m->m_pkthdr.numa_domain].map; + } else { + /* No ports on this domain; use global hash. */ + map = pm->pm_map; + count = pm->pm_count; + } + } else +#endif + { + map = pm->pm_map; + count = pm->pm_count; + } if ((sc->sc_opts & LAGG_OPT_USE_FLOWID) && M_HASHTYPE_GET(m) != M_HASHTYPE_NONE) hash = m->m_pkthdr.flowid >> sc->flowid_shift; else hash = m_ether_tcpip_hash(sc->sc_flags, m, lsc->lsc_hashkey); - hash %= pm->pm_count; - lp = pm->pm_map[hash]; + hash %= count; + lp = map[hash]; + KASSERT((lp->lp_state & LACP_STATE_DISTRIBUTING) != 0, ("aggregated port is not distributing")); @@ -1044,6 +1064,10 @@ uint64_t speed; u_int newmap; int i; +#ifdef NUMA + int count; + uint8_t domain; +#endif newmap = lsc->lsc_activemap == 0 ? 1 : 0; p = &lsc->lsc_pmap[newmap]; @@ -1054,9 +1078,25 @@ if (la != NULL && la->la_nports > 0) { p->pm_count = la->la_nports; i = 0; - TAILQ_FOREACH(lp, &la->la_ports, lp_dist_q) + TAILQ_FOREACH(lp, &la->la_ports, lp_dist_q) { p->pm_map[i++] = lp; +#ifdef NUMA + domain = lp->lp_ifp->if_numa_domain; + if (domain >= MAXMEMDOM) + continue; + count = p->pm_numa[domain].count; + p->pm_numa[domain].map[count] = lp; + p->pm_numa[domain].count++; +#endif + } KASSERT(i == p->pm_count, ("Invalid port count")); + +#ifdef NUMA + for (i = 0; i < MAXMEMDOM; i++) { + if (p->pm_numa[i].count != 0) + p->pm_num_dom++; + } +#endif speed = lacp_aggregator_bandwidth(la); } sc->sc_ifp->if_baudrate = speed; Index: head/sys/net/if_lagg.h =================================================================== --- head/sys/net/if_lagg.h +++ head/sys/net/if_lagg.h @@ -143,6 +143,7 @@ #define LAGG_OPT_USE_FLOWID 0x01 /* enable use of flowid */ /* Pseudo flags which are used in ro_opts but not stored into sc_opts. */ #define LAGG_OPT_FLOWIDSHIFT 0x02 /* set flowid shift */ +#define LAGG_OPT_USE_NUMA 0x04 /* enable use of numa */ #define LAGG_OPT_FLOWIDSHIFT_MASK 0x1f /* flowid is uint32_t */ #define LAGG_OPT_LACP_STRICT 0x10 /* LACP strict mode */ #define LAGG_OPT_LACP_TXTEST 0x20 /* LACP debug: txtest */ @@ -158,8 +159,9 @@ #define SIOCGLAGGOPTS _IOWR('i', 152, struct lagg_reqopts) #define SIOCSLAGGOPTS _IOW('i', 153, struct lagg_reqopts) -#define LAGG_OPT_BITS "\020\001USE_FLOWID\005LACP_STRICT" \ - "\006LACP_TXTEST\007LACP_RXTEST" +#define LAGG_OPT_BITS "\020\001USE_FLOWID\003USE_NUMA" \ + "\005LACP_STRICT\006LACP_TXTEST" \ + "\007LACP_RXTEST" #ifdef _KERNEL Index: head/sys/net/if_lagg.c =================================================================== --- head/sys/net/if_lagg.c +++ head/sys/net/if_lagg.c @@ -264,6 +264,13 @@ &VNET_NAME(def_use_flowid), 0, "Default setting for using flow id for load sharing"); +/* Default value for using numa */ +VNET_DEFINE_STATIC(int, def_use_numa) = 1; +#define V_def_use_numa VNET(def_use_numa) +SYSCTL_INT(_net_link_lagg, OID_AUTO, default_use_numa, CTLFLAG_RWTUN, + &VNET_NAME(def_use_numa), 0, + "Use numa to steer flows"); + /* Default value for flowid shift */ VNET_DEFINE_STATIC(int, def_flowid_shift) = 16; #define V_def_flowid_shift VNET(def_flowid_shift) @@ -491,6 +498,8 @@ LAGG_XLOCK(sc); if (V_def_use_flowid) sc->sc_opts |= LAGG_OPT_USE_FLOWID; + if (V_def_use_numa) + sc->sc_opts |= LAGG_OPT_USE_NUMA; sc->flowid_shift = V_def_flowid_shift; /* Hash all layers by default */ @@ -1247,6 +1256,8 @@ switch (ro->ro_opts) { case LAGG_OPT_USE_FLOWID: case -LAGG_OPT_USE_FLOWID: + case LAGG_OPT_USE_NUMA: + case -LAGG_OPT_USE_NUMA: case LAGG_OPT_FLOWIDSHIFT: valid = 1; lacp = 0;