diff --git a/share/man/man4/netmap.4 b/share/man/man4/netmap.4
--- a/share/man/man4/netmap.4
+++ b/share/man/man4/netmap.4
@@ -25,7 +25,7 @@
 .\" This document is derived in part from the enet man page (enet.4)
 .\" distributed with 4.3BSD Unix.
 .\"
-.Dd March 6, 2022
+.Dd October 10, 2024
 .Dt NETMAP 4
 .Os
 .Sh NAME
@@ -938,6 +938,16 @@
 at loader time.
 .It Va dev.netmap.ptnet_vnet_hdr: 1
 Allow ptnet devices to use virtio-net headers
+.It Va dev.netmap.port_numa_affinity: 0
+On
+.Xr numa 4
+systems, allocate memory for netmap ports from the local NUMA domain when
+possible.
+This can improve performance by reducing the number of remote memory accesses.
+However, when forwarding packets between ports attached to different NUMA
+domains, this will prevent zero-copy forwarding optimizations and thus may hurt
+performance.
+Note that this setting must be specified as a loader tunable at boot time.
 .El
 .Sh SYSTEM CALLS
 .Nm
diff --git a/sys/dev/netmap/netmap.c b/sys/dev/netmap/netmap.c
--- a/sys/dev/netmap/netmap.c
+++ b/sys/dev/netmap/netmap.c
@@ -4010,8 +4010,8 @@
 	na->active_fds = 0;
 
 	if (na->nm_mem == NULL) {
-		/* use iommu or global allocator */
-		na->nm_mem = netmap_mem_get_iommu(na);
+		/* select an allocator based on IOMMU and NUMA affinity */
+		na->nm_mem = netmap_mem_get_allocator(na);
 	}
 	if (na->nm_bdg_attach == NULL)
 		/* no special nm_bdg_attach callback. On VALE
diff --git a/sys/dev/netmap/netmap_kern.h b/sys/dev/netmap/netmap_kern.h
--- a/sys/dev/netmap/netmap_kern.h
+++ b/sys/dev/netmap/netmap_kern.h
@@ -81,6 +81,7 @@
 
 #if defined(__FreeBSD__)
 #include <sys/selinfo.h>
+#include <vm/vm_phys.h>
 
 #define likely(x)	__builtin_expect((long)!!(x), 1L)
 #define unlikely(x)	__builtin_expect((long)!!(x), 0L)
@@ -1727,10 +1728,30 @@
 #define NM_IS_NATIVE(ifp)	(NM_NA_VALID(ifp) && NA(ifp)->nm_dtor == netmap_hw_dtor)
 
 #if defined(__FreeBSD__)
+extern int netmap_port_numa_affinity;
 
-/* Assigns the device IOMMU domain to an allocator.
- * Returns -ENOMEM in case the domain is different */
-#define nm_iommu_group_id(dev) (-1)
+static inline int
+nm_iommu_group_id(struct netmap_adapter *na)
+{
+	return (-1);
+}
+
+static inline int
+nm_numa_domain(struct netmap_adapter *na)
+{
+	int domain;
+
+	/*
+	 * If the system has only one NUMA domain, don't bother distinguishing
+	 * between IF_NODOM and domain 0.
+	 */
+	if (vm_ndomains == 1 || netmap_port_numa_affinity == 0)
+		return (-1);
+	domain = if_getnumadomain(na->ifp);
+	if (domain == IF_NODOM)
+		domain = -1;
+	return (domain);
+}
 
 /* Callback invoked by the dma machinery after a successful dmamap_load */
 static void netmap_dmamap_cb(__unused void *arg,
diff --git a/sys/dev/netmap/netmap_mem2.h b/sys/dev/netmap/netmap_mem2.h
--- a/sys/dev/netmap/netmap_mem2.h
+++ b/sys/dev/netmap/netmap_mem2.h
@@ -146,7 +146,7 @@
 #define netmap_mem_get(d) __netmap_mem_get(d, __FUNCTION__, __LINE__)
 #define netmap_mem_put(d) __netmap_mem_put(d, __FUNCTION__, __LINE__)
 struct netmap_mem_d* __netmap_mem_get(struct netmap_mem_d *, const char *, int);
-struct netmap_mem_d* netmap_mem_get_iommu(struct netmap_adapter *);
+struct netmap_mem_d* netmap_mem_get_allocator(struct netmap_adapter *);
 void __netmap_mem_put(struct netmap_mem_d *, const char *, int);
 struct netmap_mem_d* netmap_mem_find(nm_memid_t);
 unsigned netmap_mem_bufsize(struct netmap_mem_d *nmd);
diff --git a/sys/dev/netmap/netmap_mem2.c b/sys/dev/netmap/netmap_mem2.c
--- a/sys/dev/netmap/netmap_mem2.c
+++ b/sys/dev/netmap/netmap_mem2.c
@@ -37,8 +37,8 @@
 #endif /* __APPLE__ */
 
 #ifdef __FreeBSD__
-#include <sys/cdefs.h> /* prerequisite */
 #include <sys/types.h>
+#include <sys/domainset.h>
 #include <sys/malloc.h>
 #include <sys/kernel.h>	/* MALLOC_DEFINE */
 #include <sys/proc.h>
@@ -174,7 +174,8 @@
 	struct netmap_obj_pool pools[NETMAP_POOLS_NR];
 
 	nm_memid_t nm_id;	/* allocator identifier */
-	int nm_grp;	/* iommu group id */
+	int nm_grp;		/* iommu group id */
+	int nm_numa_domain;	/* local NUMA domain */
 
 	/* list of all existing allocators, sorted by nm_id */
 	struct netmap_mem_d *prev, *next;
@@ -310,7 +311,7 @@
 
 static int netmap_mem_map(struct netmap_obj_pool *, struct netmap_adapter *);
 static int netmap_mem_unmap(struct netmap_obj_pool *, struct netmap_adapter *);
-static int nm_mem_check_group(struct netmap_mem_d *, bus_dma_tag_t);
+static int nm_mem_check_group(struct netmap_mem_d *, void *);
 static void nm_mem_release_id(struct netmap_mem_d *);
 
 nm_memid_t
@@ -576,6 +577,7 @@
 
 	.nm_id = 1,
 	.nm_grp = -1,
+	.nm_numa_domain = -1,
 
 	.prev = &nm_mem,
 	.next = &nm_mem,
@@ -615,6 +617,7 @@
 	},
 
 	.nm_grp = -1,
+	.nm_numa_domain = -1,
 
 	.flags = NETMAP_MEM_PRIVATE,
 
@@ -625,7 +628,6 @@
 
 #define STRINGIFY(x) #x
 
-
 #define DECLARE_SYSCTLS(id, name) \
 	SYSBEGIN(mem2_ ## name); \
 	SYSCTL_INT(_dev_netmap, OID_AUTO, name##_size, \
@@ -649,9 +651,14 @@
 DECLARE_SYSCTLS(NETMAP_RING_POOL, ring);
 DECLARE_SYSCTLS(NETMAP_BUF_POOL, buf);
 
+int netmap_port_numa_affinity = 0;
+SYSCTL_INT(_dev_netmap, OID_AUTO, port_numa_affinity,
+    CTLFLAG_RDTUN, &netmap_port_numa_affinity, 0,
+    "Use NUMA-local memory for memory pools when possible");
+
 /* call with nm_mem_list_lock held */
 static int
-nm_mem_assign_id_locked(struct netmap_mem_d *nmd, int grp_id)
+nm_mem_assign_id_locked(struct netmap_mem_d *nmd, int grp_id, int domain)
 {
 	nm_memid_t id;
 	struct netmap_mem_d *scan = netmap_last_mem_d;
@@ -666,6 +673,7 @@
 		if (id != scan->nm_id) {
 			nmd->nm_id = id;
 			nmd->nm_grp = grp_id;
+			nmd->nm_numa_domain = domain;
 			nmd->prev = scan->prev;
 			nmd->next = scan;
 			scan->prev->next = nmd;
@@ -688,7 +696,7 @@
 	int ret;
 
 	NM_MTX_LOCK(nm_mem_list_lock);
-	ret = nm_mem_assign_id_locked(nmd, grp_id);
+	ret = nm_mem_assign_id_locked(nmd, grp_id, -1);
 	NM_MTX_UNLOCK(nm_mem_list_lock);
 
 	return ret;
@@ -728,7 +736,7 @@
 }
 
 static int
-nm_mem_check_group(struct netmap_mem_d *nmd, bus_dma_tag_t dev)
+nm_mem_check_group(struct netmap_mem_d *nmd, void *dev)
 {
 	int err = 0, id;
 
@@ -1399,7 +1407,7 @@
 
 /* call with NMA_LOCK held */
 static int
-netmap_finalize_obj_allocator(struct netmap_obj_pool *p)
+netmap_finalize_obj_allocator(struct netmap_mem_d *nmd, struct netmap_obj_pool *p)
 {
 	int i; /* must be signed */
 
@@ -1440,8 +1448,16 @@
 	 * can live with standard malloc, because the hardware will not
 	 * access the pages directly.
 	 */
-	clust = contigmalloc(p->_clustsize, M_NETMAP, M_NOWAIT | M_ZERO,
-	    (size_t)0, -1UL, PAGE_SIZE, 0);
+	if (nmd->nm_numa_domain == -1) {
+		clust = contigmalloc(p->_clustsize, M_NETMAP,
+		    M_NOWAIT | M_ZERO, (size_t)0, -1UL, PAGE_SIZE, 0);
+	} else {
+		struct domainset *ds;
+
+		ds = DOMAINSET_PREF(nmd->nm_numa_domain);
+		clust = contigmalloc_domainset(p->_clustsize, M_NETMAP,
+		    ds, M_NOWAIT | M_ZERO, (size_t)0, -1UL, PAGE_SIZE, 0);
+	}
 	if (clust == NULL) {
 		/*
 		 * If we get here, there is a severe memory shortage,
@@ -1634,7 +1650,7 @@
 	nmd->lasterr = 0;
 	nmd->nm_totalsize = 0;
 	for (i = 0; i < NETMAP_POOLS_NR; i++) {
-		nmd->lasterr = netmap_finalize_obj_allocator(&nmd->pools[i]);
+		nmd->lasterr = netmap_finalize_obj_allocator(nmd, &nmd->pools[i]);
 		if (nmd->lasterr)
 			goto error;
 		nmd->nm_totalsize += nmd->pools[i].memtotal;
@@ -1802,24 +1818,26 @@
 	return d;
 }
 
-/* Reference iommu allocator - find existing or create new,
- * for not hw addapeters fallback to global allocator.
+/* Reference IOMMU and NUMA local allocator - find existing or create new,
+ * for non-hw adapters, fall back to global allocator.
  */
 struct netmap_mem_d *
-netmap_mem_get_iommu(struct netmap_adapter *na)
+netmap_mem_get_allocator(struct netmap_adapter *na)
 {
-	int i, err, grp_id;
+	int i, domain, err, grp_id;
 	struct netmap_mem_d *nmd;
 
 	if (na == NULL || na->pdev == NULL)
 		return netmap_mem_get(&nm_mem);
 
+	domain = nm_numa_domain(na->pdev);
 	grp_id = nm_iommu_group_id(na->pdev);
 
 	NM_MTX_LOCK(nm_mem_list_lock);
 	nmd = netmap_last_mem_d;
 	do {
-		if (!(nmd->flags & NETMAP_MEM_HIDDEN) && nmd->nm_grp == grp_id) {
+		if (!(nmd->flags & NETMAP_MEM_HIDDEN) &&
+		    nmd->nm_grp == grp_id && nmd->nm_numa_domain == domain) {
 			nmd->refcount++;
 			NM_DBG_REFC(nmd, __FUNCTION__, __LINE__);
 			NM_MTX_UNLOCK(nm_mem_list_lock);
@@ -1834,7 +1852,7 @@
 
 	*nmd = nm_mem_blueprint;
 
-	err = nm_mem_assign_id_locked(nmd, grp_id);
+	err = nm_mem_assign_id_locked(nmd, grp_id, domain);
 	if (err)
 		goto error_free;
 
@@ -2878,7 +2896,7 @@
 
 	ptnmd->pt_ifs = NULL;
 
 	/* Assign new id in the guest (We have the lock) */
-	err = nm_mem_assign_id_locked(&ptnmd->up, -1);
+	err = nm_mem_assign_id_locked(&ptnmd->up, -1, -1);
 	if (err)
 		goto error;
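
Usage sketch (not part of the patch; assumes a multi-socket system and, purely
for illustration, an ixl(4) NIC on unit 0): the sysctl is declared with
CTLFLAG_RDTUN, so it can only be set as a boot-time tunable from loader.conf(5),
not changed at runtime:

    # /boot/loader.conf -- read by loader(8) before the kernel starts
    dev.netmap.port_numa_affinity=1

    # After boot, confirm the tunable took effect and check which NUMA
    # domain a given NIC is attached to:
    sysctl dev.netmap.port_numa_affinity
    sysctl dev.ixl.0.%domain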
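
For reference, a minimal self-contained sketch of the allocation policy the
patch adopts in netmap_finalize_obj_allocator(). contigmalloc_domainset(9) and
DOMAINSET_PREF() are existing kernel interfaces; the function name and malloc
type below are illustrative only, not part of the patch. The design point is
that DOMAINSET_PREF() is a "preferred" policy, which tries the given domain
first but may fall back to other domains under memory pressure, unlike
DOMAINSET_FIXED(), which would fail outright on an exhausted domain:

#include <sys/param.h>
#include <sys/domainset.h>
#include <sys/kernel.h>
#include <sys/malloc.h>

/* Illustrative malloc type; the patch uses netmap's own M_NETMAP. */
MALLOC_DEFINE(M_EXAMPLE, "example", "NUMA allocation sketch");

/*
 * Allocate a physically contiguous, page-aligned, zeroed cluster,
 * preferring the given NUMA domain; -1 means "no affinity".
 */
static void *
example_alloc_cluster(size_t size, int domain)
{
	if (domain == -1) {
		/* No affinity requested: any physical address range. */
		return (contigmalloc(size, M_EXAMPLE, M_NOWAIT | M_ZERO,
		    0, ~(vm_paddr_t)0, PAGE_SIZE, 0));
	}
	/* Prefer the local domain, but allow fallback under pressure. */
	return (contigmalloc_domainset(size, M_EXAMPLE,
	    DOMAINSET_PREF(domain), M_NOWAIT | M_ZERO,
	    0, ~(vm_paddr_t)0, PAGE_SIZE, 0));
}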