Page Menu
Home
FreeBSD
Search
Configure Global Search
Log In
Files
F156618501
D24094.id69583.diff
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Flag For Later
Award Token
Size
17 KB
Referenced Files
None
Subscribers
None
D24094.id69583.diff
View Options
Index: sys/net/iflib.c
===================================================================
--- sys/net/iflib.c
+++ sys/net/iflib.c
@@ -195,6 +195,8 @@
uint16_t ifc_sysctl_core_offset;
#define CORE_OFFSET_UNSPECIFIED 0xffff
uint8_t ifc_sysctl_separate_txrx;
+ uint8_t ifc_sysctl_use_logical_cores;
+ bool ifc_cpus_are_physical_cores;
qidx_t ifc_sysctl_ntxds[8];
qidx_t ifc_sysctl_nrxds[8];
@@ -725,7 +727,7 @@
SLIST_ENTRY(cpu_offset) entries;
cpuset_t set;
unsigned int refcount;
- uint16_t offset;
+ uint16_t next_cpuid;
};
static struct mtx cpu_offset_mtx;
MTX_SYSINIT(iflib_cpu_offset, &cpu_offset_mtx, "iflib_cpu_offset lock",
@@ -4538,41 +4540,291 @@
pfil_head_unregister(pfil);
}
+
+/*
+ * Advance forward by n members of the cpuset ctx->ifc_cpus starting from
+ * cpuid and wrapping as necessary.
+ */
+static unsigned int
+cpuid_advance(if_ctx_t ctx, unsigned int cpuid, unsigned int n)
+{
+ unsigned int first_valid;
+ unsigned int last_valid;
+
+ /* cpuid should always be in the valid set */
+ MPASS(CPU_ISSET(cpuid, &ctx->ifc_cpus));
+
+ /* valid set should never be empty */
+ MPASS(!CPU_EMPTY(&ctx->ifc_cpus));
+
+ first_valid = CPU_FFS(&ctx->ifc_cpus) - 1;
+ last_valid = CPU_FLS(&ctx->ifc_cpus) - 1;
+ n = n % CPU_COUNT(&ctx->ifc_cpus);
+ while (n > 0) {
+ do {
+ cpuid++;
+ if (cpuid > last_valid)
+ cpuid = first_valid;
+ } while (!CPU_ISSET(cpuid, &ctx->ifc_cpus));
+ n--;
+ }
+
+ return (cpuid);
+}
+
+#if defined(SMP) && defined(SCHED_ULE)
+extern struct cpu_group *cpu_top; /* CPU topology */
+
+static int
+find_child_with_core(int cpu, struct cpu_group *grp)
+{
+ int i;
+
+ if (grp->cg_children == 0)
+ return -1;
+
+ MPASS(grp->cg_child);
+ for (i = 0; i < grp->cg_children; i++) {
+ if (CPU_ISSET(cpu, &grp->cg_child[i].cg_mask))
+ return i;
+ }
+
+ return -1;
+}
+
+
+/*
+ * Find an L2 neighbor of the given CPU or return -1 if none found. This
+ * does not distinguish among multiple L2 neighbors if the given CPU has
+ * more than one (it will always return the same result in that case).
+ */
+static int
+find_l2_neighbor(int cpu)
+{
+ struct cpu_group *grp;
+ int i;
+
+ grp = cpu_top;
+ if (grp == NULL)
+ return -1;
+
+ /*
+ * Find the smallest CPU group that contains the given core.
+ */
+ i = 0;
+ while ((i = find_child_with_core(cpu, grp)) != -1) {
+ /*
+ * If the smallest group containing the given CPU has less
+ * than two members, we conclude the given CPU has no
+ * L2 neighbor.
+ */
+ if (grp->cg_child[i].cg_count <= 1)
+ return (-1);
+ grp = &grp->cg_child[i];
+ }
+
+ /* Must share L2. */
+ if (grp->cg_level > CG_SHARE_L2 || grp->cg_level == CG_SHARE_NONE)
+ return -1;
+
+ /*
+ * Select the first member of the set that isn't the reference
+ * CPU, which at this point is guaranteed to exist.
+ */
+ for (i = 0; i < CPU_SETSIZE; i++) {
+ if (CPU_ISSET(i, &grp->cg_mask) && i != cpu)
+ return (i);
+ }
+
+ /* Should never be reached */
+ return (-1);
+}
+
+#else
+static int
+find_l2_neighbor(int cpu)
+{
+
+ return (-1);
+}
+#endif
+
+/*
+ * CPU mapping behaviors
+ * ---------------------
+ * 'separate txrx' refers to the separate_txrx sysctl
+ * 'use logical' refers to the use_logical_cores sysctl
+ * 'INTR CPUS' indicates whether bus_get_cpus(INTR_CPUS) succeeded
+ *
+ * separate use INTR
+ * txrx logical CPUS result
+ * ---------- --------- ------ ------------------------------------------------
+ * - - X RX and TX queues mapped to consecutive physical
+ * cores with RX/TX pairs on same core and excess
+ * of either following
+ * - X X RX and TX queues mapped to consecutive cores
+ * of any type with RX/TX pairs on same core and
+ * excess of either following
+ * X - X RX and TX queues mapped to consecutive physical
+ * cores; all RX then all TX
+ * X X X RX queues mapped to consecutive physical cores
+ * first, then TX queues mapped to L2 neighbor of
+ * the corresponding RX queue if one exists,
+ * otherwise to consecutive physical cores
+ * - n/a - RX and TX queues mapped to consecutive cores of
+ * any type with RX/TX pairs on same core and excess
+ * of either following
+ * X n/a - RX and TX queues mapped to consecutive cores of
+ * any type; all RX then all TX
+ */
+static unsigned int
+get_cpuid_for_queue(if_ctx_t ctx, unsigned int base_cpuid, unsigned int qid,
+ bool is_tx)
+{
+ if_softc_ctx_t scctx = &ctx->ifc_softc_ctx;
+ unsigned int core_index;
+
+ if (ctx->ifc_sysctl_separate_txrx) {
+ /*
+ * When using separate CPUs for TX and RX, the assignment
+ * will always be of a consecutive CPU out of the set of
+ * context CPUs, except for the specific case where the
+ * context CPUs are physical cores, the use of logical cores
+ * has been enabled, the assignment is for TX, the TX qid
+ * corresponds to an RX qid, and the CPU assigned to the
+ * corresponding RX queue has an L2 neighbor.
+ */
+ if (ctx->ifc_sysctl_use_logical_cores &&
+ ctx->ifc_cpus_are_physical_cores &&
+ is_tx && qid < scctx->isc_nrxqsets) {
+ int l2_neighbor;
+ unsigned int rx_cpuid;
+
+ rx_cpuid = cpuid_advance(ctx, base_cpuid, qid);
+ l2_neighbor = find_l2_neighbor(rx_cpuid);
+ if (l2_neighbor != -1) {
+ return (l2_neighbor);
+ }
+ /*
+ else fall through to the normal
+ consecutive-after-RX assignment scheme
+
+ Note that we are assuming that all RX queue CPUs
+ have an L2 neighbor, or all do not. If a mixed
+ scenario is possible, we will have to keep track
+ separately of how many queues prior to this one
+ were not able to be assigned to an L2 neighbor.
+ */
+ }
+ if (is_tx)
+ core_index = scctx->isc_nrxqsets + qid;
+ else
+ core_index = qid;
+ } else {
+ core_index = qid;
+ }
+
+ return (cpuid_advance(ctx, base_cpuid, core_index));
+}
+
static uint16_t
get_ctx_core_offset(if_ctx_t ctx)
{
if_softc_ctx_t scctx = &ctx->ifc_softc_ctx;
struct cpu_offset *op;
- uint16_t qc;
- uint16_t ret = ctx->ifc_sysctl_core_offset;
-
- if (ret != CORE_OFFSET_UNSPECIFIED)
- return (ret);
+ cpuset_t assigned_cpus;
+ unsigned int cores_consumed;
+ unsigned int base_cpuid = ctx->ifc_sysctl_core_offset;
+ unsigned int first_valid;
+ unsigned int last_valid;
+ unsigned int i;
+
+ first_valid = CPU_FFS(&ctx->ifc_cpus) - 1;
+ last_valid = CPU_FLS(&ctx->ifc_cpus) - 1;
- if (ctx->ifc_sysctl_separate_txrx)
- qc = scctx->isc_ntxqsets + scctx->isc_nrxqsets;
- else
- qc = max(scctx->isc_ntxqsets, scctx->isc_nrxqsets);
+ if (base_cpuid != CORE_OFFSET_UNSPECIFIED) {
+ /*
+ * Align the user-chosen base CPU ID to the next valid CPU
+ * for this device. If the chosen base CPU ID is smaller
+ * than the first valid CPU or larger than the last valid
+ * CPU, we assume the user does not know what the valid
+ * range is for this device and is thinking in terms of a
+ * zero-based reference frame, and so we shift the given
+ * value into the valid range (and wrap accordingly) so the
+ * intent is translated to the proper frame of reference.
+ * If the base CPU ID is within the valid first/last, but
+ * does not correspond to a valid CPU, it is advanced to the
+ * next valid CPU (wrapping if necessary).
+ */
+ if (base_cpuid < first_valid || base_cpuid > last_valid) {
+ /* shift from zero-based to first_valid-based */
+ base_cpuid += first_valid;
+ /* wrap to range [first_valid, last_valid] */
+ base_cpuid = (base_cpuid - first_valid) %
+ (last_valid - first_valid + 1);
+ }
+ if (!CPU_ISSET(base_cpuid, &ctx->ifc_cpus)) {
+ /*
+ * base_cpuid is in [first_valid, last_valid], but
+ * not a member of the valid set. In this case,
+ * there will always be a member of the valid set
+ * with a CPU ID that is greater than base_cpuid,
+ * and we simply advance to it.
+ */
+ while (!CPU_ISSET(base_cpuid, &ctx->ifc_cpus))
+ base_cpuid++;
+ }
+ return (base_cpuid);
+ }
+ /*
+ * Determine how many cores will be consumed by performing the CPU
+ * assignments and counting how many of the assigned CPUs correspond
+ * to CPUs in the set of context CPUs. This is done using the CPU
+ * ID first_valid as the base CPU ID, as the base CPU must be within
+ * the set of context CPUs.
+ *
+ * Note not all assigned CPUs will be in the set of context CPUs
+ * when separate CPUs are being allocated to TX and RX queues,
+ * assignment to logical cores has been enabled, the set of context
+ * CPUs contains only physical CPUs, and TX queues are mapped to L2
+ * neighbors of CPUs that RX queues have been mapped to - in this
+ * case we do only want to count how many CPUs in the set of context
+ * CPUs have been consumed, as that determines the next CPU in that
+ * set to start allocating at for the next device for which
+ * core_offset is not set.
+ */
+ CPU_ZERO(&assigned_cpus);
+ for (i = 0; i < scctx->isc_ntxqsets; i++)
+ CPU_SET(get_cpuid_for_queue(ctx, first_valid, i, true),
+ &assigned_cpus);
+ for (i = 0; i < scctx->isc_nrxqsets; i++)
+ CPU_SET(get_cpuid_for_queue(ctx, first_valid, i, false),
+ &assigned_cpus);
+ CPU_AND(&assigned_cpus, &ctx->ifc_cpus);
+ cores_consumed = CPU_COUNT(&assigned_cpus);
+
mtx_lock(&cpu_offset_mtx);
SLIST_FOREACH(op, &cpu_offsets, entries) {
if (CPU_CMP(&ctx->ifc_cpus, &op->set) == 0) {
- ret = op->offset;
- op->offset += qc;
+ base_cpuid = op->next_cpuid;
+ op->next_cpuid = cpuid_advance(ctx, op->next_cpuid,
+ cores_consumed);
MPASS(op->refcount < UINT_MAX);
op->refcount++;
break;
}
}
- if (ret == CORE_OFFSET_UNSPECIFIED) {
- ret = 0;
+ if (base_cpuid == CORE_OFFSET_UNSPECIFIED) {
+ base_cpuid = first_valid;
op = malloc(sizeof(struct cpu_offset), M_IFLIB,
M_NOWAIT | M_ZERO);
if (op == NULL) {
device_printf(ctx->ifc_dev,
"allocation for cpu offset failed.\n");
} else {
- op->offset = qc;
+ op->next_cpuid = cpuid_advance(ctx, base_cpuid,
+ cores_consumed);
op->refcount = 1;
CPU_COPY(&ctx->ifc_cpus, &op->set);
SLIST_INSERT_HEAD(&cpu_offsets, op, entries);
@@ -4580,7 +4832,7 @@
}
mtx_unlock(&cpu_offset_mtx);
- return (ret);
+ return (base_cpuid);
}
static void
@@ -4710,7 +4962,9 @@
if (bus_get_cpus(dev, INTR_CPUS, sizeof(ctx->ifc_cpus), &ctx->ifc_cpus) != 0) {
device_printf(dev, "Unable to fetch CPU list\n");
CPU_COPY(&all_cpus, &ctx->ifc_cpus);
- }
+ ctx->ifc_cpus_are_physical_cores = false;
+ } else
+ ctx->ifc_cpus_are_physical_cores = true;
MPASS(CPU_COUNT(&ctx->ifc_cpus) > 0);
/*
@@ -5824,128 +6078,6 @@
return (_iflib_irq_alloc(ctx, irq, rid, filter, handler, arg, name));
}
-#ifdef SMP
-static int
-find_nth(if_ctx_t ctx, int qid)
-{
- cpuset_t cpus;
- int i, cpuid, eqid, count;
-
- CPU_COPY(&ctx->ifc_cpus, &cpus);
- count = CPU_COUNT(&cpus);
- eqid = qid % count;
- /* clear up to the qid'th bit */
- for (i = 0; i < eqid; i++) {
- cpuid = CPU_FFS(&cpus);
- MPASS(cpuid != 0);
- CPU_CLR(cpuid-1, &cpus);
- }
- cpuid = CPU_FFS(&cpus);
- MPASS(cpuid != 0);
- return (cpuid-1);
-}
-
-#ifdef SCHED_ULE
-extern struct cpu_group *cpu_top; /* CPU topology */
-
-static int
-find_child_with_core(int cpu, struct cpu_group *grp)
-{
- int i;
-
- if (grp->cg_children == 0)
- return -1;
-
- MPASS(grp->cg_child);
- for (i = 0; i < grp->cg_children; i++) {
- if (CPU_ISSET(cpu, &grp->cg_child[i].cg_mask))
- return i;
- }
-
- return -1;
-}
-
-/*
- * Find the nth "close" core to the specified core
- * "close" is defined as the deepest level that shares
- * at least an L2 cache. With threads, this will be
- * threads on the same core. If the shared cache is L3
- * or higher, simply returns the same core.
- */
-static int
-find_close_core(int cpu, int core_offset)
-{
- struct cpu_group *grp;
- int i;
- int fcpu;
- cpuset_t cs;
-
- grp = cpu_top;
- if (grp == NULL)
- return cpu;
- i = 0;
- while ((i = find_child_with_core(cpu, grp)) != -1) {
- /* If the child only has one cpu, don't descend */
- if (grp->cg_child[i].cg_count <= 1)
- break;
- grp = &grp->cg_child[i];
- }
-
- /* If they don't share at least an L2 cache, use the same CPU */
- if (grp->cg_level > CG_SHARE_L2 || grp->cg_level == CG_SHARE_NONE)
- return cpu;
-
- /* Now pick one */
- CPU_COPY(&grp->cg_mask, &cs);
-
- /* Add the selected CPU offset to core offset. */
- for (i = 0; (fcpu = CPU_FFS(&cs)) != 0; i++) {
- if (fcpu - 1 == cpu)
- break;
- CPU_CLR(fcpu - 1, &cs);
- }
- MPASS(fcpu);
-
- core_offset += i;
-
- CPU_COPY(&grp->cg_mask, &cs);
- for (i = core_offset % grp->cg_count; i > 0; i--) {
- MPASS(CPU_FFS(&cs));
- CPU_CLR(CPU_FFS(&cs) - 1, &cs);
- }
- MPASS(CPU_FFS(&cs));
- return CPU_FFS(&cs) - 1;
-}
-#else
-static int
-find_close_core(int cpu, int core_offset __unused)
-{
- return cpu;
-}
-#endif
-
-static int
-get_core_offset(if_ctx_t ctx, iflib_intr_type_t type, int qid)
-{
- switch (type) {
- case IFLIB_INTR_TX:
- /* TX queues get cores which share at least an L2 cache with the corresponding RX queue */
- /* XXX handle multiple RX threads per core and more than two core per L2 group */
- return qid / CPU_COUNT(&ctx->ifc_cpus) + 1;
- case IFLIB_INTR_RX:
- case IFLIB_INTR_RXTX:
- /* RX queues get the specified core */
- return qid / CPU_COUNT(&ctx->ifc_cpus);
- default:
- return -1;
- }
-}
-#else
-#define get_core_offset(ctx, type, qid) CPU_FIRST()
-#define find_close_core(cpuid, tid) CPU_FIRST()
-#define find_nth(ctx, gid) CPU_FIRST()
-#endif
-
/* Just to avoid copy/paste */
static inline int
iflib_irq_set_affinity(if_ctx_t ctx, if_irq_t irq, iflib_intr_type_t type,
@@ -5953,21 +6085,14 @@
const char *name)
{
device_t dev;
- int co, cpuid, err, tid;
+ unsigned int base_cpuid, cpuid;
+ int err;
dev = ctx->ifc_dev;
- co = ctx->ifc_sysctl_core_offset;
- if (ctx->ifc_sysctl_separate_txrx && type == IFLIB_INTR_TX)
- co += ctx->ifc_softc_ctx.isc_nrxqsets;
- cpuid = find_nth(ctx, qid + co);
- tid = get_core_offset(ctx, type, qid);
- if (tid < 0) {
- device_printf(dev, "get_core_offset failed\n");
- return (EOPNOTSUPP);
- }
- cpuid = find_close_core(cpuid, tid);
- err = taskqgroup_attach_cpu(tqg, gtask, uniq, cpuid, dev, irq->ii_res,
- name);
+ base_cpuid = ctx->ifc_sysctl_core_offset;
+ cpuid = get_cpuid_for_queue(ctx, base_cpuid, qid, type == IFLIB_INTR_TX);
+ err = taskqgroup_attach_cpu(tqg, gtask, uniq, cpuid, dev,
+ irq ? irq->ii_res : NULL, name);
if (err) {
device_printf(dev, "taskqgroup_attach_cpu failed %d\n", err);
return (err);
@@ -6056,8 +6181,8 @@
return (0);
if (tqrid != -1) {
- err = iflib_irq_set_affinity(ctx, irq, type, qid, gtask, tqg,
- q, name);
+ err = iflib_irq_set_affinity(ctx, irq, type, qid, gtask, tqg, q,
+ name);
if (err)
return (err);
} else {
@@ -6070,6 +6195,7 @@
void
iflib_softirq_alloc_generic(if_ctx_t ctx, if_irq_t irq, iflib_intr_type_t type, void *arg, int qid, const char *name)
{
+ device_t dev;
struct grouptask *gtask;
struct taskqgroup *tqg;
gtask_fn_t *fn;
@@ -6101,14 +6227,11 @@
default:
panic("unknown net intr type");
}
- if (irq != NULL) {
- err = iflib_irq_set_affinity(ctx, irq, type, qid, gtask, tqg,
- q, name);
- if (err)
- taskqgroup_attach(tqg, gtask, q, ctx->ifc_dev,
- irq->ii_res, name);
- } else {
- taskqgroup_attach(tqg, gtask, q, NULL, NULL, name);
+ err = iflib_irq_set_affinity(ctx, irq, type, qid, gtask, tqg, q, name);
+ if (err) {
+ dev = ctx->ifc_dev;
+ taskqgroup_attach(tqg, gtask, q, dev, irq ? irq->ii_res : NULL,
+ name);
}
}
@@ -6595,6 +6718,9 @@
SYSCTL_ADD_U8(ctx_list, oid_list, OID_AUTO, "separate_txrx",
CTLFLAG_RDTUN, &ctx->ifc_sysctl_separate_txrx, 0,
"use separate cores for TX and RX");
+ SYSCTL_ADD_U8(ctx_list, oid_list, OID_AUTO, "use_logical_cores",
+ CTLFLAG_RDTUN, &ctx->ifc_sysctl_use_logical_cores, 0,
+ "try to make use of logical cores for TX and RX");
/* XXX change for per-queue sizes */
SYSCTL_ADD_PROC(ctx_list, oid_list, OID_AUTO, "override_ntxds",
@@ -6639,6 +6765,9 @@
queue_node = SYSCTL_ADD_NODE(ctx_list, child, OID_AUTO, namebuf,
CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "Queue Name");
queue_list = SYSCTL_CHILDREN(queue_node);
+ SYSCTL_ADD_INT(ctx_list, queue_list, OID_AUTO, "cpu",
+ CTLFLAG_RD,
+ &txq->ift_task.gt_cpu, 0, "cpu this queue is bound to");
#if MEMORY_LOGGING
SYSCTL_ADD_QUAD(ctx_list, queue_list, OID_AUTO, "txq_dequeued",
CTLFLAG_RD,
@@ -6721,6 +6850,9 @@
queue_node = SYSCTL_ADD_NODE(ctx_list, child, OID_AUTO, namebuf,
CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "Queue Name");
queue_list = SYSCTL_CHILDREN(queue_node);
+ SYSCTL_ADD_INT(ctx_list, queue_list, OID_AUTO, "cpu",
+ CTLFLAG_RD,
+ &rxq->ifr_task.gt_cpu, 0, "cpu this queue is bound to");
if (sctx->isc_flags & IFLIB_HAS_RXCQ) {
SYSCTL_ADD_U16(ctx_list, queue_list, OID_AUTO, "rxq_cq_cidx",
CTLFLAG_RD,
Index: sys/sys/cpuset.h
===================================================================
--- sys/sys/cpuset.h
+++ sys/sys/cpuset.h
@@ -65,6 +65,7 @@
#define CPU_OR_ATOMIC(d, s) BIT_OR_ATOMIC(CPU_SETSIZE, d, s)
#define CPU_COPY_STORE_REL(f, t) BIT_COPY_STORE_REL(CPU_SETSIZE, f, t)
#define CPU_FFS(p) BIT_FFS(CPU_SETSIZE, p)
+#define CPU_FLS(p) BIT_FLS(CPU_SETSIZE, p)
#define CPU_COUNT(p) BIT_COUNT(CPU_SETSIZE, p)
#define CPUSET_FSET BITSET_FSET(_NCPUWORDS)
#define CPUSET_T_INITIALIZER BITSET_T_INITIALIZER
File Metadata
Details
Attached
Mime Type
text/plain
Expires
Sat, May 16, 4:06 AM (2 h, 6 s)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
33112410
Default Alt Text
D24094.id69583.diff (17 KB)
Attached To
Mode
D24094: Fix allocation of queues to CPUs in iflib
Attached
Detach File
Event Timeline
Log In to Comment