Page Menu
Home
FreeBSD
Search
Configure Global Search
Log In
Files
F141978062
D42653.id130569.diff
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Flag For Later
Award Token
Size
10 KB
Referenced Files
None
Subscribers
None
D42653.id130569.diff
View Options
diff --git a/sys/dev/ena/ena.h b/sys/dev/ena/ena.h
--- a/sys/dev/ena/ena.h
+++ b/sys/dev/ena/ena.h
@@ -69,6 +69,7 @@
#define ENA_DEFAULT_RING_SIZE 1024
#define ENA_MIN_RING_SIZE 256
+#define ENA_BASE_CPU_UNSPECIFIED -1
/*
* Refill Rx queue when number of required descriptors is above
* QUEUE_SIZE / ENA_RX_REFILL_THRESH_DIVIDER or ENA_RX_REFILL_THRESH_PACKET
@@ -201,9 +202,7 @@
void *cookie;
unsigned int vector;
bool requested;
-#ifdef RSS
int cpu;
-#endif
char name[ENA_IRQNAME_SIZE];
};
@@ -216,10 +215,8 @@
struct taskqueue *cleanup_tq;
uint32_t id;
-#ifdef RSS
int cpu;
cpuset_t cpu_mask;
-#endif
int domain;
struct sysctl_oid *oid;
};
@@ -448,6 +445,12 @@
ena_state_t flags;
+ /* IRQ CPU affinity */
+ int irq_cpu_base;
+ uint32_t irq_cpu_stride;
+
+ uint8_t rss_enabled;
+
/* Queue will represent one TX and one RX ring */
struct ena_que que[ENA_MAX_NUM_IO_QUEUES]
__aligned(CACHE_LINE_SIZE);
@@ -524,7 +527,8 @@
int ena_update_queue_size(struct ena_adapter *adapter, uint32_t new_tx_size,
uint32_t new_rx_size);
int ena_update_io_queue_nb(struct ena_adapter *adapter, uint32_t new_num);
-
+int ena_update_base_cpu(struct ena_adapter *adapter, int new_num);
+int ena_update_cpu_stride(struct ena_adapter *adapter, uint32_t new_num);
static inline int
ena_mbuf_count(struct mbuf *mbuf)
{
diff --git a/sys/dev/ena/ena.c b/sys/dev/ena/ena.c
--- a/sys/dev/ena/ena.c
+++ b/sys/dev/ena/ena.c
@@ -1237,6 +1237,84 @@
ena_init_io_rings(adapter);
}
+/*
+ * ena_update_base_cpu - change the base CPU used when spreading IO IRQs.
+ *
+ * The interface is brought down, adapter->irq_cpu_base is set to new_num and,
+ * if the device was up on entry, it is brought back up. When that ena_up()
+ * fails the previous base CPU is restored and ena_up() is retried; if the
+ * retry fails as well a device reset is triggered.
+ *
+ * No range check is done here, so the caller should sanitize new_num.
+ *
+ * Returns the result of the last ena_up() call, or 0 when the device was not
+ * up on entry. Note that 0 is also returned when the new value could not be
+ * applied but reverting to the old one succeeded.
+ */
+int
+ena_update_base_cpu(struct ena_adapter *adapter, int new_num)
+{
+	int old_num;
+	int rc = 0;
+	bool dev_was_up;
+
+	/* Sample the state before ena_down() changes it. */
+	dev_was_up = ENA_FLAG_ISSET(ENA_FLAG_DEV_UP, adapter);
+	old_num = adapter->irq_cpu_base;
+
+	ena_down(adapter);
+
+	adapter->irq_cpu_base = new_num;
+
+	if (dev_was_up) {
+		rc = ena_up(adapter);
+		if (unlikely(rc != 0)) {
+			ena_log(adapter->pdev, ERR,
+			    "Failed to configure device %d IRQ base CPU. "
+			    "Reverting to previous value: %d\n",
+			    new_num, old_num);
+
+			adapter->irq_cpu_base = old_num;
+
+			rc = ena_up(adapter);
+			if (unlikely(rc != 0)) {
+				/*
+				 * Keep the trailing space: the two literals
+				 * are concatenated into one message.
+				 */
+				ena_log(adapter->pdev, ERR,
+				    "Failed to revert to previous setup. "
+				    "Triggering device reset.\n");
+				ENA_FLAG_SET_ATOMIC(
+				    ENA_FLAG_DEV_UP_BEFORE_RESET, adapter);
+				ena_trigger_reset(adapter,
+				    ENA_REGS_RESET_OS_TRIGGER);
+			}
+		}
+	}
+	return (rc);
+}
+
+/*
+ * ena_update_cpu_stride - change the CPU stride used when spreading IO IRQs.
+ *
+ * The interface is brought down, adapter->irq_cpu_stride is set to new_num
+ * and, if the device was up on entry, it is brought back up. When that
+ * ena_up() fails the previous stride is restored and ena_up() is retried; if
+ * the retry fails as well a device reset is triggered.
+ *
+ * No range check is done here, so the caller should sanitize new_num.
+ *
+ * Returns the result of the last ena_up() call, or 0 when the device was not
+ * up on entry. Note that 0 is also returned when the new value could not be
+ * applied but reverting to the old one succeeded.
+ */
+int
+ena_update_cpu_stride(struct ena_adapter *adapter, uint32_t new_num)
+{
+	uint32_t old_num;
+	int rc = 0;
+	bool dev_was_up;
+
+	/* Sample the state before ena_down() changes it. */
+	dev_was_up = ENA_FLAG_ISSET(ENA_FLAG_DEV_UP, adapter);
+	old_num = adapter->irq_cpu_stride;
+
+	ena_down(adapter);
+
+	adapter->irq_cpu_stride = new_num;
+
+	if (dev_was_up) {
+		rc = ena_up(adapter);
+		if (unlikely(rc != 0)) {
+			/* Both values are uint32_t, hence %u rather than %d. */
+			ena_log(adapter->pdev, ERR,
+			    "Failed to configure device %u IRQ CPU stride. "
+			    "Reverting to previous value: %u\n",
+			    new_num, old_num);
+
+			adapter->irq_cpu_stride = old_num;
+
+			rc = ena_up(adapter);
+			if (unlikely(rc != 0)) {
+				/*
+				 * Keep the trailing space: the two literals
+				 * are concatenated into one message.
+				 */
+				ena_log(adapter->pdev, ERR,
+				    "Failed to revert to previous setup. "
+				    "Triggering device reset.\n");
+				ENA_FLAG_SET_ATOMIC(
+				    ENA_FLAG_DEV_UP_BEFORE_RESET, adapter);
+				ena_trigger_reset(adapter,
+				    ENA_REGS_RESET_OS_TRIGGER);
+			}
+		}
+	}
+	return (rc);
+}
+
/* Caller should sanitize new_num */
int
ena_update_io_queue_nb(struct ena_adapter *adapter, uint32_t new_num)
@@ -1683,6 +1761,13 @@
ena_log(adapter->pdev, DBG, "ena_setup_io_intr vector: %d\n",
adapter->msix_entries[irq_idx].vector);
+ if (adapter->irq_cpu_base > ENA_BASE_CPU_UNSPECIFIED) {
+ adapter->que[i].cpu = adapter->irq_tbl[irq_idx].cpu =
+ (unsigned)(adapter->irq_cpu_base +
+ i * adapter->irq_cpu_stride) % (unsigned)mp_ncpus;
+ CPU_SETOF(adapter->que[i].cpu, &adapter->que[i].cpu_mask);
+ }
+
#ifdef RSS
adapter->que[i].cpu = adapter->irq_tbl[irq_idx].cpu =
rss_getcpu(cur_bind);
@@ -1790,20 +1875,19 @@
}
irq->requested = true;
-#ifdef RSS
- rc = bus_bind_intr(adapter->pdev, irq->res, irq->cpu);
- if (unlikely(rc != 0)) {
- ena_log(pdev, ERR,
- "failed to bind interrupt handler for irq %ju to cpu %d: %d\n",
- rman_get_start(irq->res), irq->cpu, rc);
- goto err;
- }
+ if (adapter->rss_enabled || adapter->irq_cpu_base > ENA_BASE_CPU_UNSPECIFIED) {
+ rc = bus_bind_intr(adapter->pdev, irq->res, irq->cpu);
+ if (unlikely(rc != 0)) {
+ ena_log(pdev, ERR,
+ "failed to bind interrupt handler for irq %ju to cpu %d: %d\n",
+ rman_get_start(irq->res), irq->cpu, rc);
+ goto err;
+ }
- ena_log(pdev, INFO, "queue %d - cpu %d\n",
- i - ENA_IO_IRQ_FIRST_IDX, irq->cpu);
-#endif
+ ena_log(pdev, INFO, "queue %d - cpu %d\n",
+ i - ENA_IO_IRQ_FIRST_IDX, irq->cpu);
+ }
}
-
return (rc);
err:
@@ -1814,13 +1898,14 @@
/* Once we entered err: section and irq->requested is true we
free both intr and resources */
- if (irq->requested)
+ if (irq->requested) {
rcc = bus_teardown_intr(adapter->pdev, irq->res,
irq->cookie);
- if (unlikely(rcc != 0))
- ena_log(pdev, ERR,
- "could not release irq: %d, error: %d\n",
- irq->vector, rcc);
+ if (unlikely(rcc != 0))
+ ena_log(pdev, ERR,
+ "could not release irq: %d, error: %d\n",
+ irq->vector, rcc);
+ }
/* If we entered err: section without irq->requested set we know
it was bus_alloc_resource_any() that needs cleanup, provided
@@ -3523,6 +3608,13 @@
adapter->missing_tx_max_queues = ENA_DEFAULT_TX_MONITORED_QUEUES;
adapter->missing_tx_threshold = ENA_DEFAULT_TX_CMP_THRESHOLD;
+ adapter->irq_cpu_base = ENA_BASE_CPU_UNSPECIFIED;
+ adapter->irq_cpu_stride = 0;
+
+#ifdef RSS
+ adapter->rss_enabled = 1;
+#endif
+
if (version_printed++ == 0)
ena_log(pdev, INFO, "%s\n", ena_version);
diff --git a/sys/dev/ena/ena_sysctl.c b/sys/dev/ena/ena_sysctl.c
--- a/sys/dev/ena/ena_sysctl.c
+++ b/sys/dev/ena/ena_sysctl.c
@@ -38,6 +38,7 @@
static void ena_sysctl_add_stats(struct ena_adapter *);
static void ena_sysctl_add_eni_metrics(struct ena_adapter *);
static void ena_sysctl_add_tuneables(struct ena_adapter *);
+static void ena_sysctl_add_irq_affinity(struct ena_adapter *);
/* Kernel option RSS prevents manipulation of key hash and indirection table. */
#ifndef RSS
static void ena_sysctl_add_rss(struct ena_adapter *);
@@ -45,6 +46,8 @@
static int ena_sysctl_buf_ring_size(SYSCTL_HANDLER_ARGS);
static int ena_sysctl_rx_queue_size(SYSCTL_HANDLER_ARGS);
static int ena_sysctl_io_queues_nb(SYSCTL_HANDLER_ARGS);
+static int ena_sysctl_irq_base_cpu(SYSCTL_HANDLER_ARGS);
+static int ena_sysctl_irq_cpu_stride(SYSCTL_HANDLER_ARGS);
static int ena_sysctl_eni_metrics_interval(SYSCTL_HANDLER_ARGS);
#ifndef RSS
static int ena_sysctl_rss_key(SYSCTL_HANDLER_ARGS);
@@ -102,6 +105,7 @@
ena_sysctl_add_stats(adapter);
ena_sysctl_add_eni_metrics(adapter);
ena_sysctl_add_tuneables(adapter);
+ ena_sysctl_add_irq_affinity(adapter);
#ifndef RSS
ena_sysctl_add_rss(adapter);
#endif
@@ -448,6 +452,36 @@
}
#endif /* RSS */
+/*
+ * Register the "irq_affinity" sysctl node under the device's sysctl tree,
+ * together with its two leaves: "base_cpu" and "cpu_stride". Both leaves are
+ * read-write and are served by ena_sysctl_irq_base_cpu() and
+ * ena_sysctl_irq_cpu_stride() respectively, with the adapter passed as arg1.
+ */
+static void
+ena_sysctl_add_irq_affinity(struct ena_adapter *adapter)
+{
+	device_t pdev = adapter->pdev;
+	struct sysctl_ctx_list *sysctl_ctx;
+	struct sysctl_oid *dev_tree;
+	struct sysctl_oid *affinity_node;
+	struct sysctl_oid_list *dev_children;
+	struct sysctl_oid_list *affinity_children;
+
+	sysctl_ctx = device_get_sysctl_ctx(pdev);
+	dev_tree = device_get_sysctl_tree(pdev);
+	dev_children = SYSCTL_CHILDREN(dev_tree);
+
+	/* Parent node that groups the affinity knobs. */
+	affinity_node = SYSCTL_ADD_NODE(sysctl_ctx, dev_children, OID_AUTO,
+	    "irq_affinity", CTLFLAG_RW | CTLFLAG_MPSAFE, NULL,
+	    "Decide base CPU and stride for irqs affinity.");
+	affinity_children = SYSCTL_CHILDREN(affinity_node);
+
+	/* Leaf: CPU index assigned to the first IO IRQ. */
+	SYSCTL_ADD_PROC(sysctl_ctx, affinity_children, OID_AUTO, "base_cpu",
+	    CTLTYPE_S32 | CTLFLAG_RW | CTLFLAG_MPSAFE, adapter, 0,
+	    ena_sysctl_irq_base_cpu, "I",
+	    "Base cpu index for setting irq affinity.");
+
+	/* Leaf: CPU distance between consecutive IO IRQs. */
+	SYSCTL_ADD_PROC(sysctl_ctx, affinity_children, OID_AUTO, "cpu_stride",
+	    CTLTYPE_S32 | CTLFLAG_RW | CTLFLAG_MPSAFE, adapter, 0,
+	    ena_sysctl_irq_cpu_stride, "I",
+	    "Distance between irqs when setting affinity.");
+}
+
/*
* ena_sysctl_update_queue_node_nb - Register/unregister sysctl queue nodes.
@@ -707,6 +741,117 @@
return (0);
}
+/*
+ * Sysctl handler for "irq_affinity.base_cpu".
+ *
+ * On read, reports adapter->irq_cpu_base. On write, accepts a value in the
+ * range [0, mp_ncpus) and applies it through ena_update_base_cpu(); a value
+ * equal to the current one is accepted without reconfiguring the device.
+ * The whole operation runs with the ENA lock held.
+ *
+ * Returns 0 on success, ENODEV when the device is not running, EINVAL when
+ * the requested value is out of range, or the error reported by the sysctl
+ * helpers / ena_update_base_cpu().
+ */
+static int
+ena_sysctl_irq_base_cpu(SYSCTL_HANDLER_ARGS)
+{
+	struct ena_adapter *adapter = arg1;
+	int irq_base_cpu = 0;
+	int error;
+
+	ENA_LOCK_LOCK();
+	if (unlikely(!ENA_FLAG_ISSET(ENA_FLAG_DEVICE_RUNNING, adapter))) {
+		error = ENODEV;
+		goto unlock;
+	}
+
+	error = sysctl_wire_old_buffer(req, sizeof(irq_base_cpu));
+	if (error == 0) {
+		irq_base_cpu = adapter->irq_cpu_base;
+		error = sysctl_handle_int(oidp, &irq_base_cpu, 0, req);
+	}
+	/* Nothing more to do on failure or on a read-only request. */
+	if (error != 0 || req->newptr == NULL)
+		goto unlock;
+
+	if (irq_base_cpu <= ENA_BASE_CPU_UNSPECIFIED) {
+		ena_log(adapter->pdev, ERR,
+		    "Requested base CPU is less than zero.\n");
+		error = EINVAL;
+		goto unlock;
+	}
+
+	/*
+	 * The IRQ setup code reduces the CPU index modulo mp_ncpus, so the
+	 * highest accepted index is mp_ncpus - 1; mp_ncpus itself would
+	 * silently wrap around to CPU 0.
+	 */
+	if (irq_base_cpu >= mp_ncpus) {
+		ena_log(adapter->pdev, ERR,
+		    "Requested base CPU must be lower than the number of available CPUs (%d).\n",
+		    mp_ncpus);
+		error = EINVAL;
+		goto unlock;
+	}
+
+	if (irq_base_cpu == adapter->irq_cpu_base) {
+		ena_log(adapter->pdev, INFO,
+		    "Requested IRQ base CPU is equal to current value "
+		    "(%d)\n",
+		    adapter->irq_cpu_base);
+		goto unlock;
+	}
+
+	ena_log(adapter->pdev, INFO,
+	    "Requested new IRQ base CPU: %d, current value: %d\n",
+	    irq_base_cpu, adapter->irq_cpu_base);
+
+	error = ena_update_base_cpu(adapter, irq_base_cpu);
+
+unlock:
+	ENA_LOCK_UNLOCK();
+
+	return (error);
+}
+
+/*
+ * Sysctl handler for "irq_affinity.cpu_stride".
+ *
+ * On read, reports adapter->irq_cpu_stride. On write, rejects negative
+ * values, only warns when the stride exceeds mp_ncpus, and applies the new
+ * value through ena_update_cpu_stride(); a value equal to the current one is
+ * accepted without reconfiguring the device. The whole operation runs with
+ * the ENA lock held.
+ *
+ * Returns 0 on success, ENODEV when the device is not running, EINVAL when
+ * the requested value is negative, or the error reported by the sysctl
+ * helpers / ena_update_cpu_stride().
+ */
+static int
+ena_sysctl_irq_cpu_stride(SYSCTL_HANDLER_ARGS)
+{
+	struct ena_adapter *adapter = arg1;
+	int32_t irq_cpu_stride = 0;
+	uint32_t new_stride;
+	int error;
+
+	ENA_LOCK_LOCK();
+	if (unlikely(!ENA_FLAG_ISSET(ENA_FLAG_DEVICE_RUNNING, adapter))) {
+		error = ENODEV;
+		goto unlock;
+	}
+
+	error = sysctl_wire_old_buffer(req, sizeof(irq_cpu_stride));
+	if (error == 0) {
+		irq_cpu_stride = adapter->irq_cpu_stride;
+		error = sysctl_handle_int(oidp, &irq_cpu_stride, 0, req);
+	}
+	/* Nothing more to do on failure or on a read-only request. */
+	if (error != 0 || req->newptr == NULL)
+		goto unlock;
+
+	if (irq_cpu_stride < 0) {
+		ena_log(adapter->pdev, ERR,
+		    "Requested IRQ stride is less than zero.\n");
+		error = EINVAL;
+		goto unlock;
+	}
+
+	/* Not an error: only warn when the stride exceeds the CPU count. */
+	if (irq_cpu_stride > mp_ncpus) {
+		ena_log(adapter->pdev, INFO,
+		    "Warning: Requested IRQ stride is larger than the number of available CPUs.\n");
+	}
+
+	/*
+	 * The value is known to be non-negative here, so it can be carried
+	 * as unsigned to match adapter->irq_cpu_stride and the %u formats.
+	 */
+	new_stride = (uint32_t)irq_cpu_stride;
+
+	if (new_stride == adapter->irq_cpu_stride) {
+		ena_log(adapter->pdev, INFO,
+		    "Requested IRQ CPU stride is equal to current value "
+		    "(%u)\n",
+		    adapter->irq_cpu_stride);
+		goto unlock;
+	}
+
+	ena_log(adapter->pdev, INFO,
+	    "Requested new IRQ CPU stride: %u, current value: %u\n",
+	    new_stride, adapter->irq_cpu_stride);
+
+	error = ena_update_cpu_stride(adapter, new_stride);
+
+unlock:
+	ENA_LOCK_UNLOCK();
+
+	return (error);
+}
+
#ifndef RSS
/*
* Change the Receive Side Scaling hash key.
File Metadata
Details
Attached
Mime Type
text/plain
Expires
Thu, Jan 15, 11:10 AM (14 m, 30 s)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
27647257
Default Alt Text
D42653.id130569.diff (10 KB)
Attached To
Mode
D42653: ena: Add sysctl support for spreading IRQs
Attached
Detach File
Event Timeline
Log In to Comment