Page MenuHomeFreeBSD

D42653.id130569.diff
No OneTemporary

D42653.id130569.diff

diff --git a/sys/dev/ena/ena.h b/sys/dev/ena/ena.h
--- a/sys/dev/ena/ena.h
+++ b/sys/dev/ena/ena.h
@@ -69,6 +69,7 @@
#define ENA_DEFAULT_RING_SIZE 1024
#define ENA_MIN_RING_SIZE 256
+#define ENA_BASE_CPU_UNSPECIFIED -1
/*
* Refill Rx queue when number of required descriptors is above
* QUEUE_SIZE / ENA_RX_REFILL_THRESH_DIVIDER or ENA_RX_REFILL_THRESH_PACKET
@@ -201,9 +202,7 @@
void *cookie;
unsigned int vector;
bool requested;
-#ifdef RSS
int cpu;
-#endif
char name[ENA_IRQNAME_SIZE];
};
@@ -216,10 +215,8 @@
struct taskqueue *cleanup_tq;
uint32_t id;
-#ifdef RSS
int cpu;
cpuset_t cpu_mask;
-#endif
int domain;
struct sysctl_oid *oid;
};
@@ -448,6 +445,12 @@
ena_state_t flags;
+ /* IRQ CPU affinity */
+ int irq_cpu_base;
+ uint32_t irq_cpu_stride;
+
+ uint8_t rss_enabled;
+
/* Queue will represent one TX and one RX ring */
struct ena_que que[ENA_MAX_NUM_IO_QUEUES]
__aligned(CACHE_LINE_SIZE);
@@ -524,7 +527,8 @@
int ena_update_queue_size(struct ena_adapter *adapter, uint32_t new_tx_size,
uint32_t new_rx_size);
int ena_update_io_queue_nb(struct ena_adapter *adapter, uint32_t new_num);
-
+int ena_update_base_cpu(struct ena_adapter *adapter, int new_num);
+int ena_update_cpu_stride(struct ena_adapter *adapter, uint32_t new_num);
static inline int
ena_mbuf_count(struct mbuf *mbuf)
{
diff --git a/sys/dev/ena/ena.c b/sys/dev/ena/ena.c
--- a/sys/dev/ena/ena.c
+++ b/sys/dev/ena/ena.c
@@ -1237,6 +1237,84 @@
ena_init_io_rings(adapter);
}
+/*
+ * ena_update_base_cpu - Change the base CPU used for IO IRQ affinity.
+ * @adapter: ENA adapter whose irq_cpu_base is updated
+ * @new_num: new base CPU index; it is not validated here
+ *
+ * The interface is brought down, irq_cpu_base is replaced with new_num and,
+ * if the interface was up on entry, it is brought back up. When that fails,
+ * the previous value is restored and ena_up() is retried; when the retry
+ * fails as well, a device reset is requested.
+ *
+ * Returns the result of the last ena_up() call, or 0 if the interface was
+ * down on entry. A successful rollback therefore also returns 0, although
+ * new_num was not applied.
+ */
+int
+ena_update_base_cpu(struct ena_adapter *adapter, int new_num)
+{
+	int old_num;
+	int rc = 0;
+	bool dev_was_up;
+
+	/* Sample the state before ena_down() changes it. */
+	dev_was_up = ENA_FLAG_ISSET(ENA_FLAG_DEV_UP, adapter);
+	old_num = adapter->irq_cpu_base;
+
+	ena_down(adapter);
+
+	adapter->irq_cpu_base = new_num;
+
+	if (dev_was_up) {
+		rc = ena_up(adapter);
+		if (unlikely(rc != 0)) {
+			ena_log(adapter->pdev, ERR,
+			    "Failed to configure device %d IRQ base CPU. "
+			    "Reverting to previous value: %d\n",
+			    new_num, old_num);
+
+			adapter->irq_cpu_base = old_num;
+
+			rc = ena_up(adapter);
+			if (unlikely(rc != 0)) {
+				/*
+				 * Adjacent literals are concatenated, so the
+				 * first one must end with a space.
+				 */
+				ena_log(adapter->pdev, ERR,
+				    "Failed to revert to previous setup. "
+				    "Triggering device reset.\n");
+				ENA_FLAG_SET_ATOMIC(
+				    ENA_FLAG_DEV_UP_BEFORE_RESET, adapter);
+				ena_trigger_reset(adapter,
+				    ENA_REGS_RESET_OS_TRIGGER);
+			}
+		}
+	}
+	return (rc);
+}
+
+/*
+ * ena_update_cpu_stride - Change the CPU stride used for IO IRQ affinity.
+ * @adapter: ENA adapter whose irq_cpu_stride is updated
+ * @new_num: new stride value; it is not validated here
+ *
+ * The interface is brought down, irq_cpu_stride is replaced with new_num
+ * and, if the interface was up on entry, it is brought back up. When that
+ * fails, the previous value is restored and ena_up() is retried; when the
+ * retry fails as well, a device reset is requested.
+ *
+ * Returns the result of the last ena_up() call, or 0 if the interface was
+ * down on entry. A successful rollback therefore also returns 0, although
+ * new_num was not applied.
+ */
+int
+ena_update_cpu_stride(struct ena_adapter *adapter, uint32_t new_num)
+{
+	uint32_t old_num;
+	int rc = 0;
+	bool dev_was_up;
+
+	/* Sample the state before ena_down() changes it. */
+	dev_was_up = ENA_FLAG_ISSET(ENA_FLAG_DEV_UP, adapter);
+	old_num = adapter->irq_cpu_stride;
+
+	ena_down(adapter);
+
+	adapter->irq_cpu_stride = new_num;
+
+	if (dev_was_up) {
+		rc = ena_up(adapter);
+		if (unlikely(rc != 0)) {
+			/* Both values are uint32_t, hence %u. */
+			ena_log(adapter->pdev, ERR,
+			    "Failed to configure device %u IRQ CPU stride. "
+			    "Reverting to previous value: %u\n",
+			    new_num, old_num);
+
+			adapter->irq_cpu_stride = old_num;
+
+			rc = ena_up(adapter);
+			if (unlikely(rc != 0)) {
+				/*
+				 * Adjacent literals are concatenated, so the
+				 * first one must end with a space.
+				 */
+				ena_log(adapter->pdev, ERR,
+				    "Failed to revert to previous setup. "
+				    "Triggering device reset.\n");
+				ENA_FLAG_SET_ATOMIC(
+				    ENA_FLAG_DEV_UP_BEFORE_RESET, adapter);
+				ena_trigger_reset(adapter,
+				    ENA_REGS_RESET_OS_TRIGGER);
+			}
+		}
+	}
+	return (rc);
+}
+
/* Caller should sanitize new_num */
int
ena_update_io_queue_nb(struct ena_adapter *adapter, uint32_t new_num)
@@ -1683,6 +1761,13 @@
ena_log(adapter->pdev, DBG, "ena_setup_io_intr vector: %d\n",
adapter->msix_entries[irq_idx].vector);
+ if (adapter->irq_cpu_base > ENA_BASE_CPU_UNSPECIFIED) {
+ adapter->que[i].cpu = adapter->irq_tbl[irq_idx].cpu =
+ (unsigned)(adapter->irq_cpu_base +
+ i * adapter->irq_cpu_stride) % (unsigned)mp_ncpus;
+ CPU_SETOF(adapter->que[i].cpu, &adapter->que[i].cpu_mask);
+ }
+
#ifdef RSS
adapter->que[i].cpu = adapter->irq_tbl[irq_idx].cpu =
rss_getcpu(cur_bind);
@@ -1790,20 +1875,19 @@
}
irq->requested = true;
-#ifdef RSS
- rc = bus_bind_intr(adapter->pdev, irq->res, irq->cpu);
- if (unlikely(rc != 0)) {
- ena_log(pdev, ERR,
- "failed to bind interrupt handler for irq %ju to cpu %d: %d\n",
- rman_get_start(irq->res), irq->cpu, rc);
- goto err;
- }
+ if (adapter->rss_enabled || adapter->irq_cpu_base > ENA_BASE_CPU_UNSPECIFIED) {
+ rc = bus_bind_intr(adapter->pdev, irq->res, irq->cpu);
+ if (unlikely(rc != 0)) {
+ ena_log(pdev, ERR,
+ "failed to bind interrupt handler for irq %ju to cpu %d: %d\n",
+ rman_get_start(irq->res), irq->cpu, rc);
+ goto err;
+ }
- ena_log(pdev, INFO, "queue %d - cpu %d\n",
- i - ENA_IO_IRQ_FIRST_IDX, irq->cpu);
-#endif
+ ena_log(pdev, INFO, "queue %d - cpu %d\n",
+ i - ENA_IO_IRQ_FIRST_IDX, irq->cpu);
+ }
}
-
return (rc);
err:
@@ -1814,13 +1898,14 @@
/* Once we entered err: section and irq->requested is true we
free both intr and resources */
- if (irq->requested)
+ if (irq->requested) {
rcc = bus_teardown_intr(adapter->pdev, irq->res,
irq->cookie);
- if (unlikely(rcc != 0))
- ena_log(pdev, ERR,
- "could not release irq: %d, error: %d\n",
- irq->vector, rcc);
+ if (unlikely(rcc != 0))
+ ena_log(pdev, ERR,
+ "could not release irq: %d, error: %d\n",
+ irq->vector, rcc);
+ }
/* If we entered err: section without irq->requested set we know
it was bus_alloc_resource_any() that needs cleanup, provided
@@ -3523,6 +3608,13 @@
adapter->missing_tx_max_queues = ENA_DEFAULT_TX_MONITORED_QUEUES;
adapter->missing_tx_threshold = ENA_DEFAULT_TX_CMP_THRESHOLD;
+ adapter->irq_cpu_base = ENA_BASE_CPU_UNSPECIFIED;
+ adapter->irq_cpu_stride = 0;
+
+#ifdef RSS
+ adapter->rss_enabled = 1;
+#endif
+
if (version_printed++ == 0)
ena_log(pdev, INFO, "%s\n", ena_version);
diff --git a/sys/dev/ena/ena_sysctl.c b/sys/dev/ena/ena_sysctl.c
--- a/sys/dev/ena/ena_sysctl.c
+++ b/sys/dev/ena/ena_sysctl.c
@@ -38,6 +38,7 @@
static void ena_sysctl_add_stats(struct ena_adapter *);
static void ena_sysctl_add_eni_metrics(struct ena_adapter *);
static void ena_sysctl_add_tuneables(struct ena_adapter *);
+static void ena_sysctl_add_irq_affinity(struct ena_adapter *);
/* Kernel option RSS prevents manipulation of key hash and indirection table. */
#ifndef RSS
static void ena_sysctl_add_rss(struct ena_adapter *);
@@ -45,6 +46,8 @@
static int ena_sysctl_buf_ring_size(SYSCTL_HANDLER_ARGS);
static int ena_sysctl_rx_queue_size(SYSCTL_HANDLER_ARGS);
static int ena_sysctl_io_queues_nb(SYSCTL_HANDLER_ARGS);
+static int ena_sysctl_irq_base_cpu(SYSCTL_HANDLER_ARGS);
+static int ena_sysctl_irq_cpu_stride(SYSCTL_HANDLER_ARGS);
static int ena_sysctl_eni_metrics_interval(SYSCTL_HANDLER_ARGS);
#ifndef RSS
static int ena_sysctl_rss_key(SYSCTL_HANDLER_ARGS);
@@ -102,6 +105,7 @@
ena_sysctl_add_stats(adapter);
ena_sysctl_add_eni_metrics(adapter);
ena_sysctl_add_tuneables(adapter);
+ ena_sysctl_add_irq_affinity(adapter);
#ifndef RSS
ena_sysctl_add_rss(adapter);
#endif
@@ -448,6 +452,36 @@
}
#endif /* RSS */
+/*
+ * Register the "irq_affinity" sysctl node below the device's sysctl tree,
+ * together with its two read-write leaves, "base_cpu" and "cpu_stride".
+ */
+static void
+ena_sysctl_add_irq_affinity(struct ena_adapter *adapter)
+{
+	struct sysctl_oid_list *dev_children, *affinity_children;
+	struct sysctl_oid *affinity_node;
+	struct sysctl_ctx_list *sctx;
+	device_t pdev;
+
+	pdev = adapter->pdev;
+	sctx = device_get_sysctl_ctx(pdev);
+	dev_children = SYSCTL_CHILDREN(device_get_sysctl_tree(pdev));
+
+	/* Parent node grouping both affinity knobs. */
+	affinity_node = SYSCTL_ADD_NODE(sctx, dev_children, OID_AUTO,
+	    "irq_affinity", CTLFLAG_RW | CTLFLAG_MPSAFE, NULL,
+	    "Decide base CPU and stride for irqs affinity.");
+	affinity_children = SYSCTL_CHILDREN(affinity_node);
+
+	/* Leaf served by ena_sysctl_irq_base_cpu(). */
+	SYSCTL_ADD_PROC(sctx, affinity_children, OID_AUTO, "base_cpu",
+	    CTLTYPE_S32 | CTLFLAG_RW | CTLFLAG_MPSAFE, adapter, 0,
+	    ena_sysctl_irq_base_cpu, "I",
+	    "Base cpu index for setting irq affinity.");
+
+	/* Leaf served by ena_sysctl_irq_cpu_stride(). */
+	SYSCTL_ADD_PROC(sctx, affinity_children, OID_AUTO, "cpu_stride",
+	    CTLTYPE_S32 | CTLFLAG_RW | CTLFLAG_MPSAFE, adapter, 0,
+	    ena_sysctl_irq_cpu_stride, "I",
+	    "Distance between irqs when setting affinity.");
+}
+
/*
* ena_sysctl_update_queue_node_nb - Register/unregister sysctl queue nodes.
@@ -707,6 +741,117 @@
return (0);
}
+/*
+ * Sysctl handler for the "base_cpu" leaf: report the adapter's current IRQ
+ * base CPU and, on write, validate and apply a new one.
+ *
+ * Runs entirely under the ENA lock. Returns ENODEV when
+ * ENA_FLAG_DEVICE_RUNNING is not set, EINVAL when the requested value is
+ * not above ENA_BASE_CPU_UNSPECIFIED or is not below mp_ncpus, an error
+ * from the sysctl helpers, or otherwise the result of
+ * ena_update_base_cpu(). Writing the value that is already configured is a
+ * logged no-op.
+ */
+static int
+ena_sysctl_irq_base_cpu(SYSCTL_HANDLER_ARGS)
+{
+	struct ena_adapter *adapter = arg1;
+	int irq_base_cpu = 0;
+	int error;
+
+	ENA_LOCK_LOCK();
+	if (unlikely(!ENA_FLAG_ISSET(ENA_FLAG_DEVICE_RUNNING, adapter))) {
+		error = ENODEV;
+		goto unlock;
+	}
+
+	error = sysctl_wire_old_buffer(req, sizeof(irq_base_cpu));
+	if (error == 0) {
+		irq_base_cpu = adapter->irq_cpu_base;
+		error = sysctl_handle_int(oidp, &irq_base_cpu, 0, req);
+	}
+	/* Stop here on failure or when the request carries no new value. */
+	if (error != 0 || req->newptr == NULL)
+		goto unlock;
+
+	if (irq_base_cpu <= ENA_BASE_CPU_UNSPECIFIED) {
+		ena_log(adapter->pdev, ERR,
+		    "Requested base CPU is less than zero.\n");
+		error = EINVAL;
+		goto unlock;
+	}
+
+	/*
+	 * CPU indices are zero-based, so mp_ncpus itself is already out of
+	 * range and must be rejected as well.
+	 */
+	if (irq_base_cpu >= mp_ncpus) {
+		ena_log(adapter->pdev, ERR,
+		    "Requested base CPU must be lower than the number of available CPUs.\n");
+		error = EINVAL;
+		goto unlock;
+	}
+
+	if (irq_base_cpu == adapter->irq_cpu_base) {
+		ena_log(adapter->pdev, INFO,
+		    "Requested IRQ base CPU is equal to current value "
+		    "(%d)\n",
+		    adapter->irq_cpu_base);
+		goto unlock;
+	}
+
+	ena_log(adapter->pdev, INFO,
+	    "Requested new IRQ base CPU: %d, current value: %d\n",
+	    irq_base_cpu, adapter->irq_cpu_base);
+
+	error = ena_update_base_cpu(adapter, irq_base_cpu);
+
+unlock:
+	ENA_LOCK_UNLOCK();
+
+	return (error);
+}
+
+/*
+ * Sysctl handler for the "cpu_stride" leaf: report the adapter's current
+ * IRQ CPU stride and, on write, validate and apply a new one.
+ *
+ * Runs entirely under the ENA lock. Returns ENODEV when
+ * ENA_FLAG_DEVICE_RUNNING is not set, EINVAL for a negative stride, an
+ * error from the sysctl helpers, or otherwise the result of
+ * ena_update_cpu_stride(). A stride larger than mp_ncpus is accepted with
+ * a logged warning. Writing the value that is already configured is a
+ * logged no-op.
+ */
+static int
+ena_sysctl_irq_cpu_stride(SYSCTL_HANDLER_ARGS)
+{
+	struct ena_adapter *adapter = arg1;
+	int32_t irq_cpu_stride = 0;
+	uint32_t new_stride;
+	int error;
+
+	ENA_LOCK_LOCK();
+	if (unlikely(!ENA_FLAG_ISSET(ENA_FLAG_DEVICE_RUNNING, adapter))) {
+		error = ENODEV;
+		goto unlock;
+	}
+
+	error = sysctl_wire_old_buffer(req, sizeof(irq_cpu_stride));
+	if (error == 0) {
+		irq_cpu_stride = adapter->irq_cpu_stride;
+		error = sysctl_handle_int(oidp, &irq_cpu_stride, 0, req);
+	}
+	/* Stop here on failure or when the request carries no new value. */
+	if (error != 0 || req->newptr == NULL)
+		goto unlock;
+
+	if (irq_cpu_stride < 0) {
+		ena_log(adapter->pdev, ERR,
+		    "Requested IRQ stride is less than zero.\n");
+		error = EINVAL;
+		goto unlock;
+	}
+
+	/*
+	 * The value is known to be non-negative, so it converts losslessly
+	 * to the unsigned type used by the adapter field and the %u formats.
+	 */
+	new_stride = (uint32_t)irq_cpu_stride;
+
+	if (irq_cpu_stride > mp_ncpus) {
+		ena_log(adapter->pdev, INFO,
+		    "Warning: Requested IRQ stride is larger than the number of available CPUs.\n");
+	}
+
+	if (new_stride == adapter->irq_cpu_stride) {
+		ena_log(adapter->pdev, INFO,
+		    "Requested IRQ CPU stride is equal to current value "
+		    "(%u)\n",
+		    adapter->irq_cpu_stride);
+		goto unlock;
+	}
+
+	ena_log(adapter->pdev, INFO,
+	    "Requested new IRQ CPU stride: %u, current value: %u\n",
+	    new_stride, adapter->irq_cpu_stride);
+
+	error = ena_update_cpu_stride(adapter, new_stride);
+
+unlock:
+	ENA_LOCK_UNLOCK();
+
+	return (error);
+}
+
#ifndef RSS
/*
* Change the Receive Side Scaling hash key.

File Metadata

Mime Type
text/plain
Expires
Thu, Jan 15, 11:10 AM (14 m, 30 s)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
27647257
Default Alt Text
D42653.id130569.diff (10 KB)

Event Timeline