D30155.diff

Index: sys/dev/ixgbe/if_ix.c
===================================================================
--- sys/dev/ixgbe/if_ix.c
+++ sys/dev/ixgbe/if_ix.c
@@ -349,7 +349,7 @@
* is varied over time based on the
* traffic for that interrupt vector
*/
-static int ixgbe_enable_aim = false;
+static int ixgbe_enable_aim = 1;
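+/* 0 programs the static ixgbe_max_interrupt_rate; 1 selects the
+ * ixgbe_neweitr() estimator; 2 selects the ixgbe_perform_aim()
+ * byte/packet heuristic (see the MSI-X queue interrupt handler).
+ */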
SYSCTL_INT(_hw_ix, OID_AUTO, enable_aim, CTLFLAG_RWTUN, &ixgbe_enable_aim, 0,
"Enable adaptive interrupt moderation");
@@ -1671,8 +1671,8 @@
queue_list = SYSCTL_CHILDREN(queue_node);
SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "interrupt_rate",
- CTLTYPE_UINT | CTLFLAG_RW,
- &sc->rx_queues[i], 0,
+ CTLTYPE_UINT | CTLFLAG_RD,
+ rx_que, false,
ixgbe_sysctl_interrupt_rate_handler, "IU",
"Interrupt Rate");
SYSCTL_ADD_UQUAD(ctx, queue_list, OID_AUTO, "irqs",
@@ -2178,12 +2178,201 @@
return (error);
} /* ixgbe_if_msix_intr_assign */
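+/*
+ * ixgbe_neweitr - compute a new adaptive EITR value for a queue
+ *
+ * Picks a latency or bulk profile from the byte and packet counts
+ * seen since the last interrupt, then scales the interval by average
+ * packet size and link speed before programming EITR.
+ */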
+static inline void
+ixgbe_neweitr(struct ixgbe_softc *sc, struct ix_rx_queue *que)
+{
+#define DIV_ROUND_UP(n,d) (((n) + (d) - 1) / (d))
+#define IXGBE_ITR_ADAPTIVE_MIN_INC 2
+#define IXGBE_ITR_ADAPTIVE_MIN_USECS 10
+#define IXGBE_ITR_ADAPTIVE_MAX_USECS 126
+#define IXGBE_ITR_ADAPTIVE_LATENCY 0x80
+#define IXGBE_ITR_ADAPTIVE_BULK 0x00
+
+ struct rx_ring *rxr = &que->rxr;
+ struct tx_ring *txr = &sc->tx_queues[que->msix].txr;
+ uint32_t newitr = IXGBE_ITR_ADAPTIVE_MIN_USECS |
+ IXGBE_ITR_ADAPTIVE_LATENCY;
+ /* Initialize to zero so an idle queue (no tx or rx traffic since
+ * the last interrupt) falls through to the latency path below
+ * instead of reading uninitialized values.
+ */
+ uint32_t bytes_packets = 0, packets = 0, bytes = 0;
+
+ /* We have no packets to actually measure against. This means
+ * we are a Tx queue doing TSO with too high of an interrupt rate.
+ *
+ * When this occurs just tick up our delay by the minimum value
+ * and hope that this extra delay will prevent us from being called
+ * without any work on our queue.
+ */
+ if (!txr->packets && !rxr->packets && txr->bytes) {
+ newitr = (que->eitr_setting >> 2) + IXGBE_ITR_ADAPTIVE_MIN_INC;
+ if (newitr > IXGBE_ITR_ADAPTIVE_MAX_USECS)
+ newitr = IXGBE_ITR_ADAPTIVE_MAX_USECS;
+ goto ixgbe_set_eitr;
+ }
+
+ /* Get the largest values from the associated tx and rx ring */
+ if (txr->bytes && txr->packets) {
+ bytes = txr->bytes;
+ bytes_packets = txr->bytes / txr->packets;
+ packets = txr->packets;
+ }
+ if (rxr->bytes && rxr->packets) {
+ bytes = max(bytes, rxr->bytes);
+ bytes_packets = max(bytes_packets, rxr->bytes / rxr->packets);
+ packets = max(packets, rxr->packets);
+ }
+
+ /* If packets are less than 4 and bytes are less than 9000, assume
+ * insufficient data to use bulk rate limiting approach. We are
+ * likely latency driven.
+ */
+ if (packets < 4 && bytes < 9000) {
+ newitr = IXGBE_ITR_ADAPTIVE_LATENCY;
+ goto ixgbe_adjust_by_size;
+ }
+
+ /* Between 4 and 48 we can assume that our current interrupt delay
+ * is only slightly too low. As such we should increase it by a small
+ * fixed amount.
+ */
+ if (packets < 48) {
+ newitr = (que->eitr_setting >> 2) + IXGBE_ITR_ADAPTIVE_MIN_INC;
+ if (newitr > IXGBE_ITR_ADAPTIVE_MAX_USECS)
+ newitr = IXGBE_ITR_ADAPTIVE_MAX_USECS;
+ goto ixgbe_set_eitr;
+ }
+
+ /* Between 48 and 96 is our "goldilocks" zone where we are working
+ * out "just right". Just report that our current ITR is good for us.
+ */
+ if (packets < 96) {
+ newitr = que->eitr_setting >> 2;
+ goto ixgbe_set_eitr;
+ }
+
+ /* If packet count is 96 or greater we are likely looking at a slight
+ * overrun of the delay we want. Try halving our delay to see if that
+ * will cut the number of packets in half per interrupt.
+ */
+ if (packets < 256) {
+ newitr = que->eitr_setting >> 3;
+ if (newitr < IXGBE_ITR_ADAPTIVE_MIN_USECS)
+ newitr = IXGBE_ITR_ADAPTIVE_MIN_USECS;
+ goto ixgbe_set_eitr;
+ }
+
+ /* The paths below assume we are dealing with a bulk ITR since the
+ * number of packets is 256 or greater. We are just going to have to
+ * compute a value and try to bring the count under control, though
+ * for smaller packet sizes there isn't much we can do as the receive
+ * path will likely fall back to polling sooner rather than later.
+ */
+ newitr = IXGBE_ITR_ADAPTIVE_BULK;
+
+ixgbe_adjust_by_size:
+ /* If packet counts are 256 or greater we can assume we have a gross
+ * overestimation of what the rate should be. Instead of trying to
+ * fine-tune it, just use the formula below to try and dial in an
+ * exact value given the current packet size of the frame.
+ */
+
+ /* The following is a crude approximation of:
+ * wmem_default / (size + overhead) = desired_pkts_per_int
+ * rate / bits_per_byte / (size + ethernet overhead) = pkt_rate
+ * (desired_pkt_rate / pkt_rate) * usecs_per_sec = ITR value
+ *
+ * Assuming wmem_default is 212992 and overhead is 640 bytes per
+ * packet, (256 skb, 64 headroom, 320 shared info), we can reduce the
+ * formula down to
+ *
+ * (170 * (size + 24)) / (size + 640) = ITR
+ *
+ * We first do some math on the packet size and then finally bitshift
+ * by 8 after rounding up. We also have to account for PCIe link speed
+ * difference as ITR scales based on this.
+ */
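+ /* Sanity check of the formula above: a 60-byte frame gives
+ * (170 * (60 + 24)) / (60 + 640) = 14280 / 700 ~= 20 usec
+ * (~50K ints/sec), which matches the 5120 constant below once
+ * the extra factor of 256 is divided back out (5120 / 256 = 20).
+ */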
+ if (bytes_packets <= 60) {
+ /* Start at 50k ints/sec */
+ bytes_packets = 5120;
+ } else if (bytes_packets <= 316) {
+ /* 50K ints/sec to 16K ints/sec */
+ bytes_packets *= 40;
+ bytes_packets += 2720;
+ } else if (bytes_packets <= 1084) {
+ /* 16K ints/sec to 9.2K ints/sec */
+ bytes_packets *= 15;
+ bytes_packets += 11452;
+ } else if (bytes_packets < 1968) {
+ /* 9.2K ints/sec to 8K ints/sec */
+ bytes_packets *= 5;
+ bytes_packets += 22420;
+ } else {
+ /* plateau at a limit of 8K ints/sec */
+ bytes_packets = 32256;
+ }
+
+ /* If we are in low latency mode, halve our delay, which doubles the
+ * rate to somewhere between 100K and 16K ints/sec.
+ */
+ if (newitr & IXGBE_ITR_ADAPTIVE_LATENCY)
+ bytes_packets >>= 1;
+
+ /* Resultant value is 256 times larger than it needs to be. This
+ * gives us room to adjust the value as needed to either increase
+ * or decrease the value based on link speeds of 10G, 2.5G, 1G, etc.
+ *
+ * Use addition as we have already recorded the new latency flag
+ * for the ITR value.
+ */
+ switch (sc->link_speed) {
+ case IXGBE_LINK_SPEED_10GB_FULL:
+ case IXGBE_LINK_SPEED_5GB_FULL:
+ case IXGBE_LINK_SPEED_2_5GB_FULL:
+ case IXGBE_LINK_SPEED_1GB_FULL:
+ newitr += DIV_ROUND_UP(bytes_packets,
+ IXGBE_ITR_ADAPTIVE_MIN_INC * 256) *
+ IXGBE_ITR_ADAPTIVE_MIN_INC;
+ break;
+ case IXGBE_LINK_SPEED_100_FULL:
+ case IXGBE_LINK_SPEED_10_FULL:
+ default:
+ if (bytes_packets > 8064)
+ bytes_packets = 8064;
+ newitr += DIV_ROUND_UP(bytes_packets,
+ IXGBE_ITR_ADAPTIVE_MIN_INC * 64) *
+ IXGBE_ITR_ADAPTIVE_MIN_INC;
+ break;
+ }
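+ /* Worked example: at 10G the 60-byte case above yields
+ * newitr += DIV_ROUND_UP(5120, 512) * 2 = 20 usec (~50K ints/sec),
+ * while at 100M the 8064 cap yields DIV_ROUND_UP(8064, 128) * 2 =
+ * 126 usec, i.e. IXGBE_ITR_ADAPTIVE_MAX_USECS.
+ */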
+
+ixgbe_set_eitr:
+ /* Clear the latency flag if set and shift the usec value into
+ * EITR's interval field (bit 3 and up, counting 2 usec units,
+ * hence the shift by 2). The control bits are OR'd in afterwards
+ * so the shift cannot push them out of the register.
+ */
+ newitr &= ~IXGBE_ITR_ADAPTIVE_LATENCY;
+ newitr <<= 2;
+
+ if (newitr != que->eitr_setting) {
+ que->eitr_setting = newitr;
+ if (sc->hw.mac.type == ixgbe_mac_82598EB)
+ newitr |= newitr << 16;
+ else
+ newitr |= IXGBE_EITR_CNT_WDIS;
+ IXGBE_WRITE_REG(&sc->hw, IXGBE_EITR(que->msix), newitr);
+ }
+
+ /* Reset state */
+ txr->bytes = 0;
+ txr->packets = 0;
+ rxr->bytes = 0;
+ rxr->packets = 0;
+}
+
static inline void
ixgbe_perform_aim(struct ixgbe_softc *sc, struct ix_rx_queue *que)
{
uint32_t newitr = 0;
struct rx_ring *rxr = &que->rxr;
- /* FIXME struct tx_ring *txr = ... ->txr; */
+ struct tx_ring *txr = &sc->tx_queues[que->msix].txr;
/*
* Do Adaptive Interrupt Moderation:
@@ -2197,20 +2386,15 @@
}
que->eitr_setting = 0;
+
/* Idle, do nothing */
- if (rxr->bytes == 0) {
- /* FIXME && txr->bytes == 0 */
+ if (txr->bytes == 0 && rxr->bytes == 0)
return;
- }
+ if ((txr->bytes) && (txr->packets))
+ newitr = txr->bytes / txr->packets;
if ((rxr->bytes) && (rxr->packets))
- newitr = rxr->bytes / rxr->packets;
- /* FIXME for transmit accounting
- * if ((txr->bytes) && (txr->packets))
- * newitr = txr->bytes/txr->packets;
- * if ((rxr->bytes) && (rxr->packets))
- * newitr = max(newitr, (rxr->bytes / rxr->packets));
- */
+ newitr = max(newitr, rxr->bytes / rxr->packets);
newitr += 24; /* account for hardware frame, crc */
/* set an upper boundary */
@@ -2223,18 +2407,17 @@
newitr = (newitr / 2);
}
- if (sc->hw.mac.type == ixgbe_mac_82598EB) {
+ if (sc->hw.mac.type == ixgbe_mac_82598EB)
newitr |= newitr << 16;
- } else {
+ else
newitr |= IXGBE_EITR_CNT_WDIS;
- }
/* save for next interrupt */
que->eitr_setting = newitr;
/* Reset state */
- /* FIXME txr->bytes = 0; */
- /* FIXME txr->packets = 0; */
+ txr->bytes = 0;
+ txr->packets = 0;
rxr->bytes = 0;
rxr->packets = 0;
@@ -2251,6 +2434,8 @@
struct ixgbe_softc *sc = que->sc;
if_t ifp = iflib_get_ifp(que->sc->ctx);
+ uint32_t newitr = 0;
+
/* Protect against spurious interrupts */
if ((if_getdrvflags(ifp) & IFF_DRV_RUNNING) == 0)
return (FILTER_HANDLED);
@@ -2259,8 +2444,17 @@
++que->irqs;
/* Check for AIM */
- if (sc->enable_aim) {
+ if (sc->enable_aim == 1) {
+ ixgbe_neweitr(sc, que);
+ } else if (sc->enable_aim == 2) {
ixgbe_perform_aim(sc, que);
+ } else {
+ if (ixgbe_max_interrupt_rate > 0)
+ newitr = IXGBE_INTS_TO_EITR(ixgbe_max_interrupt_rate);
+ if (sc->hw.mac.type == ixgbe_mac_82598EB)
+ newitr |= newitr << 16;
+ else
+ newitr |= IXGBE_EITR_CNT_WDIS;
+ IXGBE_WRITE_REG(&sc->hw, IXGBE_EITR(que->msix), newitr);
}
return (FILTER_SCHEDULE_THREAD);
@@ -2688,31 +2882,35 @@
static int
ixgbe_sysctl_interrupt_rate_handler(SYSCTL_HANDLER_ARGS)
{
- struct ix_rx_queue *que = ((struct ix_rx_queue *)oidp->oid_arg1);
+ struct ix_rx_queue *rque;
+ struct ix_tx_queue *tque;
int error;
unsigned int reg, usec, rate;
- if (atomic_load_acq_int(&que->sc->recovery_mode))
- return (EPERM);
+ bool tx = oidp->oid_arg2;
- reg = IXGBE_READ_REG(&que->sc->hw, IXGBE_EITR(que->msix));
- usec = ((reg & 0x0FF8) >> 3);
+ if (tx) {
+ tque = oidp->oid_arg1;
+ if (atomic_load_acq_int(&tque->sc->recovery_mode))
+ return (EPERM);
+ reg = IXGBE_READ_REG(&tque->sc->hw, IXGBE_EITR(tque->txr.me));
+ } else {
+ rque = oidp->oid_arg1;
+ if (atomic_load_acq_int(&rque->sc->recovery_mode))
+ return (EPERM);
+ reg = IXGBE_READ_REG(&rque->sc->hw, IXGBE_EITR(rque->msix));
+ }
+
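+ /* EITR's interval field (IXGBE_QVECTOR_MASK, bits 11:3) counts
+ * 2 usec units, so the rate below works out to 500000 / usec
+ * interrupts per second.
+ */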
+ usec = (reg & IXGBE_QVECTOR_MASK) >> 3;
 if (usec > 0)
 rate = 500000 / usec;
else
rate = 0;
+
error = sysctl_handle_int(oidp, &rate, 0, req);
if (error || !req->newptr)
return error;
- reg &= ~0xfff; /* default, no limitation */
- ixgbe_max_interrupt_rate = 0;
- if (rate > 0 && rate < 500000) {
- if (rate < 1000)
- rate = 1000;
- ixgbe_max_interrupt_rate = rate;
- reg |= ((4000000/rate) & 0xff8);
- }
- IXGBE_WRITE_REG(&que->sc->hw, IXGBE_EITR(que->msix), reg);
return (0);
} /* ixgbe_sysctl_interrupt_rate_handler */
@@ -3314,7 +3512,7 @@
u32 newitr;
if (ixgbe_max_interrupt_rate > 0)
- newitr = (4000000 / ixgbe_max_interrupt_rate) & 0x0FF8;
+ newitr = IXGBE_INTS_TO_EITR(ixgbe_max_interrupt_rate);
else {
/*
* Disable DMA coalescing if interrupt moderation is
@@ -3324,6 +3522,11 @@
newitr = 0;
}
+ if (sc->hw.mac.type == ixgbe_mac_82598EB)
+ newitr |= newitr << 16;
+ else
+ newitr |= IXGBE_EITR_CNT_WDIS;
+
for (int i = 0; i < sc->num_rx_queues; i++, rx_que++) {
struct rx_ring *rxr = &rx_que->rxr;
Index: sys/dev/ixgbe/ix_txrx.c
===================================================================
--- sys/dev/ixgbe/ix_txrx.c
+++ sys/dev/ixgbe/ix_txrx.c
@@ -235,6 +235,7 @@
}
txd->read.cmd_type_len |= htole32(IXGBE_TXD_CMD_EOP | flags);
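+ /* Count the completed frame; the AIM paths read these per-ring
+ * byte and packet totals and zero them after each interrupt.
+ */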
+ ++txr->packets;
txr->bytes += pi->ipi_len;
pi->ipi_new_pidx = i;
Index: sys/dev/ixgbe/ixgbe.h
===================================================================
--- sys/dev/ixgbe/ixgbe.h
+++ sys/dev/ixgbe/ixgbe.h
@@ -204,6 +204,11 @@
#define DEVMETHOD_END { NULL, NULL }
#endif
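+/* EITR's interval field lives at bits [11:3] (IXGBE_QVECTOR_MASK) and
+ * counts 2 usec units, so IXGBE_EITR_DIVIDEND / rate gives the encoded
+ * interval (already shifted into place) for a target rate in ints/sec:
+ * 4000000 / rate == (500000 / rate) << 3.
+ */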
+#define IXGBE_EITR_DIVIDEND 4000000
+#define IXGBE_QVECTOR_MASK 0x0FF8
+#define IXGBE_INTS_TO_EITR(i) ((IXGBE_EITR_DIVIDEND / (i)) & \
+ IXGBE_QVECTOR_MASK)
+
/*
* Interrupt Moderation parameters
*/
