Page MenuHomeFreeBSD

D10435.id27980.diff
No OneTemporary

D10435.id27980.diff

Index: etc/defaults/rc.conf
===================================================================
--- etc/defaults/rc.conf
+++ etc/defaults/rc.conf
@@ -682,6 +682,9 @@
iovctl_files="" # Config files for iovctl(8)
+irqrebalance_enable="NO" # Balance IRQs dynamically
+irqrebalance_period="" # Rebalance period (seconds) (defaults to 60)
+
##############################################################
### Jail Configuration (see rc.conf(5) manual page) ##########
##############################################################
Index: etc/rc.d/irqrebalance
===================================================================
--- /dev/null
+++ etc/rc.d/irqrebalance
@@ -0,0 +1,31 @@
+#!/bin/sh
+#
+# $FreeBSD$
+#
+
+# PROVIDE: irqrebalance
+
+# rc.d script for the irqrebalance program: runs it in the background with
+# "-f <period>", where the period comes from irqrebalance_period.
+
+. /etc/rc.subr
+
+name="irqrebalance"
+desc="Dynamically rebalance interrupts across cores depending on load"
+rcvar="irqrebalance_enable"
+command="/libexec/${name}"
+command_args="&"
+start_precmd="irqrebalance_precmd"
+
+# Build the flags passed to the program just before it is started.
+# An unset or empty irqrebalance_period selects 60 seconds.
+irqrebalance_precmd()
+{
+
+	rc_flags="-f ${irqrebalance_period:-60}"
+}
+
+load_rc_config $name
+run_rc_command "$1"
Index: libexec/Makefile
===================================================================
--- libexec/Makefile
+++ libexec/Makefile
@@ -9,6 +9,7 @@
${_comsat} \
${_dma} \
getty \
+ ${_irqrebalance} \
${_mail.local} \
${_makewhatis.local} \
${_mknetid} \
@@ -58,6 +59,10 @@
_dma= dma
.endif
+.if ${MK_IRQREBALANCE} != "no"
+_irqrebalance= irqrebalance
+.endif
+
.if ${MK_NIS} != "no"
_mknetid= mknetid
_ypxfr= ypxfr
Index: libexec/irqrebalance/Makefile
===================================================================
--- /dev/null
+++ libexec/irqrebalance/Makefile
@@ -0,0 +1,9 @@
+# $FreeBSD$
+
+# Build description for the irqrebalance program (single source file).
+
+.include <src.opts.mk>
+
+PROG= irqrebalance
+# MAN is deliberately left empty: no manual page is listed for this program.
+MAN=
+SRCS= irqrebalance.c
+
+.include <bsd.prog.mk>
Index: libexec/irqrebalance/irqrebalance.c
===================================================================
--- /dev/null
+++ libexec/irqrebalance/irqrebalance.c
@@ -0,0 +1,222 @@
+/*
+ * Copyright (c) 2017 Dell EMC Isilon
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#include <sys/param.h>
+#include <sys/cpuset.h>
+#include <sys/sysctl.h>
+
+#include <err.h>
+#include <errno.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
+/*
+ * One entry per interrupt counter reported by the hw.intrcnt sysctl.
+ */
+struct intr_src {
+ /* strdup()ed name from hw.intrnames; left NULL when the name is empty. */
+ const char *is_name;
+ /* Counter value copied from hw.intrcnt on each rebalance pass. */
+ unsigned long is_count;
+ /* IRQ number parsed from an "irq<N>:" name, or -1 if there is none. */
+ int is_irq;
+};
+
+/* CPU set fetched once by enumerate_irqs() via cpuset_getaffinity(). */
+static cpuset_t cpus;
+/* Array of interrupt sources, allocated and filled by enumerate_irqs(). */
+static struct intr_src *intr_sources;
+/* Raw buffer holding the contents of the hw.intrnames sysctl. */
+static char *intrnames;
+
+/* Defined below; called from main(). */
+static void enumerate_irqs(void);
+static void irqshuffle(void);
+
+/*
+ * irqrebalance options:
+ *	-f period
+ *		Rebalance every N seconds.  N must be a positive integer;
+ *		60 is used when the option is not given.
+ *
+ * Enumerates the interrupt sources once, then loops forever: rebalance,
+ * sleep for the period, repeat.  Exits with status 1 on an unknown option
+ * or an invalid period.
+ */
+int
+main(int argc, char **argv)
+{
+	char *end;
+	long period;
+	int c;
+
+	/*
+	 * Default used when -f is absent.  Without it the period would be
+	 * read uninitialized by sleep() below.  60 matches the default
+	 * applied by the rc.d script.
+	 */
+	period = 60;
+
+	while ((c = getopt(argc, argv, "f:")) != -1) {
+		switch (c) {
+		case 'f':
+			/*
+			 * strtol() instead of atol() so that non-numeric
+			 * input, trailing garbage and overflow are detected.
+			 * The round-trip cast rejects values that do not fit
+			 * the unsigned int taken by sleep().
+			 */
+			errno = 0;
+			period = strtol(optarg, &end, 10);
+			if (errno != 0 || end == optarg || *end != '\0' ||
+			    period <= 0 ||
+			    (long)(unsigned int)period != period)
+				errx(1, "invalid period: %s", optarg);
+			break;
+		default:
+			return (1);
+		}
+	}
+
+	enumerate_irqs();
+	while (true) {
+		irqshuffle();
+		sleep((unsigned int)period);
+	}
+	/* NOTREACHED */
+	return (0);
+}
+
+/*
+ * Fetch the hw.intrcnt sysctl into a freshly allocated buffer.
+ *
+ * On return *intrcnts points to the buffer, which the caller must free().
+ * Returns the number of unsigned long counters read.  Any allocation or
+ * sysctl failure other than "buffer too small" terminates the program.
+ */
+static size_t
+read_intrcnts(unsigned long **intrcnts)
+{
+	size_t bufsize, len;
+
+	*intrcnts = NULL;
+	for (bufsize = 1024; ; bufsize *= 2) {
+		*intrcnts = reallocf(*intrcnts, bufsize);
+		if (*intrcnts == NULL)
+			err(1, "reallocf");
+		/*
+		 * The length argument is in/out, so hand the kernel a copy
+		 * and keep the real buffer size for the next doubling.
+		 */
+		len = bufsize;
+		if (sysctlbyname("hw.intrcnt", *intrcnts, &len, NULL, 0) == 0)
+			break;
+		/*
+		 * sysctlbyname() returns -1 and reports the cause in errno;
+		 * only ENOMEM (buffer too small) is worth retrying.
+		 */
+		if (errno != ENOMEM)
+			err(1, "sysctl");
+	}
+
+	return (len / sizeof(unsigned long));
+}
+
+/*
+ * One-time setup: read hw.intrnames, allocate intr_sources with one entry
+ * per hw.intrcnt counter, record each entry's name and IRQ number, and
+ * fetch the CPU set into `cpus'.
+ *
+ * Entries whose name is not of the form "irq<N>:" get is_irq == -1 and
+ * are skipped when rebalancing.  Any failure terminates the program.
+ */
+static void
+enumerate_irqs(void)
+{
+	size_t bufsize, inamlen, intrcnt, i;
+	unsigned long *counts;
+	const char *name, *end, *nul;
+	char *copy;
+	int irq, rc;
+
+	intrnames = NULL;
+	for (bufsize = 1024; ; bufsize *= 2) {
+		if ((intrnames = reallocf(intrnames, bufsize)) == NULL)
+			err(1, "reallocf");
+		/* In/out length: keep the buffer size separately. */
+		inamlen = bufsize;
+		if (sysctlbyname("hw.intrnames", intrnames, &inamlen,
+		    NULL, 0) == 0)
+			break;
+		/* Failure is reported through errno, not the return value. */
+		if (errno != ENOMEM)
+			err(1, "sysctl");
+	}
+
+	/* Only the number of counters is needed here, not their values. */
+	intrcnt = read_intrcnts(&counts);
+	free(counts);
+
+	intr_sources = calloc(intrcnt, sizeof(*intr_sources));
+	if (intr_sources == NULL)
+		err(1, "calloc");
+
+	/*
+	 * The names buffer is a sequence of NUL-terminated strings.  Walk
+	 * it without ever reading past the bytes the kernel returned, even
+	 * if there are more counters than names.
+	 */
+	end = intrnames + inamlen;
+	for (i = 0, name = intrnames; i < intrcnt; i++) {
+		intr_sources[i].is_irq = -1;
+		if (name >= end)
+			continue;
+		nul = memchr(name, '\0', (size_t)(end - name));
+		if (nul == NULL) {
+			/* Unterminated tail: nothing more can be parsed. */
+			name = end;
+			continue;
+		}
+		if (name[0] != '\0') {
+			if ((copy = strdup(name)) == NULL)
+				err(1, "strdup");
+			intr_sources[i].is_name = copy;
+		}
+		/* Only names that start with "irq" carry an IRQ number. */
+		if (strncmp(name, "irq", 3) == 0 &&
+		    sscanf(name, "irq%d:", &irq) == 1)
+			intr_sources[i].is_irq = irq;
+		name = nul + 1;
+	}
+
+	rc = cpuset_getaffinity(CPU_LEVEL_ROOT, CPU_WHICH_CPUSET, -1,
+	    sizeof(cpus), &cpus);
+	if (rc != 0)
+		err(1, "cpuset_getaffinity");
+}
+
+/*
+ * qsort(3) comparator for struct intr_src: orders entries by ascending
+ * is_count.  Entries with a zero count therefore sort first.
+ */
+static int
+intrcmp(const void *one, const void *two)
+{
+	unsigned long lhs, rhs;
+
+	lhs = ((const struct intr_src *)one)->is_count;
+	rhs = ((const struct intr_src *)two)->is_count;
+
+	/* Three-way compare without subtraction, so no wrap-around. */
+	return ((lhs > rhs) - (lhs < rhs));
+}
+
+/*
+ * Find the first CPU at or after index `idx' (wrapping at CPU_SETSIZE)
+ * that is a member of `allcpus', and store a set containing only that
+ * CPU in `output'.
+ *
+ * Returns the index following the chosen CPU, suitable for passing as
+ * `idx' on the next call.  Does not terminate if `allcpus' is empty.
+ */
+static size_t
+nextcpu(const cpuset_t *allcpus, size_t idx, cpuset_t *output)
+{
+	size_t cpu;
+
+	CPU_ZERO(output);
+	for (cpu = idx; !CPU_ISSET(cpu, allcpus); cpu = (cpu + 1) % CPU_SETSIZE)
+		continue;
+	CPU_SET(cpu, output);
+
+	return ((cpu + 1) % CPU_SETSIZE);
+}
+
+/*
+ * One rebalance pass: read the current interrupt counters, order the
+ * sources by count, and bind them to CPUs from the busiest source down,
+ * handing out CPUs from `cpus' in round-robin order.
+ *
+ * Sources without an IRQ number (is_irq < 0) are not bound, but still
+ * consume a CPU slot.  A cpuset_setaffinity() failure terminates the
+ * program.
+ *
+ * NOTE(review): intr_sources was sized by enumerate_irqs(); this assumes
+ * the number of hw.intrcnt entries does not grow afterwards -- confirm.
+ */
+static void
+irqshuffle(void)
+{
+	const struct intr_src *isrc;
+	struct intr_src *sorted;
+	unsigned long *intrcnts;
+	size_t current_cpu, i, nintrs;
+	cpuset_t mask;
+	int rc;
+
+	nintrs = read_intrcnts(&intrcnts);
+	if (nintrs == 0) {
+		free(intrcnts);
+		return;
+	}
+
+	/*
+	 * Sort a private copy.  intr_sources must stay in sysctl order so
+	 * that intrcnts[i] still belongs to intr_sources[i] on the next
+	 * pass; sorting the global array in place broke that pairing.
+	 */
+	sorted = calloc(nintrs, sizeof(*sorted));
+	if (sorted == NULL)
+		err(1, "calloc");
+	for (i = 0; i < nintrs; i++) {
+		intr_sources[i].is_count = intrcnts[i];
+		sorted[i] = intr_sources[i];
+	}
+	/* The raw counters are no longer needed; they leaked every pass. */
+	free(intrcnts);
+
+	qsort(sorted, nintrs, sizeof(*sorted), intrcmp);
+
+	/*
+	 * Scan from the same location to avoid moving in the common case.
+	 */
+	current_cpu = 0;
+
+	/* Walk from the highest count to the lowest. */
+	for (i = nintrs; i-- > 0; ) {
+		isrc = &sorted[i];
+		current_cpu = nextcpu(&cpus, current_cpu, &mask);
+
+		if (isrc->is_irq < 0)
+			continue;
+
+		/* XXX Differentiate managed and unmanaged irqs? */
+		rc = cpuset_setaffinity(CPU_LEVEL_WHICH, CPU_WHICH_IRQ_ONLY,
+		    isrc->is_irq, sizeof(mask), &mask);
+		if (rc != 0)
+			err(1, "cpuset_setaffinity");
+	}
+
+	free(sorted);
+}
Index: share/mk/src.opts.mk
===================================================================
--- share/mk/src.opts.mk
+++ share/mk/src.opts.mk
@@ -112,6 +112,7 @@
INETD \
IPFILTER \
IPFW \
+ IRQREBALANCE \
ISCSI \
JAIL \
KDUMP \

File Metadata

Mime Type
text/plain
Expires
Sun, Feb 8, 2:09 AM (11 h, 50 m)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
28462756
Default Alt Text
D10435.id27980.diff (7 KB)

Event Timeline