Page MenuHomeFreeBSD

D44567.id136371.diff
No OneTemporary

D44567.id136371.diff

Index: usr.sbin/bhyve/acpi.c
===================================================================
--- usr.sbin/bhyve/acpi.c
+++ usr.sbin/bhyve/acpi.c
@@ -726,6 +726,72 @@
return (0);
}
+/*
+ * Build the ACPI System Resource Affinity Table (SRAT) from the NUMA
+ * configuration attached to the VM.
+ *
+ * For every configured domain one Memory Affinity Structure is emitted,
+ * followed by one Processor Local APIC Affinity Structure for each CPU set in
+ * the domain's cpuset.  The domain index is used as the proximity domain.
+ *
+ * No table is built (and 0 is returned) when the NUMA configuration cannot be
+ * retrieved or when it contains no domains.  Returns 0 on success.
+ * NOTE(review): failures of the basl_* calls are handled by BASL_EXEC, which
+ * is defined outside this hunk -- presumably it returns the error code.
+ */
+static int
+build_srat(struct vmctx *const ctx)
+{
+	ACPI_TABLE_SRAT srat;
+	ACPI_SRAT_MEM_AFFINITY mem_aff;
+	ACPI_SRAT_CPU_AFFINITY cpu_aff;
+	struct basl_table *table;
+	struct mem_domain *dom;
+	struct vm_numa numa;
+	uint32_t cpu_id, i;
+
+	/* A missing NUMA configuration is not an error; just skip the SRAT. */
+	if (vm_get_numa_configuration(ctx, &numa) != 0)
+		return (0);
+	/* Don't build SRAT if there are no domains. */
+	if (numa.ndomains == 0)
+		return (0);
+
+	BASL_EXEC(basl_table_create(&table, ctx, ACPI_SIG_SRAT,
+	    BASL_TABLE_ALIGNMENT));
+
+	memset(&srat, 0, sizeof(srat));
+	BASL_EXEC(basl_table_append_header(table, ACPI_SIG_SRAT, 1, 1));
+	/* Stored little-endian like every other multi-byte field below. */
+	srat.TableRevision = htole32(1);
+	BASL_EXEC(basl_table_append_content(table, &srat, sizeof(srat)));
+
+	for (i = 0; i < numa.ndomains; i++) {
+		dom = &numa.domains[i];
+		/* Sanity check: a domain must cover a non-empty range. */
+		assert(dom->end > dom->start);
+
+		/* Memory Affinity Structure describing the domain's range. */
+		memset(&mem_aff, 0, sizeof(mem_aff));
+		mem_aff.Header.Type = ACPI_SRAT_TYPE_MEMORY_AFFINITY;
+		mem_aff.Header.Length = sizeof(mem_aff);
+		mem_aff.ProximityDomain = htole32(i);
+		mem_aff.BaseAddress = htole64(dom->start);
+		mem_aff.Length = htole64(dom->end - dom->start);
+		mem_aff.Flags = htole32(ACPI_SRAT_MEM_ENABLED);
+		BASL_EXEC(basl_table_append_bytes(table, &mem_aff,
+		    sizeof(mem_aff)));
+
+		/*
+		 * One CPU affinity structure per CPU in the domain.
+		 * NOTE(review): only the low 8 bits of the domain index and
+		 * of the CPU id are stored here; confirm that both are
+		 * guaranteed to fit by the NUMA configuration limits.
+		 */
+		CPU_FOREACH_ISSET (cpu_id, &dom->cpus) {
+			memset(&cpu_aff, 0, sizeof(cpu_aff));
+			cpu_aff.Header.Type = ACPI_SRAT_TYPE_CPU_AFFINITY;
+			cpu_aff.Header.Length = sizeof(cpu_aff);
+			cpu_aff.ProximityDomainLo = (uint8_t)i;
+			cpu_aff.ApicId = (uint8_t)cpu_id;
+			cpu_aff.Flags = htole32(ACPI_SRAT_CPU_USE_AFFINITY);
+			BASL_EXEC(basl_table_append_bytes(table, &cpu_aff,
+			    sizeof(cpu_aff)));
+		}
+	}
+
+	BASL_EXEC(basl_table_register_to_rsdt(table));
+
+	return (0);
+}
+
int
acpi_build(struct vmctx *ctx, int ncpu)
{
@@ -765,6 +831,7 @@
BASL_EXEC(build_mcfg(ctx));
BASL_EXEC(build_facs(ctx));
BASL_EXEC(build_spcr(ctx));
+ BASL_EXEC(build_srat(ctx));
/* Build ACPI device-specific tables such as a TPM2 table. */
const struct acpi_device_list_entry *entry;
Index: usr.sbin/bhyve/bhyverun.c
===================================================================
--- usr.sbin/bhyve/bhyverun.c
+++ usr.sbin/bhyve/bhyverun.c
@@ -218,6 +218,75 @@
return (lval);
}
+/*
+ * Parse 'value' as a long long in the range [minval, maxval].
+ *
+ * The base is auto-detected (decimal, 0x hexadecimal, 0 octal).  'key' is only
+ * used to name the offending setting in the error message.  Any parse failure
+ * (empty string, trailing garbage, overflow, out of range) terminates the
+ * process via errx() with exit status 4.
+ */
+static long long
+parse_ll_value(const char *key, const char *value, long long minval,
+    long long maxval)
+{
+	char *cp;
+	long long lval;
+
+	errno = 0;
+	/*
+	 * strtoll(), not strtol(): the result and the bounds are long long,
+	 * and long may be narrower than that on 32-bit platforms.
+	 */
+	lval = strtoll(value, &cp, 0);
+	if (errno != 0 || *cp != '\0' || cp == value || lval < minval ||
+	    lval > maxval)
+		errx(4, "Invalid value for %s: '%s'", key, value);
+	return (lval);
+}
+
+/*
+ * Parse one NUMA domain description given on the command line, of the form
+ * "id=N,start=ADDR,end=ADDR,cpus=LIST" (keys may appear in any order), and
+ * record it in the configuration tree under "domains.N".
+ *
+ * The start, end and cpus values are stored as strings and are not validated
+ * here.  Returns 0 on success and -1 if the option is empty, incomplete, or
+ * names a domain that is already defined.  An unparsable or out-of-range id
+ * is reported by parse_int_value().
+ */
+static int
+numa_node_parse(const char *opt)
+{
+	int id = -1;
+	int rc = -1;
+	nvlist_t *nvl;
+	char *cp, *str, *tofree;
+	char pathbuf[64] = { 0 };
+	char *start = NULL, *end = NULL, *cpus = NULL;
+
+	if (*opt == '\0')
+		return (-1);
+
+	/* strsep() modifies its input, so work on a private copy. */
+	tofree = str = strdup(opt);
+	if (str == NULL)
+		errx(4, "Failed to allocate memory");
+
+	while ((cp = strsep(&str, ",")) != NULL) {
+		if (strncmp(cp, "id=", strlen("id=")) == 0) {
+			/*
+			 * set_mem_affinity() only reads "domains.0" up to
+			 * "domains.(VM_MAX_MEMDOMS - 1)", so reject larger
+			 * ids here instead of silently ignoring them later.
+			 */
+			id = parse_int_value("id", cp + strlen("id="), 0,
+			    VM_MAX_MEMDOMS - 1);
+		} else if (strncmp(cp, "start=", strlen("start=")) == 0) {
+			start = cp + strlen("start=");
+		} else if (strncmp(cp, "end=", strlen("end=")) == 0) {
+			end = cp + strlen("end=");
+		} else if (strncmp(cp, "cpus=", strlen("cpus=")) == 0) {
+			cpus = cp + strlen("cpus=");
+		}
+	}
+
+	/* Check that we have everything we need. */
+	if (id == -1 || start == NULL || end == NULL || cpus == NULL) {
+		EPRINTLN("Incomplete NUMA domain information");
+		goto out;
+	}
+
+	snprintf(pathbuf, sizeof(pathbuf), "domains.%d", id);
+	if (find_config_node(pathbuf) != NULL) {
+		EPRINTLN("Attempting to redefine NUMA domain %d!", id);
+		goto out;
+	}
+
+	/*
+	 * start/end/cpus point into 'tofree'.
+	 * NOTE(review): assumes set_config_value_node() copies the value
+	 * before the buffer is released below -- confirm.
+	 */
+	nvl = create_config_node(pathbuf);
+	set_config_value_node(nvl, "start", start);
+	set_config_value_node(nvl, "end", end);
+	set_config_value_node(nvl, "cpus", cpus);
+	rc = 0;
+
+out:
+	free(tofree);
+	return (rc);
+}
+
/*
* Set the sockets, cores, threads, and guest_cpus variables based on
* the configured topology.
@@ -554,6 +623,59 @@
return (1);
}
+/*
+ * Collect the NUMA domains recorded in the configuration tree and hand them
+ * to the VM via vm_set_numa_configuration().
+ *
+ * Domains are read from "domains.0", "domains.1", ... and scanning stops at
+ * the first missing index or after VM_MAX_MEMDOMS entries, so domain ids must
+ * be contiguous starting at 0 to be picked up.
+ *
+ * Returns 0 on success and -1 if a domain node lacks one of its "start",
+ * "end" or "cpus" values or if the configuration is rejected; a diagnostic is
+ * printed in both cases.  Unparsable start/end values are reported by
+ * parse_ll_value().
+ */
+static int
+set_mem_affinity(struct vmctx *ctx)
+{
+	int i, saved_errno;
+	nvlist_t *nvl;
+	const char *start, *end, *cpus;
+	const char *reason;
+	struct vm_numa numa;
+	struct mem_domain *dom;
+	char pathbuf[64] = { 0 };
+
+	memset(&numa, 0, sizeof(numa));
+	for (i = 0; i < VM_MAX_MEMDOMS; i++) {
+		snprintf(pathbuf, sizeof(pathbuf), "domains.%d", i);
+		nvl = find_config_node(pathbuf);
+		if (nvl == NULL)
+			break;
+
+		/*
+		 * A domain node that was not created by numa_node_parse()
+		 * may lack some of its values; don't hand NULL to the
+		 * parsers below.
+		 */
+		start = get_config_value_node(nvl, "start");
+		end = get_config_value_node(nvl, "end");
+		cpus = get_config_value_node(nvl, "cpus");
+		if (start == NULL || end == NULL || cpus == NULL) {
+			EPRINTLN("Incomplete configuration for NUMA domain %d",
+			    i);
+			return (-1);
+		}
+
+		dom = &numa.domains[i];
+		dom->start = parse_ll_value("domain start", start, 0,
+		    LLONG_MAX);
+		dom->end = parse_ll_value("domain end", end, 0, LLONG_MAX);
+		parse_cpuset(i, cpus, &dom->cpus);
+	}
+	numa.ndomains = i;
+
+	if (vm_set_numa_configuration(ctx, &numa) == -1) {
+		/* Save errno before any further library call can change it. */
+		saved_errno = errno;
+		switch (saved_errno) {
+		case EINVAL:
+			reason =
+			    "invalid number of domains or invalid domain address ranges";
+			break;
+		case EEXIST:
+			reason = "cpu or address range overlap";
+			break;
+		case E2BIG:
+			reason =
+			    "domain address range exceeds guest physical address range";
+			break;
+		default:
+			/* Report the system's description of the error. */
+			reason = strerror(saved_errno);
+			break;
+		}
+		EPRINTLN("Error while setting NUMA configuration: %s", reason);
+		return (-1);
+	}
+
+	return (0);
+}
+
static struct vmctx *
do_open(const char *vmname)
{
@@ -615,6 +737,10 @@
error = vm_set_topology(ctx, cpu_sockets, cpu_cores, cpu_threads, 0);
if (error)
errx(EX_OSERR, "vm_set_topology");
+ error = set_mem_affinity(ctx);
+ if (error)
+ errx(EX_OSERR, "set_mem_affinity");
+
return (ctx);
}
@@ -709,9 +835,9 @@
progname = basename(argv[0]);
#ifdef BHYVE_SNAPSHOT
- optstr = "aehuwxACDHIPSWYk:f:o:p:G:c:s:m:l:K:U:r:";
+ optstr = "aehuwxACDHIPSWYk:f:o:p:G:c:s:m:l:K:U:r:n:";
#else
- optstr = "aehuwxACDHIPSWYk:f:o:p:G:c:s:m:l:K:U:";
+ optstr = "aehuwxACDHIPSWYk:f:o:p:G:c:s:m:l:K:U:n:";
#endif
while ((c = getopt(argc, argv, optstr)) != -1) {
switch (c) {
@@ -791,6 +917,13 @@
case 'm':
set_config_value("memory.size", optarg);
break;
+ case 'n':
+ if (numa_node_parse(optarg) != 0)
+ errx(EX_USAGE,
+ "invalid NUMA node configuration "
+ "'%s'",
+ optarg);
+ break;
case 'o':
if (!parse_config_option(optarg))
errx(EX_USAGE, "invalid configuration option '%s'", optarg);

File Metadata

Mime Type
text/plain
Expires
Sat, Feb 28, 3:32 AM (11 h, 29 m)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
29050470
Default Alt Text
D44567.id136371.diff (6 KB)

Event Timeline