diff --git a/sys/amd64/include/vmm.h b/sys/amd64/include/vmm.h
--- a/sys/amd64/include/vmm.h
+++ b/sys/amd64/include/vmm.h
@@ -159,6 +159,7 @@
 struct vm_guest_paging;
 struct pmap;
 enum snapshot_req;
+struct vm_numa;
 
 struct vm_eventinfo {
 	cpuset_t *rptr;		/* rendezvous cookie */
@@ -233,7 +234,8 @@
     uint16_t *threads, uint16_t *maxcpus);
 int vm_set_topology(struct vm *vm, uint16_t sockets, uint16_t cores,
     uint16_t threads, uint16_t maxcpus);
-
+int vm_get_numa_configuration(struct vm *vm, struct vm_numa *numa);
+int vm_set_numa_configuration(struct vm *vm, struct vm_numa *numa);
 /*
  * APIs that modify the guest memory map require all vcpus to be frozen.
  */
@@ -527,6 +529,16 @@
 #define	SEG_DESC_GRANULARITY(access)	(((access) & 0x8000) ? 1 : 0)
 #define	SEG_DESC_UNUSABLE(access)	(((access) & 0x10000) ? 1 : 0)
 
+#define	VM_MAX_MEMDOMS	8
+struct vm_numa {
+	struct mem_domain {
+		vm_paddr_t	start;
+		vm_paddr_t	end;
+		cpuset_t	cpus;
+	} domains[VM_MAX_MEMDOMS];
+	size_t		ndomains;
+};
+
 enum vm_cpu_mode {
 	CPU_MODE_REAL,
 	CPU_MODE_PROTECTED,
diff --git a/sys/amd64/include/vmm_dev.h b/sys/amd64/include/vmm_dev.h
--- a/sys/amd64/include/vmm_dev.h
+++ b/sys/amd64/include/vmm_dev.h
@@ -257,6 +257,13 @@
 };
 _Static_assert(sizeof(struct vm_readwrite_kernemu_device) == 24, "ABI");
 
+struct vm_numa_domain {
+	int		id;
+	vm_paddr_t	start;
+	vm_paddr_t	end;
+	cpuset_t	cpus;
+};
+
 enum {
 	/* general routines */
 	IOCNUM_ABIVERS = 0,
@@ -324,6 +331,10 @@
 	IOCNUM_SET_TOPOLOGY = 63,
 	IOCNUM_GET_TOPOLOGY = 64,
 
+	/* NUMA configuration */
+	IOCNUM_SET_NUMA = 65,
+	IOCNUM_GET_NUMA = 66,
+
 	/* legacy interrupt injection */
 	IOCNUM_ISA_ASSERT_IRQ = 80,
 	IOCNUM_ISA_DEASSERT_IRQ = 81,
@@ -444,6 +455,10 @@
 	_IOW('v', IOCNUM_SET_TOPOLOGY, struct vm_cpu_topology)
#define	VM_GET_TOPOLOGY \
 	_IOR('v', IOCNUM_GET_TOPOLOGY, struct vm_cpu_topology)
+#define	VM_SET_NUMA \
+	_IOW('v', IOCNUM_SET_NUMA, struct vm_numa)
+#define	VM_GET_NUMA \
+	_IOR('v', IOCNUM_GET_NUMA, struct vm_numa)
 #define	VM_GET_GPA_PMAP \
 	_IOWR('v', IOCNUM_GET_GPA_PMAP, struct vm_gpa_pte)
 #define	VM_GLA2GPA \
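The ioctls defined above copy a struct vm_numa in and out of the kernel verbatim, so a front end only has to populate the structure before issuing VM_SET_NUMA. A minimal userland sketch for a hypothetical two-domain guest follows; the domain sizes, vCPU assignments, and the fill_numa() helper name are illustrative and not part of this patch:

#include <sys/types.h>
#include <sys/cpuset.h>
#include <machine/vmm.h>
#include <string.h>

/*
 * Describe a hypothetical guest with 8 GiB split evenly across two
 * domains: vCPUs 0-1 own the low 4 GiB, vCPUs 2-3 the high 4 GiB.
 * Every value here is illustrative.
 */
static void
fill_numa(struct vm_numa *numa)
{
	memset(numa, 0, sizeof(*numa));
	numa->ndomains = 2;

	numa->domains[0].start = 0;
	numa->domains[0].end = 4UL << 30;
	CPU_ZERO(&numa->domains[0].cpus);
	CPU_SET(0, &numa->domains[0].cpus);
	CPU_SET(1, &numa->domains[0].cpus);

	/* Adjacent, non-overlapping ranges pass the kernel's checks. */
	numa->domains[1].start = 4UL << 30;
	numa->domains[1].end = 8UL << 30;
	CPU_ZERO(&numa->domains[1].cpus);
	CPU_SET(2, &numa->domains[1].cpus);
	CPU_SET(3, &numa->domains[1].cpus);
}

The kernel-side validation that this configuration must satisfy is in the vmm.c hunk below.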
diff --git a/sys/amd64/vmm/vmm.c b/sys/amd64/vmm/vmm.c
--- a/sys/amd64/vmm/vmm.c
+++ b/sys/amd64/vmm/vmm.c
@@ -189,6 +189,7 @@
 	uint16_t cores;			/* (o) num of cores/socket */
 	uint16_t threads;		/* (o) num of threads/core */
 	uint16_t maxcpus;		/* (o) max pluggable cpus */
+	struct vm_numa numa;		/* (o) NUMA configuration */
 	struct sx mem_segs_lock;	/* (o) */
 	struct sx vcpus_init_lock;	/* (o) */
 };
@@ -644,6 +645,59 @@
 	return(0);
 }
 
+int
+vm_set_numa_configuration(struct vm *vm, struct vm_numa *numa)
+{
+	struct mem_domain *d1, *d2;
+	struct mem_map *end;
+	vm_paddr_t maxend;
+	int i, j;
+
+	if (numa->ndomains == 0 || numa->ndomains > VM_MAX_MEMDOMS)
+		return (EINVAL);
+	/* Check that each address range is well-formed. */
+	maxend = 0;
+	for (i = 0; i < numa->ndomains; i++) {
+		d1 = &numa->domains[i];
+		if (d1->start >= d1->end)
+			return (EINVAL);
+		maxend = MAX(maxend, d1->end);
+	}
+	/* Reject overlapping cpu sets or address ranges. */
+	for (i = 0; i < numa->ndomains; i++) {
+		d1 = &numa->domains[i];
+		for (j = i + 1; j < numa->ndomains; j++) {
+			d2 = &numa->domains[j];
+			if (CPU_OVERLAP(&d1->cpus, &d2->cpus))
+				return (EEXIST);
+			/* Two ranges overlap iff each starts before the other ends. */
+			if (d1->start < d2->end && d2->start < d1->end)
+				return (EEXIST);
+		}
+	}
+	/* Check that every domain fits below the top of the memory map. */
+	end = &vm->mem_maps[0];
+	for (i = 1; i < VM_MAX_MEMMAPS; i++) {
+		if (vm->mem_maps[i].len == 0)
+			break;
+		if (end->gpa < vm->mem_maps[i].gpa)
+			end = &vm->mem_maps[i];
+	}
+	if (end->gpa + end->len < maxend)
+		return (E2BIG);
+
+	vm->numa = *numa;
+
+	return (0);
+}
+
+int
+vm_get_numa_configuration(struct vm *vm, struct vm_numa *numa)
+{
+	*numa = vm->numa;
+	return (0);
+}
+
 static void
 vm_cleanup(struct vm *vm, bool destroy)
 {
diff --git a/sys/amd64/vmm/vmm_dev.c b/sys/amd64/vmm/vmm_dev.c
--- a/sys/amd64/vmm/vmm_dev.c
+++ b/sys/amd64/vmm/vmm_dev.c
@@ -445,6 +445,7 @@
 	struct vm_memmap *mm;
 	struct vm_munmap *mu;
 	struct vm_cpu_topology *topology;
+	struct vm_numa *numa;
 	struct vm_readwrite_kernemu_device *kernemu;
 	uint64_t *regvals;
 	int *regnums;
@@ -1058,6 +1059,14 @@
 		    &topology->threads, &topology->maxcpus);
 		error = 0;
 		break;
+	case VM_SET_NUMA:
+		numa = (struct vm_numa *)data;
+		error = vm_set_numa_configuration(sc->vm, numa);
+		break;
+	case VM_GET_NUMA:
+		numa = (struct vm_numa *)data;
+		error = vm_get_numa_configuration(sc->vm, numa);
+		break;
 #ifdef BHYVE_SNAPSHOT
 	case VM_SNAPSHOT_REQ:
 		snapshot_meta = (struct vm_snapshot_meta *)data;
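End to end, the new interface can be exercised by opening the VM's control device and issuing the two ioctls. A self-contained sketch, assuming a VM named "testvm" has already been created (the name and the single-domain layout are illustrative; EINVAL, EEXIST, and E2BIG are the error values returned by vm_set_numa_configuration() above):

#include <sys/types.h>
#include <sys/cpuset.h>
#include <sys/ioctl.h>
#include <machine/vmm.h>
#include <machine/vmm_dev.h>
#include <err.h>
#include <fcntl.h>
#include <stdio.h>
#include <string.h>

int
main(void)
{
	struct vm_numa numa, check;
	int fd;

	/* bhyve control nodes follow the /dev/vmm/<vmname> convention. */
	fd = open("/dev/vmm/testvm", O_RDWR);
	if (fd < 0)
		err(1, "open");

	/* One 1 GiB domain owned by vCPU 0; values are illustrative. */
	memset(&numa, 0, sizeof(numa));
	numa.ndomains = 1;
	numa.domains[0].start = 0;
	numa.domains[0].end = 1UL << 30;
	CPU_ZERO(&numa.domains[0].cpus);
	CPU_SET(0, &numa.domains[0].cpus);

	if (ioctl(fd, VM_SET_NUMA, &numa) == -1)
		err(1, "VM_SET_NUMA");		/* EINVAL, EEXIST or E2BIG */

	if (ioctl(fd, VM_GET_NUMA, &check) == -1)
		err(1, "VM_GET_NUMA");
	printf("%zu NUMA domain(s) configured\n", check.ndomains);

	return (0);
}

Note that VM_SET_NUMA must be issued after guest memory has been mapped, since vm_set_numa_configuration() rejects domains extending past the top of the existing memory map.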