Index: sys/dev/acpica/acpi.c =================================================================== --- sys/dev/acpica/acpi.c +++ sys/dev/acpica/acpi.c @@ -208,6 +208,7 @@ DEVMETHOD(bus_setup_intr, bus_generic_setup_intr), DEVMETHOD(bus_teardown_intr, bus_generic_teardown_intr), DEVMETHOD(bus_hint_device_unit, acpi_hint_device_unit), + DEVMETHOD(bus_get_domain, acpi_get_domain), /* ACPI bus */ DEVMETHOD(acpi_id_probe, acpi_device_id_probe), @@ -798,6 +799,7 @@ retval += resource_list_print_type(rl, "drq", SYS_RES_DRQ, "%ld"); if (device_get_flags(child)) retval += printf(" flags %#x", device_get_flags(child)); + retval += bus_print_child_domain(bus, child); retval += bus_print_child_footer(bus, child); return (retval); @@ -1071,6 +1073,35 @@ } /* + * Fech the NUMA domain for the given device. + * + * If a device has a _PXM method, map that to a NUMA domain. + * + * If none is found, then it'll call the parent method. + * If there's no domain, return ENOENT. + */ +int +acpi_get_domain(device_t dev, device_t child, int *domain) +{ +#if MAXMEMDOM > 1 + ACPI_HANDLE h; + int d, pxm; + + h = acpi_get_handle(child); + if ((h != NULL) && + ACPI_SUCCESS(acpi_GetInteger(h, "_PXM", &pxm))) { + d = acpi_map_pxm_to_vm_domainid(pxm); + if (d < 0) + return (ENOENT); + *domain = d; + return (0); + } +#endif + /* No _PXM node; go up a level */ + return (bus_generic_get_domain(dev, child, domain)); +} + +/* * Pre-allocate/manage all memory and IO resources. Since rman can't handle * duplicates, we merge any in the sysresource attach routine. */ Index: sys/dev/acpica/acpi_pci.c =================================================================== --- sys/dev/acpica/acpi_pci.c +++ sys/dev/acpica/acpi_pci.c @@ -94,6 +94,7 @@ DEVMETHOD(bus_write_ivar, acpi_pci_write_ivar), DEVMETHOD(bus_child_location_str, acpi_pci_child_location_str_method), DEVMETHOD(bus_get_dma_tag, acpi_pci_get_dma_tag), + DEVMETHOD(bus_get_domain, acpi_get_domain), /* PCI interface */ DEVMETHOD(pci_set_powerstate, acpi_pci_set_powerstate_method), Index: sys/dev/acpica/acpivar.h =================================================================== --- sys/dev/acpica/acpivar.h +++ sys/dev/acpica/acpivar.h @@ -489,5 +489,16 @@ SYSCTL_DECL(_debug_acpi); +/* + * Map a PXM to a VM domain. + * + * Returns the VM domain ID if found, or -1 if not found / invalid. + */ +#if MAXMEMDOM > 1 +extern int acpi_map_pxm_to_vm_domainid(int pxm); +#endif + +extern int acpi_get_domain(device_t dev, device_t child, int *domain); + #endif /* _KERNEL */ #endif /* !_ACPIVAR_H_ */ Index: sys/dev/pci/pci.c =================================================================== --- sys/dev/pci/pci.c +++ sys/dev/pci/pci.c @@ -3949,6 +3949,7 @@ retval += printf(" at device %d.%d", pci_get_slot(child), pci_get_function(child)); + retval += bus_print_child_domain(dev, child); retval += bus_print_child_footer(dev, child); return (retval); Index: sys/kern/bus_if.m =================================================================== --- sys/kern/bus_if.m +++ sys/kern/bus_if.m @@ -692,3 +692,16 @@ device_t _dev; device_t _child; } DEFAULT bus_generic_resume_child; + +/** + * @brief Get the VM domain handle for the given bus and child. + * + * @param _dev the bus device + * @param _child the child device + * @param _domain a pointer to the bus's domain handle identifier + */ +METHOD int get_domain { + device_t _dev; + device_t _child; + int *_domain; +} DEFAULT bus_generic_get_domain; Index: sys/kern/subr_bus.c =================================================================== --- sys/kern/subr_bus.c +++ sys/kern/subr_bus.c @@ -54,6 +54,7 @@ #include #include #include +#include #include @@ -3751,6 +3752,25 @@ /** * @brief Helper function for implementing BUS_PRINT_CHILD(). * + * This function prints out the NUMA domain for the given device. + * + * @returns the number of characters printed + */ +int +bus_print_child_domain(device_t dev, device_t child) +{ + int domain; + + /* No domain? Don't print anything */ + if (BUS_GET_DOMAIN(dev, child, &domain) != 0) + return (0); + + return (printf(" numa-domain %d", domain)); +} + +/** + * @brief Helper function for implementing BUS_PRINT_CHILD(). + * * This function simply calls bus_print_child_header() followed by * bus_print_child_footer(). * @@ -3762,6 +3782,7 @@ int retval = 0; retval += bus_print_child_header(dev, child); + retval += bus_print_child_domain(dev, child); retval += bus_print_child_footer(dev, child); return (retval); @@ -4176,6 +4197,16 @@ return (BUS_CHILD_PRESENT(device_get_parent(dev), dev)); } +int +bus_generic_get_domain(device_t dev, device_t child, int *domain) +{ + + if (dev->parent) + return (BUS_GET_DOMAIN(dev->parent, dev, domain)); + + return (ENOENT); +} + /* * Some convenience functions to make it easier for drivers to use the * resource-management functions. All these really do is hide the Index: sys/sys/bus.h =================================================================== --- sys/sys/bus.h +++ sys/sys/bus.h @@ -331,6 +331,7 @@ bus_generic_get_resource_list (device_t, device_t); void bus_generic_new_pass(device_t dev); int bus_print_child_header(device_t dev, device_t child); +int bus_print_child_domain(device_t dev, device_t child); int bus_print_child_footer(device_t dev, device_t child); int bus_generic_print_child(device_t dev, device_t child); int bus_generic_probe(device_t dev); @@ -364,6 +365,8 @@ int bus_generic_write_ivar(device_t dev, device_t child, int which, uintptr_t value); +int bus_generic_get_domain(device_t dev, device_t child, int *domain); + /* * Wrapper functions for the BUS_*_RESOURCE methods to make client code * a little simpler. Index: sys/vm/vm_phys.c =================================================================== --- sys/vm/vm_phys.c +++ sys/vm/vm_phys.c @@ -118,6 +118,10 @@ SYSCTL_INT(_vm, OID_AUTO, ndomains, CTLFLAG_RD, &vm_ndomains, 0, "Number of physical memory domains available."); +static int vm_domain_policy = 0; +SYSCTL_INT(_vm, OID_AUTO, domain_policy, CTLFLAG_RW, &vm_domain_policy, + 0, "0=RR, 1=first-touch"); + static vm_page_t vm_phys_alloc_domain_pages(int domain, int flind, int pool, int order); static void _vm_phys_create_seg(vm_paddr_t start, vm_paddr_t end, int flind, @@ -184,6 +188,29 @@ #endif } +static int +vm_firsttouch_selectdomain(void) +{ +#if MAXMEMDOM > 1 + /* XXX TODO: critical section? */ + return PCPU_GET(domain); +#else + return (0); +#endif +} + +static int +vm_selectdomain(int do_local_alloc) +{ +#if MAXMEMDOM > 1 + if ((vm_domain_policy == 1) && (do_local_alloc == 1)) + return (vm_firsttouch_selectdomain()); + return (vm_rr_selectdomain()); +#else + return (0); +#endif +} + boolean_t vm_phys_domain_intersects(long mask, vm_paddr_t low, vm_paddr_t high) { @@ -474,8 +501,12 @@ KASSERT(order < VM_NFREEORDER, ("vm_phys_alloc_pages: order %d is out of range", order)); + /* + * First attempt - try local domain if applicable. + * Further attempts - just try round robin. + */ for (dom = 0; dom < vm_ndomains; dom++) { - domain = vm_rr_selectdomain(); + domain = vm_selectdomain(dom == 0); for (flind = 0; flind < vm_nfreelists; flind++) { m = vm_phys_alloc_domain_pages(domain, flind, pool, order); @@ -504,7 +535,7 @@ ("vm_phys_alloc_freelist_pages: order %d is out of range", order)); for (dom = 0; dom < vm_ndomains; dom++) { - domain = vm_rr_selectdomain(); + domain = vm_selectdomain(dom == 0); m = vm_phys_alloc_domain_pages(domain, flind, pool, order); if (m != NULL) return (m); @@ -1012,7 +1043,7 @@ for (order = 0; (1 << order) < npages; order++); dom = 0; restartdom: - domain = vm_rr_selectdomain(); + domain = vm_selectdomain(dom == 0); for (flind = 0; flind < vm_nfreelists; flind++) { for (oind = min(order, VM_NFREEORDER - 1); oind < VM_NFREEORDER; oind++) { for (pind = 0; pind < VM_NFREEPOOL; pind++) { Index: sys/x86/acpica/srat.c =================================================================== --- sys/x86/acpica/srat.c +++ sys/x86/acpica/srat.c @@ -62,6 +62,8 @@ static ACPI_TABLE_SRAT *srat; static vm_paddr_t srat_physaddr; +static int vm_domains[VM_PHYSSEG_MAX]; + static void srat_walk_table(acpi_subtable_handler *handler, void *arg); /* @@ -247,7 +249,6 @@ static int renumber_domains(void) { - int domains[VM_PHYSSEG_MAX]; int i, j, slot; /* Enumerate all the domains. */ @@ -255,17 +256,17 @@ for (i = 0; i < num_mem; i++) { /* See if this domain is already known. */ for (j = 0; j < vm_ndomains; j++) { - if (domains[j] >= mem_info[i].domain) + if (vm_domains[j] >= mem_info[i].domain) break; } - if (j < vm_ndomains && domains[j] == mem_info[i].domain) + if (j < vm_ndomains && vm_domains[j] == mem_info[i].domain) continue; /* Insert the new domain at slot 'j'. */ slot = j; for (j = vm_ndomains; j > slot; j--) - domains[j] = domains[j - 1]; - domains[slot] = mem_info[i].domain; + vm_domains[j] = vm_domains[j - 1]; + vm_domains[slot] = mem_info[i].domain; vm_ndomains++; if (vm_ndomains > MAXMEMDOM) { vm_ndomains = 1; @@ -280,15 +281,15 @@ * If the domain is already the right value, no need * to renumber. */ - if (domains[i] == i) + if (vm_domains[i] == i) continue; /* Walk the cpu[] and mem_info[] arrays to renumber. */ for (j = 0; j < num_mem; j++) - if (mem_info[j].domain == domains[i]) + if (mem_info[j].domain == vm_domains[i]) mem_info[j].domain = i; for (j = 0; j <= MAX_APIC_ID; j++) - if (cpus[j].enabled && cpus[j].domain == domains[i]) + if (cpus[j].enabled && cpus[j].domain == vm_domains[i]) cpus[j].domain = i; } KASSERT(vm_ndomains > 0, @@ -368,4 +369,23 @@ } } SYSINIT(srat_set_cpus, SI_SUB_CPU, SI_ORDER_ANY, srat_set_cpus, NULL); + +/* + * Map a _PXM value to a VM domain ID. + * + * Returns the domain ID, or -1 if no domain ID was found. + */ +int +acpi_map_pxm_to_vm_domainid(int pxm) +{ + int i; + + for (i = 0; i < vm_ndomains; i++) { + if (vm_domains[i] == pxm) + return (i); + } + + return (-1); +} + #endif /* MAXMEMDOM > 1 */