Index: sys/kern/kern_cpuset.c
===================================================================
--- sys/kern/kern_cpuset.c
+++ sys/kern/kern_cpuset.c
@@ -65,7 +65,12 @@
 #include
 #include
 #include
+#include
+#include
 #include
+#include
+#include
+#include

 #ifdef DDB
 #include
@@ -478,6 +483,26 @@
 }

+/*
+ * Are any of the domains in the mask empty?  If so, silently
+ * remove them.  If only empty domains are present, we must
+ * return failure.
+ */
+static bool
+domainset_empty_vm(struct domainset *domain)
+{
+        int i, max;
+
+        max = DOMAINSET_FLS(&domain->ds_mask) + 1;
+        for (i = 0; i < max; i++) {
+                if (DOMAINSET_ISSET(i, &domain->ds_mask) &&
+                    VM_DOMAIN_EMPTY(i))
+                        DOMAINSET_CLR(i, &domain->ds_mask);
+        }
+
+        return (DOMAINSET_EMPTY(&domain->ds_mask));
+}
+
 /*
  * Create or lookup a domainset based on the key held in 'domain'.
  */
@@ -1360,6 +1385,7 @@
                 DOMAINSET_SET(i, &dset->ds_mask);
         dset->ds_policy = DOMAINSET_POLICY_FIRSTTOUCH;
         dset->ds_prefer = -1;
+        (void)domainset_empty_vm(dset);
         curthread->td_domain.dr_policy = _domainset_create(dset, NULL);

         domainset_copy(dset, &domainset2);
@@ -2087,6 +2113,13 @@
                 DOMAINSET_FILL(&domain.ds_mask);
         }

+        /*
+         * When given an impossible policy, fall back to interleaving
+         * across all domains
+         */
+        if (domainset_empty_vm(&domain))
+                domainset_copy(&domainset2, &domain);
+
         switch (level) {
         case CPU_LEVEL_ROOT:
         case CPU_LEVEL_CPUSET:
Index: sys/vm/uma_core.c
===================================================================
--- sys/vm/uma_core.c
+++ sys/vm/uma_core.c
@@ -84,6 +84,7 @@
 #include
 #include
 #include
+#include
 #include
 #include
 #include
@@ -2469,9 +2470,11 @@
         if (bucket != NULL)
                 bucket_free(zone, bucket, udata);

-        if (zone->uz_flags & UMA_ZONE_NUMA)
+        if (zone->uz_flags & UMA_ZONE_NUMA) {
                 domain = PCPU_GET(domain);
-        else
+                if (VM_DOMAIN_EMPTY(domain))
+                        domain = UMA_ANYDOMAIN;
+        } else
                 domain = UMA_ANYDOMAIN;

         /* Short-circuit for zones without buckets and low memory. */
@@ -2647,7 +2650,11 @@
                 rdomain = 0;
         rr = rdomain == UMA_ANYDOMAIN;
         if (rr) {
-                keg->uk_cursor = (keg->uk_cursor + 1) % vm_ndomains;
+                start = keg->uk_cursor;
+                do {
+                        keg->uk_cursor = (keg->uk_cursor + 1) % vm_ndomains;
+                        domain = keg->uk_cursor;
+                } while (VM_DOMAIN_EMPTY(domain) && domain != start);
                 domain = start = keg->uk_cursor;
                 /* Only block on the second pass. */
                 if ((flags & (M_WAITOK | M_NOVM)) == M_WAITOK)
@@ -2698,8 +2705,11 @@
                         LIST_INSERT_HEAD(&dom->ud_part_slab, slab, us_link);
                         return (slab);
                 }
-                if (rr)
-                        domain = (domain + 1) % vm_ndomains;
+                if (rr) {
+                        do {
+                                domain = (domain + 1) % vm_ndomains;
+                        } while (VM_DOMAIN_EMPTY(domain) && domain != start);
+                }
         } while (domain != start);

         /* Retry domain scan with blocking. */
@@ -2903,6 +2913,8 @@
         uma_bucket_t bucket;
         int max;

+        CTR1(KTR_UMA, "zone_alloc:_bucket domain %d)", domain);
+
         /* Don't wait for buckets, preserve caller's NOVM setting. */
         bucket = bucket_alloc(zone, udata, M_NOWAIT | (flags & M_NOVM));
         if (bucket == NULL)
@@ -2970,6 +2982,11 @@
         item = NULL;

+        if (domain != UMA_ANYDOMAIN) {
+                /* avoid allocs targeting empty domains */
+                if (VM_DOMAIN_EMPTY(domain))
+                        domain = UMA_ANYDOMAIN;
+        }
         if (zone->uz_import(zone->uz_arg, &item, 1, domain, flags) != 1)
                 goto fail;
         atomic_add_long(&zone->uz_allocs, 1);
@@ -3139,9 +3156,11 @@
         /* We are no longer associated with this CPU. */
         critical_exit();

-        if ((zone->uz_flags & UMA_ZONE_NUMA) != 0)
+        if ((zone->uz_flags & UMA_ZONE_NUMA) != 0) {
                 domain = PCPU_GET(domain);
-        else
+                if (VM_DOMAIN_EMPTY(domain))
+                        domain = UMA_ANYDOMAIN;
+        } else
                 domain = 0;
         zdom = &zone->uz_domain[0];
@@ -3588,7 +3607,9 @@
                 dom = &keg->uk_domain[slab->us_domain];
                 LIST_INSERT_HEAD(&dom->ud_free_slab, slab, us_link);
                 slabs--;
-                domain = (domain + 1) % vm_ndomains;
+                do {
+                        domain = (domain + 1) % vm_ndomains;
+                } while (VM_DOMAIN_EMPTY(domain));
         }
         KEG_UNLOCK(keg);
 }
@@ -3678,6 +3699,11 @@
         vm_offset_t addr;
         uma_slab_t slab;

+        if (domain != UMA_ANYDOMAIN) {
+                /* avoid allocs targeting empty domains */
+                if (VM_DOMAIN_EMPTY(domain))
+                        domain = UMA_ANYDOMAIN;
+        }
         slab = zone_alloc_item(slabzone, NULL, domain, wait);
         if (slab == NULL)
                 return (NULL);
Index: sys/vm/vm_kern.c
===================================================================
--- sys/vm/vm_kern.c
+++ sys/vm/vm_kern.c
@@ -502,6 +502,8 @@
          */
         if (vm_ndomains > 1) {
                 domain = (addr >> KVA_QUANTUM_SHIFT) % vm_ndomains;
+                while (VM_DOMAIN_EMPTY(domain))
+                        domain++;
                 next = roundup2(addr + 1, KVA_QUANTUM);
                 if (next > end || next < start)
                         next = end;
Index: sys/vm/vm_pageout.c
===================================================================
--- sys/vm/vm_pageout.c
+++ sys/vm/vm_pageout.c
@@ -2082,6 +2082,13 @@
         if (error != 0)
                 panic("starting laundry for domain 0, error %d", error);
         for (i = 1; i < vm_ndomains; i++) {
+                if (VM_DOMAIN_EMPTY(i)) {
+                        if (bootverbose)
+                                printf("domain %d empty; skipping pageout\n",
+                                    i);
+                        continue;
+                }
+
                 error = kthread_add(vm_pageout_worker, (void *)(uintptr_t)i,
                     curproc, NULL, 0, 0, "dom%d", i);
                 if (error != 0) {
Index: sys/vm/vm_pagequeue.h
===================================================================
--- sys/vm/vm_pagequeue.h
+++ sys/vm/vm_pagequeue.h
@@ -151,7 +151,8 @@

 extern struct vm_domain vm_dom[MAXMEMDOM];

-#define VM_DOMAIN(n)            (&vm_dom[(n)])
+#define VM_DOMAIN(n)            (&vm_dom[(n)])
+#define VM_DOMAIN_EMPTY(n)      (vm_dom[(n)].vmd_page_count == 0)

 #define vm_pagequeue_assert_locked(pq)  mtx_assert(&(pq)->pq_mutex, MA_OWNED)
 #define vm_pagequeue_lock(pq)           mtx_lock(&(pq)->pq_mutex)
Index: sys/x86/acpica/srat.c
===================================================================
--- sys/x86/acpica/srat.c
+++ sys/x86/acpica/srat.c
@@ -311,8 +311,20 @@
         }
         for (i = 0; i <= max_apic_id; i++)
                 if (cpus[i].enabled && !cpus[i].has_memory) {
-                        printf("SRAT: No memory found for CPU %d\n", i);
-                        return (ENXIO);
+                        found = 0;
+                        for (j = 0; j < num_mem && !found; j++) {
+                                if (mem_info[j].domain == cpus[i].domain)
+                                        found = 1;
+                        }
+                        if (!found) {
+                                if (bootverbose)
+                                        printf("SRAT: mem dom %d is empty\n",
+                                            cpus[i].domain);
+                                mem_info[num_mem].start = 0;
+                                mem_info[num_mem].end = 0;
+                                mem_info[num_mem].domain = cpus[i].domain;
+                                num_mem++;
+                        }
                 }
         return (0);
 }
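
Illustrative sketch, not part of the patch above: the recurring pattern in the uma_core.c hunks is a round-robin walk over memory domains that skips any domain VM_DOMAIN_EMPTY() reports as having no pages, stopping once the walk wraps back to its starting point. The standalone userspace model below shows that loop shape; NDOMAINS, page_count, domain_empty() and next_nonempty_domain() are made-up stand-ins for vm_ndomains, vmd_page_count, VM_DOMAIN_EMPTY() and the keg cursor logic, not kernel interfaces.

/*
 * Illustrative only: userspace model of the round-robin domain selection
 * in the uma_core.c hunks, skipping domains that have no pages.  All
 * names here are hypothetical stand-ins.
 */
#include <stdbool.h>
#include <stdio.h>

#define NDOMAINS        4

/* Sample page counts: domains 1 and 3 have no memory in this example. */
static const int page_count[NDOMAINS] = { 1024, 0, 2048, 0 };

static bool
domain_empty(int d)
{

        return (page_count[d] == 0);
}

/*
 * Advance the cursor round-robin, skipping empty domains.  Returns -1 when
 * every domain is empty (a caller would fall back to "any domain").
 */
static int
next_nonempty_domain(int cursor)
{
        int d, start;

        d = start = cursor;
        do {
                d = (d + 1) % NDOMAINS;
        } while (domain_empty(d) && d != start);
        return (domain_empty(d) ? -1 : d);
}

int
main(void)
{
        int cursor, i;

        cursor = 0;
        for (i = 0; i < 6; i++) {
                cursor = next_nonempty_domain(cursor);
                printf("allocation %d -> domain %d\n", i, cursor);
        }
        return (0);
}

With the sample counts above the output alternates between domains 2 and 0, which is how the keg->uk_cursor walk in the second uma_core.c hunk is expected to behave once empty domains are skipped.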