Index: vm/vm_extern.h
===================================================================
--- vm/vm_extern.h
+++ vm/vm_extern.h
@@ -56,14 +56,21 @@
 /* These operate on virtual addresses backed by memory. */
 vm_offset_t kmem_alloc_attr(struct vmem *, vm_size_t size, int flags,
     vm_paddr_t low, vm_paddr_t high, vm_memattr_t memattr);
+vm_offset_t kmem_alloc_attr_domain(int domain, vm_size_t size, int flags,
+    vm_paddr_t low, vm_paddr_t high, vm_memattr_t memattr);
 vm_offset_t kmem_alloc_contig(struct vmem *, vm_size_t size, int flags,
     vm_paddr_t low, vm_paddr_t high, u_long alignment, vm_paddr_t boundary,
     vm_memattr_t memattr);
+vm_offset_t kmem_alloc_contig_domain(int domain, vm_size_t size, int flags,
+    vm_paddr_t low, vm_paddr_t high, u_long alignment, vm_paddr_t boundary,
+    vm_memattr_t memattr);
 vm_offset_t kmem_malloc(struct vmem *, vm_size_t size, int flags);
+vm_offset_t kmem_malloc_domain(int domain, vm_size_t size, int flags);
 void kmem_free(struct vmem *, vm_offset_t, vm_size_t);

 /* This provides memory for previously allocated address space. */
 int kmem_back(vm_object_t, vm_offset_t, vm_size_t, int);
+int kmem_back_domain(int, vm_object_t, vm_offset_t, vm_size_t, int);
 void kmem_unback(vm_object_t, vm_offset_t, vm_size_t);

 /* Bootstrapping. */
Index: vm/vm_init.c
===================================================================
--- vm/vm_init.c
+++ vm/vm_init.c
@@ -81,16 +81,25 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
+
+#if VM_NRESERVLEVEL > 0
+#define KVA_QUANTUM     (1 << (VM_LEVEL_0_ORDER + PAGE_SHIFT))
+#else
+        /* On non-superpage architectures we want large import sizes. */
+#define KVA_QUANTUM     (PAGE_SIZE * 1024)
+#endif

 long physmem;

 /*
@@ -107,7 +116,10 @@
 {
         vm_offset_t addr;
         int result;
-
+
+        KASSERT((size % KVA_QUANTUM) == 0,
+            ("kva_import: Size %jd is not a multiple of %u",
+            (intmax_t)size, KVA_QUANTUM));
         addr = vm_map_min(kernel_map);
         result = vm_map_find(kernel_map, NULL, 0, &addr, size, 0,
             VMFS_SUPER_SPACE, VM_PROT_ALL, VM_PROT_ALL, MAP_NOFAULT);
@@ -130,6 +142,7 @@
 vm_mem_init(dummy)
         void *dummy;
 {
+        int domain;

         /*
          * Initializes resident memory structures. From here on, all physical
@@ -150,13 +163,15 @@
          * Initialize the kernel_arena.  This can grow on demand.
          */
         vmem_init(kernel_arena, "kernel arena", 0, 0, PAGE_SIZE, 0, 0);
-        vmem_set_import(kernel_arena, kva_import, NULL, NULL,
-#if VM_NRESERVLEVEL > 0
-            1 << (VM_LEVEL_0_ORDER + PAGE_SHIFT));
-#else
-            /* On non-superpage architectures want large import sizes. */
-            PAGE_SIZE * 1024);
-#endif
+        vmem_set_import(kernel_arena, kva_import, NULL, NULL, KVA_QUANTUM);
+
+        for (domain = 0; domain < vm_ndomains; domain++) {
+                vm_dom[domain].vmd_kernel_arena = vmem_create(
+                    "kernel arena domain", 0, 0, PAGE_SIZE, 0, M_WAITOK);
+                vmem_set_import(vm_dom[domain].vmd_kernel_arena,
+                    (vmem_import_t *)vmem_alloc, NULL, kernel_arena,
+                    KVA_QUANTUM);
+        }

         kmem_init_zero_region();
         pmap_init();
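Reviewer note on the KVA_QUANTUM values introduced above: with reservations configured, each import into a per-domain arena is one level-0 reservation, which keeps the domain arenas superpage-aligned and keeps kva_import() calls rare. The standalone sketch below works through the arithmetic; the PAGE_SHIFT and VM_LEVEL_0_ORDER values are assumptions matching a common amd64 configuration, not something this patch defines.

    /*
     * Standalone model of the KVA_QUANTUM arithmetic; not part of the patch.
     * PAGE_SHIFT=12 and VM_LEVEL_0_ORDER=9 are assumed amd64-like values.
     */
    #include <stdio.h>

    #define PAGE_SHIFT       12                    /* assumed: 4 KB pages */
    #define PAGE_SIZE        (1UL << PAGE_SHIFT)
    #define VM_LEVEL_0_ORDER 9                     /* assumed: 512-page reservations */

    int
    main(void)
    {
            /* Superpage case: one reservation per import. */
            unsigned long quantum_super = 1UL << (VM_LEVEL_0_ORDER + PAGE_SHIFT);
            /* Non-superpage case: a fixed large import. */
            unsigned long quantum_plain = PAGE_SIZE * 1024;

            printf("superpage KVA_QUANTUM: %lu KB\n", quantum_super >> 10); /* 2048 */
            printf("fallback KVA_QUANTUM:  %lu KB\n", quantum_plain >> 10); /* 4096 */
            return (0);
    }

Under these assumed values both branches resolve to multi-megabyte import sizes, so the kva_import() KASSERT added above is cheap insurance that callers only grow the arenas in whole quanta.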
Index: vm/vm_kern.c
===================================================================
--- vm/vm_kern.c
+++ vm/vm_kern.c
@@ -67,6 +67,8 @@
 #include 
 __FBSDID("$FreeBSD$");

+#include "opt_vm.h"
+
 #include 
 #include 
 #include          /* for ticks and hz */
@@ -77,8 +79,10 @@
 #include 
 #include 
 #include 
+#include 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -86,6 +90,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -161,17 +166,17 @@
  * given flags, then the pages are zeroed before they are mapped.
  */
 vm_offset_t
-kmem_alloc_attr(vmem_t *vmem, vm_size_t size, int flags, vm_paddr_t low,
+kmem_alloc_attr_domain(int domain, vm_size_t size, int flags, vm_paddr_t low,
     vm_paddr_t high, vm_memattr_t memattr)
 {
+        vmem_t *vmem;
         vm_object_t object = kernel_object;
         vm_offset_t addr, i, offset;
         vm_page_t m;
         int pflags, tries;

-        KASSERT(vmem == kernel_arena,
-            ("kmem_alloc_attr: Only kernel_arena is supported."));
         size = round_page(size);
+        vmem = vm_dom[domain].vmd_kernel_arena;
         if (vmem_alloc(vmem, size, M_BESTFIT | flags, &addr))
                 return (0);
         offset = addr - VM_MIN_KERNEL_ADDRESS;
@@ -182,13 +187,13 @@
         for (i = 0; i < size; i += PAGE_SIZE) {
                 tries = 0;
retry:
-                m = vm_page_alloc_contig(object, atop(offset + i),
-                    pflags, 1, low, high, PAGE_SIZE, 0, memattr);
+                m = vm_page_alloc_contig_domain(object, atop(offset + i),
+                    domain, pflags, 1, low, high, PAGE_SIZE, 0, memattr);
                 if (m == NULL) {
                         VM_OBJECT_WUNLOCK(object);
                         if (tries < ((flags & M_NOWAIT) != 0 ? 1 : 3)) {
-                                if (!vm_page_reclaim_contig(pflags, 1,
-                                    low, high, PAGE_SIZE, 0) &&
+                                if (!vm_page_reclaim_contig_domain(pflags, 1,
+                                    domain, low, high, PAGE_SIZE, 0) &&
                                     (flags & M_WAITOK) != 0)
                                         VM_WAIT;
                                 VM_OBJECT_WLOCK(object);
@@ -199,6 +204,9 @@
                         vmem_free(vmem, addr, size);
                         return (0);
                 }
+                KASSERT(vm_phys_domidx(m) == domain,
+                    ("kmem_alloc_attr_domain: Domain mismatch %d != %d",
+                    vm_phys_domidx(m), domain));
                 if ((flags & M_ZERO) && (m->flags & PG_ZERO) == 0)
                         pmap_zero_page(m);
                 m->valid = VM_PAGE_BITS_ALL;
@@ -209,6 +217,36 @@
         return (addr);
 }

+vm_offset_t
+kmem_alloc_attr(vmem_t *vmem, vm_size_t size, int flags, vm_paddr_t low,
+    vm_paddr_t high, vm_memattr_t memattr)
+{
+        struct vm_domain_iterator vi;
+        vm_offset_t addr;
+        int domain, wait;
+
+        KASSERT(vmem == kernel_arena,
+            ("kmem_alloc_attr: Only kernel_arena is supported."));
+        addr = 0;
+        vm_policy_iterator_init(&vi);
+        wait = flags & M_WAITOK;
+        flags &= ~M_WAITOK;
+        flags |= M_NOWAIT;
+        while (vm_domain_iterator_run(&vi, &domain) == 0) {
+                if (vm_domain_iterator_isdone(&vi) && wait) {
+                        flags |= wait;
+                        flags &= ~M_NOWAIT;
+                }
+                addr = kmem_alloc_attr_domain(domain, size, flags, low, high,
+                    memattr);
+                if (addr != 0)
+                        break;
+        }
+        vm_policy_iterator_finish(&vi);
+
+        return (addr);
+}
+
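The wrapper above establishes the allocation pattern reused by kmem_alloc_contig(), kmem_malloc(), and kmem_back() below: strip M_WAITOK, walk the candidate domains non-blocking, and only let the final domain sleep. A minimal userland model of that flag dance follows; the domain count, free-page table, and allocator are stand-ins for illustration, not the kernel's vm_domain_iterator API.

    #include <stdio.h>

    #define M_NOWAIT 0x1
    #define M_WAITOK 0x2

    static int ndomains = 2;
    static int free_pages[2] = { 0, 3 };        /* domain 0 is exhausted */

    /*
     * Stand-in for kmem_alloc_attr_domain(): fails when the domain is empty
     * unless the caller may sleep (M_WAITOK), which always succeeds here.
     */
    static unsigned long
    alloc_domain(int domain, int flags)
    {
            if (free_pages[domain] == 0 && (flags & M_WAITOK) == 0)
                    return (0);
            return (0xdead0000UL | (unsigned long)domain);
    }

    int
    main(void)
    {
            int flags = M_WAITOK;   /* caller asked for a blocking allocation */
            int wait, domain;
            unsigned long addr = 0;

            /*
             * First pass over the domains is non-blocking; only the last
             * candidate domain is retried with the caller's M_WAITOK.
             */
            wait = flags & M_WAITOK;
            flags &= ~M_WAITOK;
            flags |= M_NOWAIT;
            for (domain = 0; domain < ndomains; domain++) {
                    if (domain == ndomains - 1 && wait) {
                            flags |= wait;
                            flags &= ~M_NOWAIT;
                    }
                    if ((addr = alloc_domain(domain, flags)) != 0)
                            break;
            }
            printf("allocated %#lx from domain %d\n", addr, domain);
            return (0);
    }

The design point: a blocking caller never sleeps on a domain that still has alternatives, so a depleted domain degrades into a cross-domain allocation instead of a stall.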
 /*
  * Allocates a region from the kernel address map and physically
  * contiguous pages within the specified address range to the kernel
@@ -218,19 +256,19 @@
  * mapped.
  */
 vm_offset_t
-kmem_alloc_contig(struct vmem *vmem, vm_size_t size, int flags, vm_paddr_t low,
+kmem_alloc_contig_domain(int domain, vm_size_t size, int flags, vm_paddr_t low,
     vm_paddr_t high, u_long alignment, vm_paddr_t boundary,
     vm_memattr_t memattr)
 {
+        vmem_t *vmem;
         vm_object_t object = kernel_object;
         vm_offset_t addr, offset, tmp;
         vm_page_t end_m, m;
         u_long npages;
         int pflags, tries;

-        KASSERT(vmem == kernel_arena,
-            ("kmem_alloc_contig: Only kernel_arena is supported."));
         size = round_page(size);
+        vmem = vm_dom[domain].vmd_kernel_arena;
         if (vmem_alloc(vmem, size, flags | M_BESTFIT, &addr))
                 return (0);
         offset = addr - VM_MIN_KERNEL_ADDRESS;
@@ -241,13 +279,14 @@
         VM_OBJECT_WLOCK(object);
         tries = 0;
retry:
-        m = vm_page_alloc_contig(object, atop(offset), pflags,
+        m = vm_page_alloc_contig_domain(object, atop(offset), domain, pflags,
             npages, low, high, alignment, boundary, memattr);
         if (m == NULL) {
                 VM_OBJECT_WUNLOCK(object);
                 if (tries < ((flags & M_NOWAIT) != 0 ? 1 : 3)) {
-                        if (!vm_page_reclaim_contig(pflags, npages, low, high,
-                            alignment, boundary) && (flags & M_WAITOK) != 0)
+                        if (!vm_page_reclaim_contig_domain(pflags, npages,
+                            domain, low, high, alignment, boundary) &&
+                            (flags & M_WAITOK) != 0)
                                 VM_WAIT;
                         VM_OBJECT_WLOCK(object);
                         tries++;
@@ -256,6 +295,9 @@
                 vmem_free(vmem, addr, size);
                 return (0);
         }
+        KASSERT(vm_phys_domidx(m) == domain,
+            ("kmem_alloc_contig_domain: Domain mismatch %d != %d",
+            vm_phys_domidx(m), domain));
         end_m = m + npages;
         tmp = addr;
         for (; m < end_m; m++) {
@@ -270,6 +312,37 @@
         return (addr);
 }

+vm_offset_t
+kmem_alloc_contig(struct vmem *vmem, vm_size_t size, int flags, vm_paddr_t low,
+    vm_paddr_t high, u_long alignment, vm_paddr_t boundary,
+    vm_memattr_t memattr)
+{
+        struct vm_domain_iterator vi;
+        vm_offset_t addr;
+        int domain, wait;
+
+        KASSERT(vmem == kernel_arena,
+            ("kmem_alloc_contig: Only kernel_arena is supported."));
+        addr = 0;
+        vm_policy_iterator_init(&vi);
+        wait = flags & M_WAITOK;
+        flags &= ~M_WAITOK;
+        flags |= M_NOWAIT;
+        while (vm_domain_iterator_run(&vi, &domain) == 0) {
+                if (vm_domain_iterator_isdone(&vi) && wait) {
+                        flags |= wait;
+                        flags &= ~M_NOWAIT;
+                }
+                addr = kmem_alloc_contig_domain(domain, size, flags, low, high,
+                    alignment, boundary, memattr);
+                if (addr != 0)
+                        break;
+        }
+        vm_policy_iterator_finish(&vi);
+
+        return (addr);
+}
+
 /*
  * kmem_suballoc:
  *
@@ -313,18 +386,18 @@
  *      Allocate wired-down pages in the kernel's address space.
  */
 vm_offset_t
-kmem_malloc(struct vmem *vmem, vm_size_t size, int flags)
+kmem_malloc_domain(int domain, vm_size_t size, int flags)
 {
+        vmem_t *vmem;
         vm_offset_t addr;
         int rv;

-        KASSERT(vmem == kernel_arena,
-            ("kmem_malloc: Only kernel_arena is supported."));
+        vmem = vm_dom[domain].vmd_kernel_arena;
         size = round_page(size);
         if (vmem_alloc(vmem, size, flags | M_BESTFIT, &addr))
                 return (0);

-        rv = kmem_back(kernel_object, addr, size, flags);
+        rv = kmem_back_domain(domain, kernel_object, addr, size, flags);
         if (rv != KERN_SUCCESS) {
                 vmem_free(vmem, addr, size);
                 return (0);
@@ -332,20 +405,49 @@
         return (addr);
 }

+vm_offset_t
+kmem_malloc(struct vmem *vmem, vm_size_t size, int flags)
+{
+        struct vm_domain_iterator vi;
+        vm_offset_t addr;
+        int domain, wait;
+
+        KASSERT(vmem == kernel_arena,
+            ("kmem_malloc: Only kernel_arena is supported."));
+        addr = 0;
+        vm_policy_iterator_init(&vi);
+        wait = flags & M_WAITOK;
+        flags &= ~M_WAITOK;
+        flags |= M_NOWAIT;
+        while (vm_domain_iterator_run(&vi, &domain) == 0) {
+                if (vm_domain_iterator_isdone(&vi) && wait) {
+                        flags |= wait;
+                        flags &= ~M_NOWAIT;
+                }
+                addr = kmem_malloc_domain(domain, size, flags);
+                if (addr != 0)
+                        break;
+        }
+        vm_policy_iterator_finish(&vi);
+
+        return (addr);
+}
+
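kmem_malloc_domain() above is a two-phase allocation: reserve KVA from the domain's arena, then back it with pages, undoing the reservation if backing fails. A compressed model of that shape, with malloc()/free() standing in for vmem_alloc()/vmem_free() and a made-up backing function:

    #include <stdio.h>
    #include <stdlib.h>

    /* Stand-in for kmem_back_domain(); pretend domain 0 has no free pages. */
    static int
    back_range(void *va, size_t size, int domain)
    {
            (void)va;
            (void)size;
            return (domain == 0 ? -1 : 0);
    }

    /* Two-phase shape of kmem_malloc_domain(): reserve VA, then back it. */
    static void *
    malloc_domain(size_t size, int domain)
    {
            void *va = malloc(size);        /* stand-in for vmem_alloc() */

            if (va == NULL)
                    return (NULL);
            if (back_range(va, size, domain) != 0) {
                    free(va);               /* stand-in for vmem_free() */
                    return (NULL);
            }
            return (va);
    }

    int
    main(void)
    {
            printf("domain 0: %p\n", malloc_domain(4096, 0));  /* NULL */
            printf("domain 1: %p\n", malloc_domain(4096, 1));  /* non-NULL */
            return (0);
    }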
 /*
  * kmem_back:
  *
  *      Allocate physical pages for the specified virtual address range.
  */
 int
-kmem_back(vm_object_t object, vm_offset_t addr, vm_size_t size, int flags)
+kmem_back_domain(int domain, vm_object_t object, vm_offset_t addr,
+    vm_size_t size, int flags)
 {
         vm_offset_t offset, i;
         vm_page_t m, mpred;
         int pflags;

         KASSERT(object == kernel_object,
-            ("kmem_back: only supports kernel object."));
+            ("kmem_back_domain: only supports kernel object."));

         offset = addr - VM_MIN_KERNEL_ADDRESS;
         pflags = malloc2vm_flags(flags) | VM_ALLOC_NOBUSY | VM_ALLOC_WIRED;
@@ -358,8 +460,8 @@
retry:
         mpred = vm_radix_lookup_le(&object->rtree, atop(offset + i));
         for (; i < size; i += PAGE_SIZE, mpred = m) {
-                m = vm_page_alloc_after(object, atop(offset + i), pflags,
-                    mpred);
+                m = vm_page_alloc_domain_after(object, atop(offset + i),
+                    domain, pflags, mpred);

                 /*
                  * Ran out of space, free everything up and return. Don't need
@@ -373,6 +475,9 @@
                         kmem_unback(object, addr, i);
                         return (KERN_NO_SPACE);
                 }
+                KASSERT(vm_phys_domidx(m) == domain,
+                    ("kmem_back_domain: Domain mismatch %d != %d",
+                    vm_phys_domidx(m), domain));
                 if (flags & M_ZERO && (m->flags & PG_ZERO) == 0)
                         pmap_zero_page(m);
                 KASSERT((m->oflags & VPO_UNMANAGED) != 0,
@@ -386,6 +491,33 @@
         return (KERN_SUCCESS);
 }

+int
+kmem_back(vm_object_t object, vm_offset_t addr, vm_size_t size, int flags)
+{
+        struct vm_domain_iterator vi;
+        int domain, wait, ret;
+
+        KASSERT(object == kernel_object,
+            ("kmem_back: only supports kernel object."));
+        ret = 0;
+        vm_policy_iterator_init(&vi);
+        wait = flags & M_WAITOK;
+        flags &= ~M_WAITOK;
+        flags |= M_NOWAIT;
+        while (vm_domain_iterator_run(&vi, &domain) == 0) {
+                if (vm_domain_iterator_isdone(&vi) && wait) {
+                        flags |= wait;
+                        flags &= ~M_NOWAIT;
+                }
+                ret = kmem_back_domain(domain, object, addr, size, flags);
+                if (ret == KERN_SUCCESS)
+                        break;
+        }
+        vm_policy_iterator_finish(&vi);
+
+        return (ret);
+}
+
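One subtlety in kmem_back_domain() worth calling out: on failure the code unbacks only the prefix of length i that was actually populated, not the whole requested range. A small standalone model of that error path (page count and failure point invented for illustration):

    #include <stdbool.h>
    #include <stdio.h>

    #define PAGE_SIZE 4096UL

    static bool
    alloc_page(size_t idx)
    {
            return (idx < 3);       /* pretend the fourth page fails */
    }

    static int
    back(size_t size)
    {
            size_t i;

            for (i = 0; i < size; i += PAGE_SIZE) {
                    if (!alloc_page(i / PAGE_SIZE)) {
                            /*
                             * Unwind exactly the [0, i) prefix, as
                             * kmem_unback(object, addr, i) does above.
                             */
                            printf("failed at %zu, unbacking %zu bytes\n", i, i);
                            return (-1);
                    }
            }
            return (0);
    }

    int
    main(void)
    {
            back(8 * PAGE_SIZE);
            return (0);
    }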
 /*
  * kmem_unback:
  *
@@ -395,28 +527,41 @@
  *      A physical page must exist within the specified object at each index
  *      that is being unmapped.
  */
-void
-kmem_unback(vm_object_t object, vm_offset_t addr, vm_size_t size)
+static int
+_kmem_unback(vm_object_t object, vm_offset_t addr, vm_size_t size)
 {
         vm_page_t m, next;
         vm_offset_t end, offset;
+        int domain;

         KASSERT(object == kernel_object,
             ("kmem_unback: only supports kernel object."));

+        if (size == 0)
+                return (0);
         pmap_remove(kernel_pmap, addr, addr + size);
         offset = addr - VM_MIN_KERNEL_ADDRESS;
         end = offset + size;
         VM_OBJECT_WLOCK(object);
-        for (m = vm_page_lookup(object, atop(offset)); offset < end;
-            offset += PAGE_SIZE, m = next) {
+        m = vm_page_lookup(object, atop(offset));
+        domain = vm_phys_domidx(m);
+        for (; offset < end; offset += PAGE_SIZE, m = next) {
                 next = vm_page_next(m);
                 vm_page_unwire(m, PQ_NONE);
                 vm_page_free(m);
         }
         VM_OBJECT_WUNLOCK(object);
+
+        return (domain);
 }

+void
+kmem_unback(vm_object_t object, vm_offset_t addr, vm_size_t size)
+{
+
+        _kmem_unback(object, addr, size);
+}
+
 /*
  * kmem_free:
  *
@@ -426,12 +571,13 @@
 void
 kmem_free(struct vmem *vmem, vm_offset_t addr, vm_size_t size)
 {
+        int domain;

         KASSERT(vmem == kernel_arena,
             ("kmem_free: Only kernel_arena is supported."));

         size = round_page(size);
-        kmem_unback(kernel_object, addr, size);
-        vmem_free(vmem, addr, size);
+        domain = _kmem_unback(kernel_object, addr, size);
+        vmem_free(vm_dom[domain].vmd_kernel_arena, addr, size);
 }

 /*
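The split into _kmem_unback() returning a domain exists so kmem_free() can route the KVA back to the arena it was imported from; the domain is recovered from the first backing page, which is valid because a single allocation is always backed from one domain arena. A toy model of that round trip (the arena and per-page domain tables below are fabricated):

    #include <assert.h>
    #include <stdio.h>

    struct arena { int nfree; };

    static struct arena arenas[2];
    static int page_domain[16];     /* pretend: domain of each backing page */

    static int
    unback(int first_page, int npages)
    {
            int domain = page_domain[first_page]; /* domain of the first page */

            /* All pages of one allocation came from one domain arena. */
            for (int i = 0; i < npages; i++)
                    assert(page_domain[first_page + i] == domain);
            return (domain);
    }

    static void
    kva_free(int first_page, int npages)
    {
            /* Route the KVA back to the owning domain's arena. */
            arenas[unback(first_page, npages)].nfree += npages;
    }

    int
    main(void)
    {
            for (int i = 0; i < 8; i++)
                    page_domain[i] = 1;     /* allocation backed by domain 1 */
            kva_free(0, 8);
            printf("domain 1 arena free pages: %d\n", arenas[1].nfree);
            return (0);
    }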
Index: vm/vm_page.h
===================================================================
--- vm/vm_page.h
+++ vm/vm_page.h
@@ -229,6 +229,7 @@
 struct vm_domain {
         struct vm_pagequeue vmd_pagequeues[PQ_COUNT];
+        struct vmem *vmd_kernel_arena;
         u_int vmd_page_count;
         u_int vmd_free_count;
         long vmd_segs;  /* bitmask of the segments */
Index: vm/vm_page.c
===================================================================
--- vm/vm_page.c
+++ vm/vm_page.c
@@ -2568,8 +2568,8 @@
  * must be a power of two.
  */
 bool
-vm_page_reclaim_contig(int req, u_long npages, vm_paddr_t low, vm_paddr_t high,
-    u_long alignment, vm_paddr_t boundary)
+vm_page_reclaim_contig_domain(int req, u_long npages, int domain,
+    vm_paddr_t low, vm_paddr_t high, u_long alignment, vm_paddr_t boundary)
 {
         vm_paddr_t curr_low;
         vm_page_t m_run, m_runs[NRUNS];
@@ -2609,8 +2609,8 @@
         curr_low = low;
         count = 0;
         for (;;) {
-                m_run = vm_phys_scan_contig(npages, curr_low, high,
-                    alignment, boundary, options);
+                m_run = vm_phys_scan_contig(domain, npages, curr_low,
+                    high, alignment, boundary, options);
                 if (m_run == NULL)
                         break;
                 curr_low = VM_PAGE_TO_PHYS(m_run) + ptoa(npages);
@@ -2650,6 +2650,28 @@
                 return (reclaimed != 0);
         }
 }
+
+bool
+vm_page_reclaim_contig(int req, u_long npages, vm_paddr_t low, vm_paddr_t high,
+    u_long alignment, vm_paddr_t boundary)
+{
+        struct vm_domain_iterator vi;
+        int domain;
+        bool ret;
+
+        ret = false;
+        vm_policy_iterator_init(&vi);
+        while (vm_domain_iterator_run(&vi, &domain) == 0) {
+                ret = vm_page_reclaim_contig_domain(req, npages, domain, low,
+                    high, alignment, boundary);
+                if (ret)
+                        break;
+        }
+        vm_policy_iterator_finish(&vi);
+
+        return (ret);
+}

 /*
  *      vm_wait:        (also see VM_WAIT macro)
Index: vm/vm_phys.h
===================================================================
--- vm/vm_phys.h
+++ vm/vm_phys.h
@@ -86,8 +86,8 @@
 void vm_phys_free_pages(vm_page_t m, int order);
 void vm_phys_init(void);
 vm_page_t vm_phys_paddr_to_vm_page(vm_paddr_t pa);
-vm_page_t vm_phys_scan_contig(u_long npages, vm_paddr_t low, vm_paddr_t high,
-    u_long alignment, vm_paddr_t boundary, int options);
+vm_page_t vm_phys_scan_contig(int domain, u_long npages, vm_paddr_t low,
+    vm_paddr_t high, u_long alignment, vm_paddr_t boundary, int options);
 void vm_phys_set_pool(int pool, vm_page_t m, int order);
 boolean_t vm_phys_unfree_page(vm_page_t m);
 int vm_phys_mem_affinity(int f, int t);
@@ -101,7 +101,6 @@
 static inline int
 vm_phys_domidx(vm_page_t m)
 {
-#ifdef VM_NUMA_ALLOC
         int domn, segind;

         /* XXXKIB try to assert that the page is managed */
@@ -110,9 +109,6 @@
         domn = vm_phys_segs[segind].domain;
         KASSERT(domn < vm_ndomains, ("domain %d m %p", domn, m));
         return (domn);
-#else
-        return (0);
-#endif
 }

 /*
Index: vm/vm_phys.c
===================================================================
--- vm/vm_phys.c
+++ vm/vm_phys.c
@@ -968,7 +968,7 @@
  * be a power of two.
  */
 vm_page_t
-vm_phys_scan_contig(u_long npages, vm_paddr_t low, vm_paddr_t high,
+vm_phys_scan_contig(int domain, u_long npages, vm_paddr_t low, vm_paddr_t high,
     u_long alignment, vm_paddr_t boundary, int options)
 {
         vm_paddr_t pa_end;
@@ -983,6 +983,8 @@
                 return (NULL);
         for (segind = 0; segind < vm_phys_nsegs; segind++) {
                 seg = &vm_phys_segs[segind];
+                if (seg->domain != domain)
+                        continue;
                 if (seg->start >= high)
                         break;
                 if (low >= seg->end)
Index: vm/vm_reserv.c
===================================================================
--- vm/vm_reserv.c
+++ vm/vm_reserv.c
@@ -715,7 +715,7 @@
         LIST_INSERT_HEAD(&object->rvq, rv, objq);
         rv->object = object;
         rv->pindex = first;
-        rv->domain = vm_phys_domidx(m);
+        rv->domain = domain;
         KASSERT(rv->popcnt == 0,
             ("vm_reserv_alloc_page: reserv %p's popcnt is corrupted", rv));
         KASSERT(!rv->inpartpopq,
             ("vm_reserv_alloc_page: reserv %p's inpartpopq is TRUE", rv));
@@ -734,6 +734,8 @@
 found:
         index = VM_RESERV_INDEX(object, pindex);
         m = &rv->pages[index];
+        KASSERT(object != kernel_object || vm_phys_domidx(m) == domain,
+            ("vm_reserv_alloc_page: Domain mismatch from reservation."));
         /* Handle vm_page_rename(m, new_object, ...). */
         if (popmap_is_set(rv->popmap, index))
                 return (NULL);
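For completeness, the vm_phys_scan_contig() change reduces to a per-segment domain filter: segments owned by other domains are skipped before the existing range checks run. A standalone illustration with an invented segment table:

    #include <stdio.h>

    struct seg { unsigned long start, end; int domain; };

    static struct seg segs[] = {
            { 0x00000000, 0x40000000, 0 },
            { 0x40000000, 0x80000000, 1 },
            { 0x80000000, 0xc0000000, 0 },
    };

    int
    main(void)
    {
            int domain = 0;

            for (int i = 0; i < 3; i++) {
                    if (segs[i].domain != domain)
                            continue;       /* the check added by this patch */
                    printf("scan seg %d: [%#lx, %#lx)\n", i,
                        segs[i].start, segs[i].end);
            }
            return (0);
    }

This is what lets vm_page_reclaim_contig_domain() confine reclamation to one domain while the vm_page_reclaim_contig() wrapper preserves the old any-domain behavior for existing callers.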