Index: head/sys/kern/subr_vmem.c
===================================================================
--- head/sys/kern/subr_vmem.c
+++ head/sys/kern/subr_vmem.c
@@ -70,6 +70,7 @@
 #include
 #include
 #include
+#include
 #include
 
 #define	VMEM_OPTORDER		5
@@ -186,6 +187,7 @@
 static struct mtx_padalign __exclusive_cache_line vmem_list_lock;
 static LIST_HEAD(, vmem) vmem_list = LIST_HEAD_INITIALIZER(vmem_list);
+static uma_zone_t vmem_zone;
 
 /* ---- misc */
 #define	VMEM_CONDVAR_INIT(vm, wchan)	cv_init(&vm->vm_cv, wchan)
@@ -255,11 +257,11 @@
 	VMEM_ASSERT_LOCKED(vm);
 
 	/*
-	 * Only allow the kernel arena to dip into reserve tags.  It is the
-	 * vmem where new tags come from.
+	 * Only allow the kernel arena and arenas derived from kernel arena to
+	 * dip into reserve tags.  They are where new tags come from.
 	 */
 	flags &= BT_FLAGS;
-	if (vm != kernel_arena)
+	if (vm != kernel_arena && vm->vm_arg != kernel_arena)
 		flags &= ~M_USE_RESERVE;
 
 	/*
@@ -615,23 +617,25 @@
 vmem_bt_alloc(uma_zone_t zone, vm_size_t bytes, uint8_t *pflag, int wait)
 {
 	vmem_addr_t addr;
+	int domain;
 
 	*pflag = UMA_SLAB_KERNEL;
+	domain = 0;	/* XXX Temporary. */
 
 	/*
 	 * Single thread boundary tag allocation so that the address space
 	 * and memory are added in one atomic operation.
 	 */
 	mtx_lock(&vmem_bt_lock);
-	if (vmem_xalloc(kernel_arena, bytes, 0, 0, 0, VMEM_ADDR_MIN,
-	    VMEM_ADDR_MAX, M_NOWAIT | M_NOVM | M_USE_RESERVE | M_BESTFIT,
-	    &addr) == 0) {
-		if (kmem_back(kernel_object, addr, bytes,
+	if (vmem_xalloc(vm_dom[domain].vmd_kernel_arena, bytes, 0, 0, 0,
+	    VMEM_ADDR_MIN, VMEM_ADDR_MAX,
+	    M_NOWAIT | M_NOVM | M_USE_RESERVE | M_BESTFIT, &addr) == 0) {
+		if (kmem_back_domain(domain, kernel_object, addr, bytes,
 		    M_NOWAIT | M_USE_RESERVE) == 0) {
 			mtx_unlock(&vmem_bt_lock);
 			return ((void *)addr);
 		}
-		vmem_xfree(kernel_arena, addr, bytes);
+		vmem_xfree(vm_dom[domain].vmd_kernel_arena, addr, bytes);
 		mtx_unlock(&vmem_bt_lock);
 		/*
 		 * Out of memory, not address space.  This may not even be
@@ -657,9 +661,12 @@
 {
 
 	mtx_init(&vmem_list_lock, "vmem list lock", NULL, MTX_DEF);
+	vmem_zone = uma_zcreate("vmem",
+	    sizeof(struct vmem), NULL, NULL, NULL, NULL,
+	    UMA_ALIGN_PTR, UMA_ZONE_VM);
 	vmem_bt_zone = uma_zcreate("vmem btag",
 	    sizeof(struct vmem_btag), NULL, NULL, NULL, NULL,
-	    UMA_ALIGN_PTR, UMA_ZONE_VM);
+	    UMA_ALIGN_PTR, UMA_ZONE_VM | UMA_ZONE_NOFREE);
 #ifndef UMA_MD_SMALL_ALLOC
 	mtx_init(&vmem_bt_lock, "btag lock", NULL, MTX_DEF);
 	uma_prealloc(vmem_bt_zone, BT_MAXALLOC);
@@ -826,7 +833,7 @@
 
 	VMEM_CONDVAR_DESTROY(vm);
 	VMEM_LOCK_DESTROY(vm);
-	free(vm, M_VMEM);
+	uma_zfree(vmem_zone, vm);
 }
 
 static int
@@ -1058,7 +1065,7 @@
 
 	vmem_t *vm;
 
-	vm = malloc(sizeof(*vm), M_VMEM, flags & (M_WAITOK|M_NOWAIT));
+	vm = uma_zalloc(vmem_zone, flags & (M_WAITOK|M_NOWAIT));
 	if (vm == NULL)
 		return (NULL);
 	if (vmem_init(vm, name, base, size, quantum, qcache_max,
Index: head/sys/vm/vm_extern.h
===================================================================
--- head/sys/vm/vm_extern.h
+++ head/sys/vm/vm_extern.h
@@ -56,14 +56,21 @@
 
 /* These operate on virtual addresses backed by memory. */
 vm_offset_t kmem_alloc_attr(struct vmem *, vm_size_t size, int flags,
     vm_paddr_t low, vm_paddr_t high, vm_memattr_t memattr);
+vm_offset_t kmem_alloc_attr_domain(int domain, vm_size_t size, int flags,
+    vm_paddr_t low, vm_paddr_t high, vm_memattr_t memattr);
 vm_offset_t kmem_alloc_contig(struct vmem *, vm_size_t size, int flags,
     vm_paddr_t low, vm_paddr_t high, u_long alignment, vm_paddr_t boundary,
     vm_memattr_t memattr);
+vm_offset_t kmem_alloc_contig_domain(int domain, vm_size_t size, int flags,
+    vm_paddr_t low, vm_paddr_t high, u_long alignment, vm_paddr_t boundary,
+    vm_memattr_t memattr);
 vm_offset_t kmem_malloc(struct vmem *, vm_size_t size, int flags);
+vm_offset_t kmem_malloc_domain(int domain, vm_size_t size, int flags);
 void kmem_free(struct vmem *, vm_offset_t, vm_size_t);
 
 /* This provides memory for previously allocated address space. */
 int kmem_back(vm_object_t, vm_offset_t, vm_size_t, int);
+int kmem_back_domain(int, vm_object_t, vm_offset_t, vm_size_t, int);
 void kmem_unback(vm_object_t, vm_offset_t, vm_size_t);
 
 /* Bootstrapping. */
Index: head/sys/vm/vm_init.c
===================================================================
--- head/sys/vm/vm_init.c
+++ head/sys/vm/vm_init.c
@@ -81,16 +81,25 @@
 #include
 #include
 #include
+#include
 #include
 #include
 #include
 #include
 #include
+#include
 #include
 #include
 #include
+
+#if VM_NRESERVLEVEL > 0
+#define	KVA_QUANTUM	(1 << (VM_LEVEL_0_ORDER + PAGE_SHIFT))
+#else
+	/* On non-superpage architectures want large import sizes. */
+#define	KVA_QUANTUM	(PAGE_SIZE * 1024)
+#endif
 
 long physmem;
 
 /*
@@ -107,7 +116,10 @@
 {
 	vm_offset_t addr;
 	int result;
-
+
+	KASSERT((size % KVA_QUANTUM) == 0,
+	    ("kva_import: Size %jd is not a multiple of %d",
+	    (intmax_t)size, (int)KVA_QUANTUM));
 	addr = vm_map_min(kernel_map);
 	result = vm_map_find(kernel_map, NULL, 0, &addr, size, 0,
 	    VMFS_SUPER_SPACE, VM_PROT_ALL, VM_PROT_ALL, MAP_NOFAULT);
@@ -130,6 +142,7 @@
 vm_mem_init(dummy)
 	void *dummy;
 {
+	int domain;
 
 	/*
 	 * Initializes resident memory structures.  From here on, all physical
@@ -150,13 +163,15 @@
 	 * Initialize the kernel_arena.  This can grow on demand.
 	 */
 	vmem_init(kernel_arena, "kernel arena", 0, 0, PAGE_SIZE, 0, 0);
-	vmem_set_import(kernel_arena, kva_import, NULL, NULL,
-#if VM_NRESERVLEVEL > 0
-	    1 << (VM_LEVEL_0_ORDER + PAGE_SHIFT));
-#else
-	    /* On non-superpage architectures want large import sizes. */
-	    PAGE_SIZE * 1024);
-#endif
+	vmem_set_import(kernel_arena, kva_import, NULL, NULL, KVA_QUANTUM);
+
+	for (domain = 0; domain < vm_ndomains; domain++) {
+		vm_dom[domain].vmd_kernel_arena = vmem_create(
+		    "kernel arena domain", 0, 0, PAGE_SIZE, 0, M_WAITOK);
+		vmem_set_import(vm_dom[domain].vmd_kernel_arena,
+		    (vmem_import_t *)vmem_alloc, NULL, kernel_arena,
+		    KVA_QUANTUM);
+	}
 
 	kmem_init_zero_region();
 	pmap_init();
Index: head/sys/vm/vm_kern.c
===================================================================
--- head/sys/vm/vm_kern.c
+++ head/sys/vm/vm_kern.c
@@ -67,9 +67,12 @@
 #include
 __FBSDID("$FreeBSD$");
 
+#include "opt_vm.h"
+
 #include
 #include
 #include		/* for ticks and hz */
+#include
 #include
 #include
 #include
@@ -77,15 +80,18 @@
 #include
 #include
 #include
+#include
 #include
 #include
+#include
 #include
 #include
 #include
 #include
 #include
 #include
+#include
 #include
 #include
 #include
@@ -161,17 +167,17 @@
  * given flags, then the pages are zeroed before they are mapped.
  */
 vm_offset_t
-kmem_alloc_attr(vmem_t *vmem, vm_size_t size, int flags, vm_paddr_t low,
+kmem_alloc_attr_domain(int domain, vm_size_t size, int flags, vm_paddr_t low,
     vm_paddr_t high, vm_memattr_t memattr)
 {
+	vmem_t *vmem;
 	vm_object_t object = kernel_object;
 	vm_offset_t addr, i, offset;
 	vm_page_t m;
 	int pflags, tries;
 
-	KASSERT(vmem == kernel_arena,
-	    ("kmem_alloc_attr: Only kernel_arena is supported."));
 	size = round_page(size);
+	vmem = vm_dom[domain].vmd_kernel_arena;
 	if (vmem_alloc(vmem, size, M_BESTFIT | flags, &addr))
 		return (0);
 	offset = addr - VM_MIN_KERNEL_ADDRESS;
@@ -182,13 +188,13 @@
 	for (i = 0; i < size; i += PAGE_SIZE) {
 		tries = 0;
retry:
-		m = vm_page_alloc_contig(object, atop(offset + i),
-		    pflags, 1, low, high, PAGE_SIZE, 0, memattr);
+		m = vm_page_alloc_contig_domain(object, atop(offset + i),
+		    domain, pflags, 1, low, high, PAGE_SIZE, 0, memattr);
 		if (m == NULL) {
 			VM_OBJECT_WUNLOCK(object);
 			if (tries < ((flags & M_NOWAIT) != 0 ? 1 : 3)) {
-				if (!vm_page_reclaim_contig(pflags, 1,
-				    low, high, PAGE_SIZE, 0) &&
+				if (!vm_page_reclaim_contig_domain(domain,
+				    pflags, 1, low, high, PAGE_SIZE, 0) &&
 				    (flags & M_WAITOK) != 0)
 					VM_WAIT;
 				VM_OBJECT_WLOCK(object);
@@ -199,6 +205,9 @@
 			vmem_free(vmem, addr, size);
 			return (0);
 		}
+		KASSERT(vm_phys_domidx(m) == domain,
+		    ("kmem_alloc_attr_domain: Domain mismatch %d != %d",
+		    vm_phys_domidx(m), domain));
 		if ((flags & M_ZERO) && (m->flags & PG_ZERO) == 0)
 			pmap_zero_page(m);
 		m->valid = VM_PAGE_BITS_ALL;
@@ -209,6 +218,28 @@
 	return (addr);
 }
 
+vm_offset_t
+kmem_alloc_attr(vmem_t *vmem, vm_size_t size, int flags, vm_paddr_t low,
+    vm_paddr_t high, vm_memattr_t memattr)
+{
+	struct vm_domainset_iter di;
+	vm_offset_t addr;
+	int domain;
+
+	KASSERT(vmem == kernel_arena,
+	    ("kmem_alloc_attr: Only kernel_arena is supported."));
+
+	vm_domainset_iter_malloc_init(&di, kernel_object, &domain, &flags);
+	do {
+		addr = kmem_alloc_attr_domain(domain, size, flags, low, high,
+		    memattr);
+		if (addr != 0)
+			break;
+	} while (vm_domainset_iter_malloc(&di, &domain, &flags) == 0);
+
+	return (addr);
+}
+
 /*
  * Allocates a region from the kernel address map and physically
  * contiguous pages within the specified address range to the kernel
@@ -218,19 +249,19 @@
  * mapped.
  */
 vm_offset_t
-kmem_alloc_contig(struct vmem *vmem, vm_size_t size, int flags, vm_paddr_t low,
+kmem_alloc_contig_domain(int domain, vm_size_t size, int flags, vm_paddr_t low,
     vm_paddr_t high, u_long alignment, vm_paddr_t boundary,
     vm_memattr_t memattr)
 {
+	vmem_t *vmem;
 	vm_object_t object = kernel_object;
 	vm_offset_t addr, offset, tmp;
 	vm_page_t end_m, m;
 	u_long npages;
 	int pflags, tries;
 
-	KASSERT(vmem == kernel_arena,
-	    ("kmem_alloc_contig: Only kernel_arena is supported."));
 	size = round_page(size);
+	vmem = vm_dom[domain].vmd_kernel_arena;
 	if (vmem_alloc(vmem, size, flags | M_BESTFIT, &addr))
 		return (0);
 	offset = addr - VM_MIN_KERNEL_ADDRESS;
@@ -241,13 +272,14 @@
 	VM_OBJECT_WLOCK(object);
 	tries = 0;
retry:
-	m = vm_page_alloc_contig(object, atop(offset), pflags,
+	m = vm_page_alloc_contig_domain(object, atop(offset), domain, pflags,
 	    npages, low, high, alignment, boundary, memattr);
 	if (m == NULL) {
 		VM_OBJECT_WUNLOCK(object);
 		if (tries < ((flags & M_NOWAIT) != 0 ? 1 : 3)) {
-			if (!vm_page_reclaim_contig(pflags, npages, low, high,
-			    alignment, boundary) && (flags & M_WAITOK) != 0)
+			if (!vm_page_reclaim_contig_domain(domain, pflags,
+			    npages, low, high, alignment, boundary) &&
+			    (flags & M_WAITOK) != 0)
 				VM_WAIT;
 			VM_OBJECT_WLOCK(object);
 			tries++;
@@ -256,6 +288,9 @@
 		vmem_free(vmem, addr, size);
 		return (0);
 	}
+	KASSERT(vm_phys_domidx(m) == domain,
+	    ("kmem_alloc_contig_domain: Domain mismatch %d != %d",
+	    vm_phys_domidx(m), domain));
 	end_m = m + npages;
 	tmp = addr;
 	for (; m < end_m; m++) {
@@ -270,6 +305,29 @@
 	return (addr);
 }
 
+vm_offset_t
+kmem_alloc_contig(struct vmem *vmem, vm_size_t size, int flags, vm_paddr_t low,
+    vm_paddr_t high, u_long alignment, vm_paddr_t boundary,
+    vm_memattr_t memattr)
+{
+	struct vm_domainset_iter di;
+	vm_offset_t addr;
+	int domain;
+
+	KASSERT(vmem == kernel_arena,
+	    ("kmem_alloc_contig: Only kernel_arena is supported."));
+
+	vm_domainset_iter_malloc_init(&di, kernel_object, &domain, &flags);
+	do {
+		addr = kmem_alloc_contig_domain(domain, size, flags, low, high,
+		    alignment, boundary, memattr);
+		if (addr != 0)
+			break;
+	} while (vm_domainset_iter_malloc(&di, &domain, &flags) == 0);
+
+	return (addr);
+}
+
 /*
  * kmem_suballoc:
  *
@@ -313,18 +371,18 @@
  * Allocate wired-down pages in the kernel's address space.
  */
 vm_offset_t
-kmem_malloc(struct vmem *vmem, vm_size_t size, int flags)
+kmem_malloc_domain(int domain, vm_size_t size, int flags)
 {
+	vmem_t *vmem;
 	vm_offset_t addr;
 	int rv;
 
-	KASSERT(vmem == kernel_arena,
-	    ("kmem_malloc: Only kernel_arena is supported."));
+	vmem = vm_dom[domain].vmd_kernel_arena;
 	size = round_page(size);
 	if (vmem_alloc(vmem, size, flags | M_BESTFIT, &addr))
 		return (0);
 
-	rv = kmem_back(kernel_object, addr, size, flags);
+	rv = kmem_back_domain(domain, kernel_object, addr, size, flags);
 	if (rv != KERN_SUCCESS) {
 		vmem_free(vmem, addr, size);
 		return (0);
@@ -332,20 +390,41 @@
 	return (addr);
 }
 
+vm_offset_t
+kmem_malloc(struct vmem *vmem, vm_size_t size, int flags)
+{
+	struct vm_domainset_iter di;
+	vm_offset_t addr;
+	int domain;
+
+	KASSERT(vmem == kernel_arena,
+	    ("kmem_malloc: Only kernel_arena is supported."));
+
+	vm_domainset_iter_malloc_init(&di, kernel_object, &domain, &flags);
+	do {
+		addr = kmem_malloc_domain(domain, size, flags);
+		if (addr != 0)
+			break;
+	} while (vm_domainset_iter_malloc(&di, &domain, &flags) == 0);
+
+	return (addr);
+}
+
 /*
  * kmem_back:
  *
  * Allocate physical pages for the specified virtual address range.
  */
 int
-kmem_back(vm_object_t object, vm_offset_t addr, vm_size_t size, int flags)
+kmem_back_domain(int domain, vm_object_t object, vm_offset_t addr,
+    vm_size_t size, int flags)
 {
 	vm_offset_t offset, i;
 	vm_page_t m, mpred;
 	int pflags;
 
 	KASSERT(object == kernel_object,
-	    ("kmem_back: only supports kernel object."));
+	    ("kmem_back_domain: only supports kernel object."));
 
 	offset = addr - VM_MIN_KERNEL_ADDRESS;
 	pflags = malloc2vm_flags(flags) | VM_ALLOC_NOBUSY | VM_ALLOC_WIRED;
@@ -358,8 +437,8 @@
retry:
 	mpred = vm_radix_lookup_le(&object->rtree, atop(offset + i));
 	for (; i < size; i += PAGE_SIZE, mpred = m) {
-		m = vm_page_alloc_after(object, atop(offset + i), pflags,
-		    mpred);
+		m = vm_page_alloc_domain_after(object, atop(offset + i),
+		    domain, pflags, mpred);
 
 		/*
 		 * Ran out of space, free everything up and return.  Don't need
@@ -373,6 +452,9 @@
 			kmem_unback(object, addr, i);
 			return (KERN_NO_SPACE);
 		}
+		KASSERT(vm_phys_domidx(m) == domain,
+		    ("kmem_back_domain: Domain mismatch %d != %d",
+		    vm_phys_domidx(m), domain));
 		if (flags & M_ZERO && (m->flags & PG_ZERO) == 0)
 			pmap_zero_page(m);
 		KASSERT((m->oflags & VPO_UNMANAGED) != 0,
@@ -386,6 +468,26 @@
 	return (KERN_SUCCESS);
 }
 
+int
+kmem_back(vm_object_t object, vm_offset_t addr, vm_size_t size, int flags)
+{
+	struct vm_domainset_iter di;
+	int domain;
+	int ret;
+
+	KASSERT(object == kernel_object,
+	    ("kmem_back: only supports kernel object."));
+
+	vm_domainset_iter_malloc_init(&di, kernel_object, &domain, &flags);
+	do {
+		ret = kmem_back_domain(domain, object, addr, size, flags);
+		if (ret == KERN_SUCCESS)
+			break;
+	} while (vm_domainset_iter_malloc(&di, &domain, &flags) == 0);
+
+	return (ret);
+}
+
 /*
  * kmem_unback:
  *
@@ -395,28 +497,41 @@
  * A physical page must exist within the specified object at each index
  * that is being unmapped.
  */
-void
-kmem_unback(vm_object_t object, vm_offset_t addr, vm_size_t size)
+static int
+_kmem_unback(vm_object_t object, vm_offset_t addr, vm_size_t size)
 {
 	vm_page_t m, next;
 	vm_offset_t end, offset;
+	int domain;
 
 	KASSERT(object == kernel_object,
 	    ("kmem_unback: only supports kernel object."));
 
+	if (size == 0)
+		return (0);
 	pmap_remove(kernel_pmap, addr, addr + size);
 	offset = addr - VM_MIN_KERNEL_ADDRESS;
 	end = offset + size;
 	VM_OBJECT_WLOCK(object);
-	for (m = vm_page_lookup(object, atop(offset)); offset < end;
-	    offset += PAGE_SIZE, m = next) {
+	m = vm_page_lookup(object, atop(offset));
+	domain = vm_phys_domidx(m);
+	for (; offset < end; offset += PAGE_SIZE, m = next) {
 		next = vm_page_next(m);
 		vm_page_unwire(m, PQ_NONE);
 		vm_page_free(m);
 	}
	VM_OBJECT_WUNLOCK(object);
+
+	return (domain);
 }
 
+void
+kmem_unback(vm_object_t object, vm_offset_t addr, vm_size_t size)
+{
+
+	_kmem_unback(object, addr, size);
+}
+
 /*
  * kmem_free:
  *
@@ -426,12 +541,13 @@
 void
 kmem_free(struct vmem *vmem, vm_offset_t addr, vm_size_t size)
 {
+	int domain;
 
 	KASSERT(vmem == kernel_arena,
 	    ("kmem_free: Only kernel_arena is supported."));
 
 	size = round_page(size);
-	kmem_unback(kernel_object, addr, size);
-	vmem_free(vmem, addr, size);
+	domain = _kmem_unback(kernel_object, addr, size);
+	vmem_free(vm_dom[domain].vmd_kernel_arena, addr, size);
 }
 
 /*
Index: head/sys/vm/vm_phys.h
===================================================================
--- head/sys/vm/vm_phys.h
+++ head/sys/vm/vm_phys.h
@@ -101,7 +101,6 @@
 static inline int
 vm_phys_domidx(vm_page_t m)
 {
-#ifdef VM_NUMA_ALLOC
 	int domn, segind;
 
 	/* XXXKIB try to assert that the page is managed */
@@ -110,9 +109,6 @@
 	domn = vm_phys_segs[segind].domain;
 	KASSERT(domn < vm_ndomains, ("domain %d m %p", domn, m));
 	return (domn);
-#else
-	return (0);
-#endif
 }
 
 /*
Index: head/sys/vm/vm_reserv.c
===================================================================
--- head/sys/vm/vm_reserv.c
+++ head/sys/vm/vm_reserv.c
@@ -549,6 +549,8 @@
 	    VM_LEVEL_0_SIZE), boundary > VM_LEVEL_0_SIZE ? boundary : 0);
 	if (m == NULL)
 		return (NULL);
+	KASSERT(vm_phys_domidx(m) == domain,
+	    ("vm_reserv_alloc_contig: Page domain does not match requested."));
 
 	/*
 	 * The allocated physical pages always begin at a reservation
@@ -568,7 +570,7 @@
 	LIST_INSERT_HEAD(&object->rvq, rv, objq);
 	rv->object = object;
 	rv->pindex = first;
-	rv->domain = vm_phys_domidx(m);
+	rv->domain = domain;
 	KASSERT(rv->popcnt == 0,
 	    ("vm_reserv_alloc_contig: reserv %p's popcnt is corrupted", rv));
@@ -715,7 +717,7 @@
 	LIST_INSERT_HEAD(&object->rvq, rv, objq);
 	rv->object = object;
 	rv->pindex = first;
-	rv->domain = vm_phys_domidx(m);
+	rv->domain = domain;
 	KASSERT(rv->popcnt == 0,
 	    ("vm_reserv_alloc_page: reserv %p's popcnt is corrupted", rv));
 	KASSERT(!rv->inpartpopq,
@@ -734,6 +736,8 @@
found:
 	index = VM_RESERV_INDEX(object, pindex);
 	m = &rv->pages[index];
+	KASSERT(object != kernel_object || vm_phys_domidx(m) == domain,
+	    ("vm_reserv_alloc_page: Domain mismatch from reservation."));
 	/* Handle vm_page_rename(m, new_object, ...). */
 	if (popmap_is_set(rv->popmap, index))
 		return (NULL);
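Note on the pattern above (illustration only, not part of the patch): every new wrapper in vm_kern.c -- kmem_alloc_attr(), kmem_alloc_contig(), kmem_malloc() and kmem_back() -- has the same shape: obtain a starting domain from a domainset iterator, call the per-domain variant, and on failure advance the iterator and retry until it is exhausted. The standalone C sketch below mirrors that do/while fallback loop in userland terms. The names TOY_NDOMAINS, toy_iter_*, toy_alloc_domain() and toy_alloc() are hypothetical stand-ins for vm_ndomains, vm_domainset_iter_malloc_init()/vm_domainset_iter_malloc(), kmem_malloc_domain() and kmem_malloc(); the simple round-robin order is an assumption for illustration, not the kernel's actual domainset policy.

/*
 * Userland sketch of the per-domain fallback pattern used by the new
 * kmem_* wrappers.  All toy_* names are hypothetical stand-ins.
 */
#include <stdio.h>
#include <stdlib.h>

#define	TOY_NDOMAINS	4

struct toy_iter {
	int start;	/* preferred domain, tried first */
	int n;		/* number of domains visited so far */
};

static void
toy_iter_init(struct toy_iter *it, int preferred, int *domain)
{
	it->start = preferred;
	it->n = 0;
	*domain = preferred;
}

/* Advance to the next domain; returns nonzero once every domain was tried. */
static int
toy_iter_next(struct toy_iter *it, int *domain)
{
	if (++it->n >= TOY_NDOMAINS)
		return (1);
	*domain = (it->start + it->n) % TOY_NDOMAINS;
	return (0);
}

/* Stand-in for a per-domain allocator; domains 0 and 1 pretend to be full. */
static void *
toy_alloc_domain(int domain, size_t size)
{
	if (domain < 2)
		return (NULL);
	printf("satisfied %zu bytes from domain %d\n", size, domain);
	return (malloc(size));
}

/* Mirrors the wrappers' do/while fallback over the domain iterator. */
static void *
toy_alloc(int preferred, size_t size)
{
	struct toy_iter it;
	void *p;
	int domain;

	toy_iter_init(&it, preferred, &domain);
	do {
		p = toy_alloc_domain(domain, size);
		if (p != NULL)
			break;
	} while (toy_iter_next(&it, &domain) == 0);

	return (p);
}

int
main(void)
{
	void *p;

	p = toy_alloc(0, 4096);	/* falls back from domain 0 to domain 2 */
	free(p);
	return (p == NULL);
}

As in the patch, a failure in a single domain is not treated as fatal; the caller only fails once every candidate domain produced by the iterator has been tried.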