Index: head/sys/vm/vm_domain.h =================================================================== --- head/sys/vm/vm_domain.h +++ head/sys/vm/vm_domain.h @@ -63,4 +63,7 @@ extern int vm_domain_iterator_isdone(struct vm_domain_iterator *vi); extern int vm_domain_iterator_cleanup(struct vm_domain_iterator *vi); +extern void vm_policy_iterator_init(struct vm_domain_iterator *vi); +extern void vm_policy_iterator_finish(struct vm_domain_iterator *vi); + #endif /* __VM_DOMAIN_H__ */ Index: head/sys/vm/vm_domain.c =================================================================== --- head/sys/vm/vm_domain.c +++ head/sys/vm/vm_domain.c @@ -61,6 +61,118 @@ #include +/* + * Default to first-touch + round-robin. + */ +static struct mtx vm_default_policy_mtx; +MTX_SYSINIT(vm_default_policy, &vm_default_policy_mtx, "default policy mutex", + MTX_DEF); +#ifdef VM_NUMA_ALLOC +static struct vm_domain_policy vm_default_policy = + VM_DOMAIN_POLICY_STATIC_INITIALISER(VM_POLICY_FIRST_TOUCH_ROUND_ROBIN, 0); +#else +/* Use round-robin so the domain policy code will only try once per allocation */ +static struct vm_domain_policy vm_default_policy = + VM_DOMAIN_POLICY_STATIC_INITIALISER(VM_POLICY_ROUND_ROBIN, 0); +#endif + +static int +sysctl_vm_default_policy(SYSCTL_HANDLER_ARGS) +{ + char policy_name[32]; + int error; + + mtx_lock(&vm_default_policy_mtx); + + /* Map policy to output string */ + switch (vm_default_policy.p.policy) { + case VM_POLICY_FIRST_TOUCH: + strcpy(policy_name, "first-touch"); + break; + case VM_POLICY_FIRST_TOUCH_ROUND_ROBIN: + strcpy(policy_name, "first-touch-rr"); + break; + case VM_POLICY_ROUND_ROBIN: + default: + strcpy(policy_name, "rr"); + break; + } + mtx_unlock(&vm_default_policy_mtx); + + error = sysctl_handle_string(oidp, &policy_name[0], + sizeof(policy_name), req); + if (error != 0 || req->newptr == NULL) + return (error); + + mtx_lock(&vm_default_policy_mtx); + /* Set: match on the subset of policies that make sense as a default */ + if 
(strcmp("first-touch-rr", policy_name) == 0) { + vm_domain_policy_set(&vm_default_policy, + VM_POLICY_FIRST_TOUCH_ROUND_ROBIN, 0); + } else if (strcmp("first-touch", policy_name) == 0) { + vm_domain_policy_set(&vm_default_policy, + VM_POLICY_FIRST_TOUCH, 0); + } else if (strcmp("rr", policy_name) == 0) { + vm_domain_policy_set(&vm_default_policy, + VM_POLICY_ROUND_ROBIN, 0); + } else { + error = EINVAL; + goto finish; + } + + error = 0; +finish: + mtx_unlock(&vm_default_policy_mtx); + return (error); +} + +SYSCTL_PROC(_vm, OID_AUTO, default_policy, CTLTYPE_STRING | CTLFLAG_RW, + 0, 0, sysctl_vm_default_policy, "A", + "Default policy (rr, first-touch, first-touch-rr)"); + +/* + * Initialise a VM domain iterator. + * + * Check the thread policy, then the proc policy, + * then default to the system policy. + */ +void +vm_policy_iterator_init(struct vm_domain_iterator *vi) +{ +#ifdef VM_NUMA_ALLOC + struct vm_domain_policy lcl; +#endif + + vm_domain_iterator_init(vi); + +#ifdef VM_NUMA_ALLOC + /* Copy out the thread policy */ + vm_domain_policy_localcopy(&lcl, &curthread->td_vm_dom_policy); + if (lcl.p.policy != VM_POLICY_NONE) { + /* Thread policy is present; use it */ + vm_domain_iterator_set_policy(vi, &lcl); + return; + } + + vm_domain_policy_localcopy(&lcl, + &curthread->td_proc->p_vm_dom_policy); + if (lcl.p.policy != VM_POLICY_NONE) { + /* Process policy is present; use it */ + vm_domain_iterator_set_policy(vi, &lcl); + return; + } +#endif + /* Use system default policy */ + vm_domain_iterator_set_policy(vi, &vm_default_policy); +} + +void +vm_policy_iterator_finish(struct vm_domain_iterator *vi) +{ + + vm_domain_iterator_cleanup(vi); +} + #ifdef VM_NUMA_ALLOC static __inline int vm_domain_rr_selectdomain(int skip_domain) Index: head/sys/vm/vm_page.h =================================================================== --- head/sys/vm/vm_page.h +++ head/sys/vm/vm_page.h @@ -476,16 +476,24 @@ void vm_page_activate (vm_page_t); void vm_page_advise(vm_page_t m, int
advice); vm_page_t vm_page_alloc(vm_object_t, vm_pindex_t, int); +vm_page_t vm_page_alloc_domain(vm_object_t, vm_pindex_t, int, int); vm_page_t vm_page_alloc_after(vm_object_t, vm_pindex_t, int, vm_page_t); +vm_page_t vm_page_alloc_domain_after(vm_object_t, vm_pindex_t, int, int, + vm_page_t); vm_page_t vm_page_alloc_contig(vm_object_t object, vm_pindex_t pindex, int req, u_long npages, vm_paddr_t low, vm_paddr_t high, u_long alignment, vm_paddr_t boundary, vm_memattr_t memattr); +vm_page_t vm_page_alloc_contig_domain(vm_object_t object, + vm_pindex_t pindex, int domain, int req, u_long npages, vm_paddr_t low, + vm_paddr_t high, u_long alignment, vm_paddr_t boundary, + vm_memattr_t memattr); vm_page_t vm_page_alloc_freelist(int, int); +vm_page_t vm_page_alloc_freelist_domain(int, int, int); void vm_page_change_lock(vm_page_t m, struct mtx **mtx); vm_page_t vm_page_grab (vm_object_t, vm_pindex_t, int); int vm_page_grab_pages(vm_object_t object, vm_pindex_t pindex, int allocflags, vm_page_t *ma, int count); -void vm_page_deactivate (vm_page_t); +void vm_page_deactivate(vm_page_t); void vm_page_deactivate_noreuse(vm_page_t); void vm_page_dequeue(vm_page_t m); void vm_page_dequeue_locked(vm_page_t m); @@ -506,6 +514,8 @@ void vm_page_readahead_finish(vm_page_t m); bool vm_page_reclaim_contig(int req, u_long npages, vm_paddr_t low, vm_paddr_t high, u_long alignment, vm_paddr_t boundary); +bool vm_page_reclaim_contig_domain(int req, u_long npages, int domain, + vm_paddr_t low, vm_paddr_t high, u_long alignment, vm_paddr_t boundary); void vm_page_reference(vm_page_t m); void vm_page_remove (vm_page_t); int vm_page_rename (vm_page_t, vm_object_t, vm_pindex_t); Index: head/sys/vm/vm_page.c =================================================================== --- head/sys/vm/vm_page.c +++ head/sys/vm/vm_page.c @@ -109,6 +109,7 @@ #include #include #include +#include #include #include #include @@ -1603,6 +1604,16 @@ vm_radix_lookup_le(&object->rtree, pindex) : NULL)); } 
+vm_page_t +vm_page_alloc_domain(vm_object_t object, vm_pindex_t pindex, int domain, + int req) +{ + + return (vm_page_alloc_domain_after(object, pindex, domain, req, + object != NULL ? vm_radix_lookup_le(&object->rtree, pindex) : + NULL)); +} + /* * Allocate a page in the specified object with the given page index. To * optimize insertion of the page into the object, the caller must also specifiy @@ -1610,10 +1621,35 @@ * page index, or NULL if no such page exists. */ vm_page_t -vm_page_alloc_after(vm_object_t object, vm_pindex_t pindex, int req, - vm_page_t mpred) +vm_page_alloc_after(vm_object_t object, vm_pindex_t pindex, + int req, vm_page_t mpred) { + struct vm_domain_iterator vi; vm_page_t m; + int domain, wait; + + m = NULL; + vm_policy_iterator_init(&vi); + wait = req & (VM_ALLOC_WAITFAIL | VM_ALLOC_WAITOK); + req &= ~wait; + while (vm_domain_iterator_run(&vi, &domain) == 0) { + if (vm_domain_iterator_isdone(&vi)) + req |= wait; + m = vm_page_alloc_domain_after(object, pindex, domain, req, + mpred); + if (m != NULL) + break; + } + vm_policy_iterator_finish(&vi); + + return (m); +} + +vm_page_t +vm_page_alloc_domain_after(vm_object_t object, vm_pindex_t pindex, int domain, + int req, vm_page_t mpred) +{ + vm_page_t m; int flags, req_class; u_int free_count; @@ -1643,6 +1679,7 @@ * for the request class. */ again: + m = NULL; mtx_lock(&vm_page_queue_free_mtx); if (vm_cnt.v_free_count > vm_cnt.v_free_reserved || (req_class == VM_ALLOC_SYSTEM && @@ -1655,23 +1692,26 @@ #if VM_NRESERVLEVEL > 0 if (object == NULL || (object->flags & (OBJ_COLORED | OBJ_FICTITIOUS)) != OBJ_COLORED || (m = - vm_reserv_alloc_page(object, pindex, mpred)) == NULL) + vm_reserv_alloc_page(object, pindex, domain, + mpred)) == NULL) #endif { /* * If not, allocate it from the free page queues. */ - m = vm_phys_alloc_pages(object != NULL ? + m = vm_phys_alloc_pages(domain, object != NULL ? 
VM_FREEPOOL_DEFAULT : VM_FREEPOOL_DIRECT, 0); #if VM_NRESERVLEVEL > 0 - if (m == NULL && vm_reserv_reclaim_inactive()) { - m = vm_phys_alloc_pages(object != NULL ? + if (m == NULL && vm_reserv_reclaim_inactive(domain)) { + m = vm_phys_alloc_pages(domain, + object != NULL ? VM_FREEPOOL_DEFAULT : VM_FREEPOOL_DIRECT, 0); } #endif } - } else { + } + if (m == NULL) { /* * Not allocatable, give up. */ @@ -1799,6 +1839,32 @@ u_long npages, vm_paddr_t low, vm_paddr_t high, u_long alignment, vm_paddr_t boundary, vm_memattr_t memattr) { + struct vm_domain_iterator vi; + vm_page_t m; + int domain, wait; + + m = NULL; + vm_policy_iterator_init(&vi); + wait = req & (VM_ALLOC_WAITFAIL | VM_ALLOC_WAITOK); + req &= ~wait; + while (vm_domain_iterator_run(&vi, &domain) == 0) { + if (vm_domain_iterator_isdone(&vi)) + req |= wait; + m = vm_page_alloc_contig_domain(object, pindex, domain, req, + npages, low, high, alignment, boundary, memattr); + if (m != NULL) + break; + } + vm_policy_iterator_finish(&vi); + + return (m); +} + +vm_page_t +vm_page_alloc_contig_domain(vm_object_t object, vm_pindex_t pindex, int domain, + int req, u_long npages, vm_paddr_t low, vm_paddr_t high, u_long alignment, + vm_paddr_t boundary, vm_memattr_t memattr) +{ vm_page_t m, m_ret, mpred; u_int busy_lock, flags, oflags; int req_class; @@ -1838,6 +1904,7 @@ * below the lower bound for the allocation class? */ again: + m_ret = NULL; mtx_lock(&vm_page_queue_free_mtx); if (vm_cnt.v_free_count >= npages + vm_cnt.v_free_reserved || (req_class == VM_ALLOC_SYSTEM && @@ -1850,31 +1917,27 @@ #if VM_NRESERVLEVEL > 0 retry: if (object == NULL || (object->flags & OBJ_COLORED) == 0 || - (m_ret = vm_reserv_alloc_contig(object, pindex, npages, - low, high, alignment, boundary, mpred)) == NULL) + (m_ret = vm_reserv_alloc_contig(object, pindex, domain, + npages, low, high, alignment, boundary, mpred)) == NULL) #endif /* * If not, allocate them from the free page queues. 
*/ - m_ret = vm_phys_alloc_contig(npages, low, high, + m_ret = vm_phys_alloc_contig(domain, npages, low, high, alignment, boundary); - } else { - if (vm_page_alloc_fail(object, req)) - goto again; - return (NULL); - } - if (m_ret != NULL) - vm_phys_freecnt_adj(m_ret, -npages); - else { #if VM_NRESERVLEVEL > 0 - if (vm_reserv_reclaim_contig(npages, low, high, alignment, - boundary)) + if (m_ret == NULL && vm_reserv_reclaim_contig( + domain, npages, low, high, alignment, boundary)) goto retry; #endif } - mtx_unlock(&vm_page_queue_free_mtx); - if (m_ret == NULL) + if (m_ret == NULL) { + if (vm_page_alloc_fail(object, req)) + goto again; return (NULL); + } + vm_phys_freecnt_adj(m_ret, -npages); + mtx_unlock(&vm_page_queue_free_mtx); for (m = m_ret; m < &m_ret[npages]; m++) vm_page_alloc_check(m); @@ -1988,7 +2051,30 @@ vm_page_t vm_page_alloc_freelist(int flind, int req) { + struct vm_domain_iterator vi; vm_page_t m; + int domain, wait; + + m = NULL; + vm_policy_iterator_init(&vi); + wait = req & (VM_ALLOC_WAITFAIL | VM_ALLOC_WAITOK); + req &= ~wait; + while (vm_domain_iterator_run(&vi, &domain) == 0) { + if (vm_domain_iterator_isdone(&vi)) + req |= wait; + m = vm_page_alloc_freelist_domain(domain, flind, req); + if (m != NULL) + break; + } + vm_policy_iterator_finish(&vi); + + return (m); +} + +vm_page_t +vm_page_alloc_freelist_domain(int domain, int flind, int req) +{ + vm_page_t m; u_int flags, free_count; int req_class; @@ -2009,15 +2095,12 @@ (req_class == VM_ALLOC_SYSTEM && vm_cnt.v_free_count > vm_cnt.v_interrupt_free_min) || (req_class == VM_ALLOC_INTERRUPT && - vm_cnt.v_free_count > 0)) { - m = vm_phys_alloc_freelist_pages(flind, VM_FREEPOOL_DIRECT, 0); - } else { + vm_cnt.v_free_count > 0)) + m = vm_phys_alloc_freelist_pages(domain, flind, + VM_FREEPOOL_DIRECT, 0); + if (m == NULL) { if (vm_page_alloc_fail(NULL, req)) goto again; - return (NULL); - } - if (m == NULL) { - mtx_unlock(&vm_page_queue_free_mtx); return (NULL); } free_count = vm_phys_freecnt_adj(m, 
-1); Index: head/sys/vm/vm_phys.h =================================================================== --- head/sys/vm/vm_phys.h +++ head/sys/vm/vm_phys.h @@ -72,10 +72,11 @@ * The following functions are only to be used by the virtual memory system. */ void vm_phys_add_seg(vm_paddr_t start, vm_paddr_t end); -vm_page_t vm_phys_alloc_contig(u_long npages, vm_paddr_t low, vm_paddr_t high, - u_long alignment, vm_paddr_t boundary); -vm_page_t vm_phys_alloc_freelist_pages(int freelist, int pool, int order); -vm_page_t vm_phys_alloc_pages(int pool, int order); +vm_page_t vm_phys_alloc_contig(int domain, u_long npages, vm_paddr_t low, + vm_paddr_t high, u_long alignment, vm_paddr_t boundary); +vm_page_t vm_phys_alloc_freelist_pages(int domain, int freelist, int pool, + int order); +vm_page_t vm_phys_alloc_pages(int domain, int pool, int order); boolean_t vm_phys_domain_intersects(long mask, vm_paddr_t low, vm_paddr_t high); int vm_phys_fictitious_reg_range(vm_paddr_t start, vm_paddr_t end, vm_memattr_t memattr); @@ -92,12 +93,13 @@ int vm_phys_mem_affinity(int f, int t); /* - * vm_phys_domain: * - * Return the memory domain the page belongs to. + * vm_phys_domidx: + * + * Return the index of the domain the page belongs to. */ -static inline struct vm_domain * -vm_phys_domain(vm_page_t m) +static inline int +vm_phys_domidx(vm_page_t m) { #ifdef VM_NUMA_ALLOC int domn, segind; @@ -107,10 +109,22 @@ KASSERT(segind < vm_phys_nsegs, ("segind %d m %p", segind, m)); domn = vm_phys_segs[segind].domain; KASSERT(domn < vm_ndomains, ("domain %d m %p", domn, m)); - return (&vm_dom[domn]); + return (domn); #else - return (&vm_dom[0]); + return (0); #endif +} + +/* + * vm_phys_domain: + * + * Return the memory domain the page belongs to. 
+ */ +static inline struct vm_domain * +vm_phys_domain(vm_page_t m) +{ + + return (&vm_dom[vm_phys_domidx(m)]); } static inline u_int Index: head/sys/vm/vm_phys.c =================================================================== --- head/sys/vm/vm_phys.c +++ head/sys/vm/vm_phys.c @@ -151,23 +151,6 @@ SYSCTL_INT(_vm, OID_AUTO, ndomains, CTLFLAG_RD, &vm_ndomains, 0, "Number of physical memory domains available."); -/* - * Default to first-touch + round-robin. - */ -static struct mtx vm_default_policy_mtx; -MTX_SYSINIT(vm_default_policy, &vm_default_policy_mtx, "default policy mutex", - MTX_DEF); -#ifdef VM_NUMA_ALLOC -static struct vm_domain_policy vm_default_policy = - VM_DOMAIN_POLICY_STATIC_INITIALISER(VM_POLICY_FIRST_TOUCH_ROUND_ROBIN, 0); -#else -/* Use round-robin so the domain policy code will only try once per allocation */ -static struct vm_domain_policy vm_default_policy = - VM_DOMAIN_POLICY_STATIC_INITIALISER(VM_POLICY_ROUND_ROBIN, 0); -#endif - -static vm_page_t vm_phys_alloc_domain_pages(int domain, int flind, int pool, - int order); static vm_page_t vm_phys_alloc_seg_contig(struct vm_phys_seg *seg, u_long npages, vm_paddr_t low, vm_paddr_t high, u_long alignment, vm_paddr_t boundary); @@ -176,60 +159,6 @@ static void vm_phys_split_pages(vm_page_t m, int oind, struct vm_freelist *fl, int order); -static int -sysctl_vm_default_policy(SYSCTL_HANDLER_ARGS) -{ - char policy_name[32]; - int error; - - mtx_lock(&vm_default_policy_mtx); - - /* Map policy to output string */ - switch (vm_default_policy.p.policy) { - case VM_POLICY_FIRST_TOUCH: - strcpy(policy_name, "first-touch"); - break; - case VM_POLICY_FIRST_TOUCH_ROUND_ROBIN: - strcpy(policy_name, "first-touch-rr"); - break; - case VM_POLICY_ROUND_ROBIN: - default: - strcpy(policy_name, "rr"); - break; - } - mtx_unlock(&vm_default_policy_mtx); - - error = sysctl_handle_string(oidp, &policy_name[0], - sizeof(policy_name), req); - if (error != 0 || req->newptr == NULL) - return (error); - - 
mtx_lock(&vm_default_policy_mtx); - /* Set: match on the subset of policies that make sense as a default */ - if (strcmp("first-touch-rr", policy_name) == 0) { - vm_domain_policy_set(&vm_default_policy, - VM_POLICY_FIRST_TOUCH_ROUND_ROBIN, 0); - } else if (strcmp("first-touch", policy_name) == 0) { - vm_domain_policy_set(&vm_default_policy, - VM_POLICY_FIRST_TOUCH, 0); - } else if (strcmp("rr", policy_name) == 0) { - vm_domain_policy_set(&vm_default_policy, - VM_POLICY_ROUND_ROBIN, 0); - } else { - error = EINVAL; - goto finish; - } - - error = 0; -finish: - mtx_unlock(&vm_default_policy_mtx); - return (error); -} - -SYSCTL_PROC(_vm, OID_AUTO, default_policy, CTLTYPE_STRING | CTLFLAG_RW, - 0, 0, sysctl_vm_default_policy, "A", - "Default policy (rr, first-touch, first-touch-rr"); - /* * Red-black tree helpers for vm fictitious range management. */ @@ -271,71 +200,6 @@ (uintmax_t)p1->end, (uintmax_t)p2->start, (uintmax_t)p2->end); } -#ifdef notyet -static __inline int -vm_rr_selectdomain(void) -{ -#ifdef VM_NUMA_ALLOC - struct thread *td; - - td = curthread; - - td->td_dom_rr_idx++; - td->td_dom_rr_idx %= vm_ndomains; - return (td->td_dom_rr_idx); -#else - return (0); -#endif -} -#endif /* notyet */ - -/* - * Initialise a VM domain iterator. - * - * Check the thread policy, then the proc policy, - * then default to the system policy. - * - * Later on the various layers will have this logic - * plumbed into them and the phys code will be explicitly - * handed a VM domain policy to use. 
- */ -static void -vm_policy_iterator_init(struct vm_domain_iterator *vi) -{ -#ifdef VM_NUMA_ALLOC - struct vm_domain_policy lcl; -#endif - - vm_domain_iterator_init(vi); - -#ifdef VM_NUMA_ALLOC - /* Copy out the thread policy */ - vm_domain_policy_localcopy(&lcl, &curthread->td_vm_dom_policy); - if (lcl.p.policy != VM_POLICY_NONE) { - /* Thread policy is present; use it */ - vm_domain_iterator_set_policy(vi, &lcl); - return; - } - - vm_domain_policy_localcopy(&lcl, - &curthread->td_proc->p_vm_dom_policy); - if (lcl.p.policy != VM_POLICY_NONE) { - /* Process policy is present; use it */ - vm_domain_iterator_set_policy(vi, &lcl); - return; - } -#endif - /* Use system default policy */ - vm_domain_iterator_set_policy(vi, &vm_default_policy); -} - -static void -vm_policy_iterator_finish(struct vm_domain_iterator *vi) -{ - - vm_domain_iterator_cleanup(vi); -} - boolean_t vm_phys_domain_intersects(long mask, vm_paddr_t low, vm_paddr_t high) { @@ -504,7 +368,7 @@ KASSERT(vm_phys_nsegs < VM_PHYSSEG_MAX, ("vm_phys_create_seg: increase VM_PHYSSEG_MAX")); - KASSERT(domain < vm_ndomains, + KASSERT(domain >= 0 && domain < vm_ndomains, ("vm_phys_create_seg: invalid domain provided")); seg = &vm_phys_segs[vm_phys_nsegs++]; while (seg > vm_phys_segs && (seg - 1)->start >= end) { @@ -736,29 +600,16 @@ * The free page queues must be locked. 
*/ vm_page_t -vm_phys_alloc_pages(int pool, int order) +vm_phys_alloc_pages(int domain, int pool, int order) { vm_page_t m; - int domain, flind; - struct vm_domain_iterator vi; + int flind; - KASSERT(pool < VM_NFREEPOOL, - ("vm_phys_alloc_pages: pool %d is out of range", pool)); - KASSERT(order < VM_NFREEORDER, - ("vm_phys_alloc_pages: order %d is out of range", order)); - - vm_policy_iterator_init(&vi); - - while ((vm_domain_iterator_run(&vi, &domain)) == 0) { - for (flind = 0; flind < vm_nfreelists; flind++) { - m = vm_phys_alloc_domain_pages(domain, flind, pool, - order); - if (m != NULL) - return (m); - } + for (flind = 0; flind < vm_nfreelists; flind++) { + m = vm_phys_alloc_freelist_pages(domain, flind, pool, order); + if (m != NULL) + return (m); } - - vm_policy_iterator_finish(&vi); return (NULL); } @@ -770,41 +621,23 @@ * The free page queues must be locked. */ vm_page_t -vm_phys_alloc_freelist_pages(int freelist, int pool, int order) +vm_phys_alloc_freelist_pages(int domain, int flind, int pool, int order) { + struct vm_freelist *alt, *fl; vm_page_t m; - struct vm_domain_iterator vi; - int domain; + int oind, pind; - KASSERT(freelist < VM_NFREELIST, + KASSERT(domain >= 0 && domain < vm_ndomains, + ("vm_phys_alloc_freelist_pages: domain %d is out of range", + domain)); + KASSERT(flind < VM_NFREELIST, ("vm_phys_alloc_freelist_pages: freelist %d is out of range", - freelist)); + flind)); KASSERT(pool < VM_NFREEPOOL, ("vm_phys_alloc_freelist_pages: pool %d is out of range", pool)); KASSERT(order < VM_NFREEORDER, ("vm_phys_alloc_freelist_pages: order %d is out of range", order)); - vm_policy_iterator_init(&vi); - - while ((vm_domain_iterator_run(&vi, &domain)) == 0) { - m = vm_phys_alloc_domain_pages(domain, - vm_freelist_to_flind[freelist], pool, order); - if (m != NULL) - return (m); - } - - vm_policy_iterator_finish(&vi); - return (NULL); -} - -static vm_page_t -vm_phys_alloc_domain_pages(int domain, int flind, int pool, int order) -{ - struct vm_freelist 
*fl; - struct vm_freelist *alt; - int oind, pind; - vm_page_t m; - mtx_assert(&vm_page_queue_free_mtx, MA_OWNED); fl = &vm_phys_free_queues[domain][flind][pool][0]; for (oind = order; oind < VM_NFREEORDER; oind++) { @@ -1261,14 +1094,13 @@ * "alignment" and "boundary" must be a power of two. */ vm_page_t -vm_phys_alloc_contig(u_long npages, vm_paddr_t low, vm_paddr_t high, +vm_phys_alloc_contig(int domain, u_long npages, vm_paddr_t low, vm_paddr_t high, u_long alignment, vm_paddr_t boundary) { vm_paddr_t pa_end, pa_start; vm_page_t m_run; - struct vm_domain_iterator vi; struct vm_phys_seg *seg; - int domain, segind; + int segind; KASSERT(npages > 0, ("npages is 0")); KASSERT(powerof2(alignment), ("alignment is not a power of 2")); @@ -1276,12 +1108,6 @@ mtx_assert(&vm_page_queue_free_mtx, MA_OWNED); if (low >= high) return (NULL); - vm_policy_iterator_init(&vi); -restartdom: - if (vm_domain_iterator_run(&vi, &domain) != 0) { - vm_policy_iterator_finish(&vi); - return (NULL); - } m_run = NULL; for (segind = vm_phys_nsegs - 1; segind >= 0; segind--) { seg = &vm_phys_segs[segind]; @@ -1304,9 +1130,6 @@ if (m_run != NULL) break; } - if (m_run == NULL && !vm_domain_iterator_isdone(&vi)) - goto restartdom; - vm_policy_iterator_finish(&vi); return (m_run); } Index: head/sys/vm/vm_reserv.h =================================================================== --- head/sys/vm/vm_reserv.h +++ head/sys/vm/vm_reserv.h @@ -48,19 +48,20 @@ * The following functions are only to be used by the virtual memory system. 
*/ vm_page_t vm_reserv_alloc_contig(vm_object_t object, vm_pindex_t pindex, - u_long npages, vm_paddr_t low, vm_paddr_t high, + int domain, u_long npages, vm_paddr_t low, vm_paddr_t high, u_long alignment, vm_paddr_t boundary, vm_page_t mpred); vm_page_t vm_reserv_alloc_page(vm_object_t object, vm_pindex_t pindex, - vm_page_t mpred); + int domain, vm_page_t mpred); void vm_reserv_break_all(vm_object_t object); boolean_t vm_reserv_free_page(vm_page_t m); void vm_reserv_init(void); bool vm_reserv_is_page_free(vm_page_t m); int vm_reserv_level(vm_page_t m); int vm_reserv_level_iffullpop(vm_page_t m); -boolean_t vm_reserv_reclaim_contig(u_long npages, vm_paddr_t low, - vm_paddr_t high, u_long alignment, vm_paddr_t boundary); -boolean_t vm_reserv_reclaim_inactive(void); +boolean_t vm_reserv_reclaim_contig(int domain, u_long npages, + vm_paddr_t low, vm_paddr_t high, u_long alignment, + vm_paddr_t boundary); +boolean_t vm_reserv_reclaim_inactive(int domain); void vm_reserv_rename(vm_page_t m, vm_object_t new_object, vm_object_t old_object, vm_pindex_t old_object_offset); int vm_reserv_size(int level); Index: head/sys/vm/vm_reserv.c =================================================================== --- head/sys/vm/vm_reserv.c +++ head/sys/vm/vm_reserv.c @@ -170,6 +170,7 @@ vm_object_t object; /* containing object */ vm_pindex_t pindex; /* offset within object */ vm_page_t pages; /* first page of a superpage */ + int domain; /* NUMA domain */ int popcnt; /* # of pages in use */ char inpartpopq; popmap_t popmap[NPOPMAP]; /* bit vector of used pages */ @@ -207,8 +208,7 @@ * * Access to this queue is synchronized by the free page queue lock. 
*/ -static TAILQ_HEAD(, vm_reserv) vm_rvq_partpop = - TAILQ_HEAD_INITIALIZER(vm_rvq_partpop); +static TAILQ_HEAD(, vm_reserv) vm_rvq_partpop[MAXMEMDOM]; static SYSCTL_NODE(_vm, OID_AUTO, reserv, CTLFLAG_RD, 0, "Reservation Info"); @@ -277,24 +277,27 @@ { struct sbuf sbuf; vm_reserv_t rv; - int counter, error, level, unused_pages; + int counter, error, domain, level, unused_pages; error = sysctl_wire_old_buffer(req, 0); if (error != 0) return (error); sbuf_new_for_sysctl(&sbuf, NULL, 128, req); - sbuf_printf(&sbuf, "\nLEVEL SIZE NUMBER\n\n"); - for (level = -1; level <= VM_NRESERVLEVEL - 2; level++) { - counter = 0; - unused_pages = 0; - mtx_lock(&vm_page_queue_free_mtx); - TAILQ_FOREACH(rv, &vm_rvq_partpop/*[level]*/, partpopq) { - counter++; - unused_pages += VM_LEVEL_0_NPAGES - rv->popcnt; + sbuf_printf(&sbuf, "\nDOMAIN LEVEL SIZE NUMBER\n\n"); + for (domain = 0; domain < vm_ndomains; domain++) { + for (level = -1; level <= VM_NRESERVLEVEL - 2; level++) { + counter = 0; + unused_pages = 0; + mtx_lock(&vm_page_queue_free_mtx); + TAILQ_FOREACH(rv, &vm_rvq_partpop[domain], partpopq) { + counter++; + unused_pages += VM_LEVEL_0_NPAGES - rv->popcnt; + } + mtx_unlock(&vm_page_queue_free_mtx); + sbuf_printf(&sbuf, "%6d, %7d, %6dK, %6d\n", + domain, level, + unused_pages * ((int)PAGE_SIZE / 1024), counter); } - mtx_unlock(&vm_page_queue_free_mtx); - sbuf_printf(&sbuf, "%5d: %6dK, %6d\n", level, - unused_pages * ((int)PAGE_SIZE / 1024), counter); } error = sbuf_finish(&sbuf); sbuf_delete(&sbuf); @@ -321,8 +324,11 @@ index)); KASSERT(rv->popcnt > 0, ("vm_reserv_depopulate: reserv %p's popcnt is corrupted", rv)); + KASSERT(rv->domain >= 0 && rv->domain < vm_ndomains, + ("vm_reserv_depopulate: reserv %p's domain is corrupted %d", + rv, rv->domain)); if (rv->inpartpopq) { - TAILQ_REMOVE(&vm_rvq_partpop, rv, partpopq); + TAILQ_REMOVE(&vm_rvq_partpop[rv->domain], rv, partpopq); rv->inpartpopq = FALSE; } else { KASSERT(rv->pages->psind == 1, @@ -335,11 +341,12 @@ if (rv->popcnt 
== 0) { LIST_REMOVE(rv, objq); rv->object = NULL; + rv->domain = -1; vm_phys_free_pages(rv->pages, VM_LEVEL_0_ORDER); vm_reserv_freed++; } else { rv->inpartpopq = TRUE; - TAILQ_INSERT_TAIL(&vm_rvq_partpop, rv, partpopq); + TAILQ_INSERT_TAIL(&vm_rvq_partpop[rv->domain], rv, partpopq); } } @@ -384,15 +391,18 @@ ("vm_reserv_populate: reserv %p is already full", rv)); KASSERT(rv->pages->psind == 0, ("vm_reserv_populate: reserv %p is already promoted", rv)); + KASSERT(rv->domain >= 0 && rv->domain < vm_ndomains, + ("vm_reserv_populate: reserv %p's domain is corrupted %d", + rv, rv->domain)); if (rv->inpartpopq) { - TAILQ_REMOVE(&vm_rvq_partpop, rv, partpopq); + TAILQ_REMOVE(&vm_rvq_partpop[rv->domain], rv, partpopq); rv->inpartpopq = FALSE; } popmap_set(rv->popmap, index); rv->popcnt++; if (rv->popcnt < VM_LEVEL_0_NPAGES) { rv->inpartpopq = TRUE; - TAILQ_INSERT_TAIL(&vm_rvq_partpop, rv, partpopq); + TAILQ_INSERT_TAIL(&vm_rvq_partpop[rv->domain], rv, partpopq); } else rv->pages->psind = 1; } @@ -413,9 +423,9 @@ * The object and free page queue must be locked. */ vm_page_t -vm_reserv_alloc_contig(vm_object_t object, vm_pindex_t pindex, u_long npages, - vm_paddr_t low, vm_paddr_t high, u_long alignment, vm_paddr_t boundary, - vm_page_t mpred) +vm_reserv_alloc_contig(vm_object_t object, vm_pindex_t pindex, int domain, + u_long npages, vm_paddr_t low, vm_paddr_t high, u_long alignment, + vm_paddr_t boundary, vm_page_t mpred) { vm_paddr_t pa, size; vm_page_t m, m_ret, msucc; @@ -535,7 +545,7 @@ * specified index may not be the first page within the first new * reservation. */ - m = vm_phys_alloc_contig(allocpages, low, high, ulmax(alignment, + m = vm_phys_alloc_contig(domain, allocpages, low, high, ulmax(alignment, VM_LEVEL_0_SIZE), boundary > VM_LEVEL_0_SIZE ? 
boundary : 0); if (m == NULL) return (NULL); @@ -558,6 +568,7 @@ LIST_INSERT_HEAD(&object->rvq, rv, objq); rv->object = object; rv->pindex = first; + rv->domain = vm_phys_domidx(m); KASSERT(rv->popcnt == 0, ("vm_reserv_alloc_contig: reserv %p's popcnt is corrupted", rv)); @@ -613,7 +624,8 @@ * The object and free page queue must be locked. */ vm_page_t -vm_reserv_alloc_page(vm_object_t object, vm_pindex_t pindex, vm_page_t mpred) +vm_reserv_alloc_page(vm_object_t object, vm_pindex_t pindex, int domain, + vm_page_t mpred) { vm_page_t m, msucc; vm_pindex_t first, leftcap, rightcap; @@ -692,7 +704,7 @@ /* * Allocate and populate the new reservation. */ - m = vm_phys_alloc_pages(VM_FREEPOOL_DEFAULT, VM_LEVEL_0_ORDER); + m = vm_phys_alloc_pages(domain, VM_FREEPOOL_DEFAULT, VM_LEVEL_0_ORDER); if (m == NULL) return (NULL); rv = vm_reserv_from_page(m); @@ -703,6 +715,7 @@ LIST_INSERT_HEAD(&object->rvq, rv, objq); rv->object = object; rv->pindex = first; + rv->domain = vm_phys_domidx(m); KASSERT(rv->popcnt == 0, ("vm_reserv_alloc_page: reserv %p's popcnt is corrupted", rv)); KASSERT(!rv->inpartpopq, @@ -749,6 +762,7 @@ ("vm_reserv_break: reserv %p's inpartpopq is TRUE", rv)); LIST_REMOVE(rv, objq); rv->object = NULL; + rv->domain = -1; if (m != NULL) { /* * Since the reservation is being broken, there is no harm in @@ -818,7 +832,7 @@ KASSERT(rv->object == object, ("vm_reserv_break_all: reserv %p is corrupted", rv)); if (rv->inpartpopq) { - TAILQ_REMOVE(&vm_rvq_partpop, rv, partpopq); + TAILQ_REMOVE(&vm_rvq_partpop[rv->domain], rv, partpopq); rv->inpartpopq = FALSE; } vm_reserv_break(rv, NULL); @@ -856,7 +870,7 @@ { vm_paddr_t paddr; struct vm_phys_seg *seg; - int segind; + int i, segind; /* * Initialize the reservation array. 
Specifically, initialize the @@ -871,6 +885,8 @@ paddr += VM_LEVEL_0_SIZE; } } + for (i = 0; i < MAXMEMDOM; i++) + TAILQ_INIT(&vm_rvq_partpop[i]); } /* @@ -928,7 +944,10 @@ mtx_assert(&vm_page_queue_free_mtx, MA_OWNED); KASSERT(rv->inpartpopq, ("vm_reserv_reclaim: reserv %p's inpartpopq is FALSE", rv)); - TAILQ_REMOVE(&vm_rvq_partpop, rv, partpopq); + KASSERT(rv->domain >= 0 && rv->domain < vm_ndomains, + ("vm_reserv_reclaim: reserv %p's domain is corrupted %d", + rv, rv->domain)); + TAILQ_REMOVE(&vm_rvq_partpop[rv->domain], rv, partpopq); rv->inpartpopq = FALSE; vm_reserv_break(rv, NULL); vm_reserv_reclaimed++; @@ -942,12 +961,12 @@ * The free page queue lock must be held. */ boolean_t -vm_reserv_reclaim_inactive(void) +vm_reserv_reclaim_inactive(int domain) { vm_reserv_t rv; mtx_assert(&vm_page_queue_free_mtx, MA_OWNED); - if ((rv = TAILQ_FIRST(&vm_rvq_partpop)) != NULL) { + if ((rv = TAILQ_FIRST(&vm_rvq_partpop[domain])) != NULL) { vm_reserv_reclaim(rv); return (TRUE); } @@ -963,8 +982,8 @@ * The free page queue lock must be held. */ boolean_t -vm_reserv_reclaim_contig(u_long npages, vm_paddr_t low, vm_paddr_t high, - u_long alignment, vm_paddr_t boundary) +vm_reserv_reclaim_contig(int domain, u_long npages, vm_paddr_t low, + vm_paddr_t high, u_long alignment, vm_paddr_t boundary) { vm_paddr_t pa, size; vm_reserv_t rv; @@ -974,7 +993,7 @@ if (npages > VM_LEVEL_0_NPAGES - 1) return (FALSE); size = npages << PAGE_SHIFT; - TAILQ_FOREACH(rv, &vm_rvq_partpop, partpopq) { + TAILQ_FOREACH(rv, &vm_rvq_partpop[domain], partpopq) { pa = VM_PAGE_TO_PHYS(&rv->pages[VM_LEVEL_0_NPAGES - 1]); if (pa + PAGE_SIZE - size < low) { /* This entire reservation is too low; go to next. */