Index: head/sys/vm/vm_page.c =================================================================== --- head/sys/vm/vm_page.c +++ head/sys/vm/vm_page.c @@ -177,7 +177,6 @@ static void vm_page_alloc_check(vm_page_t m); static void vm_page_clear_dirty_mask(vm_page_t m, vm_page_bits_t pagebits); static void vm_page_enqueue(uint8_t queue, vm_page_t m); -static void vm_page_free_phys(struct vm_domain *vmd, vm_page_t m); static void vm_page_init(void *dummy); static int vm_page_insert_after(vm_page_t m, vm_object_t object, vm_pindex_t pindex, vm_page_t mpred); @@ -1677,10 +1676,10 @@ * for the request class and false otherwise. */ int -vm_domain_available(struct vm_domain *vmd, int req, int npages) +vm_domain_allocate(struct vm_domain *vmd, int req, int npages) { + u_int limit, old, new; - vm_domain_free_assert_locked(vmd); req = req & VM_ALLOC_CLASS_MASK; /* @@ -1688,15 +1687,34 @@ */ if (curproc == pageproc && req != VM_ALLOC_INTERRUPT) req = VM_ALLOC_SYSTEM; + if (req == VM_ALLOC_INTERRUPT) + limit = 0; + else if (req == VM_ALLOC_SYSTEM) + limit = vmd->vmd_interrupt_free_min; + else + limit = vmd->vmd_free_reserved; - if (vmd->vmd_free_count >= npages + vmd->vmd_free_reserved || - (req == VM_ALLOC_SYSTEM && - vmd->vmd_free_count >= npages + vmd->vmd_interrupt_free_min) || - (req == VM_ALLOC_INTERRUPT && - vmd->vmd_free_count >= npages)) - return (1); + /* + * Attempt to reserve the pages. Fail if we're below the limit. + */ + limit += npages; + old = vmd->vmd_free_count; + do { + if (old < limit) + return (0); + new = old - npages; + } while (atomic_fcmpset_int(&vmd->vmd_free_count, &old, new) == 0); - return (0); + /* Wake the page daemon if we've crossed the threshold. */ + if (vm_paging_needed(vmd, new) && !vm_paging_needed(vmd, old)) + pagedaemon_wakeup(vmd->vmd_domain); + + /* Only update bitsets on transitions. 
*/ + if ((old >= vmd->vmd_free_min && new < vmd->vmd_free_min) || + (old >= vmd->vmd_free_severe && new < vmd->vmd_free_severe)) + vm_domain_set(vmd); + + return (1); } vm_page_t @@ -1723,44 +1741,34 @@ again: m = NULL; #if VM_NRESERVLEVEL > 0 + /* + * Can we allocate the page from a reservation? + */ if (vm_object_reserv(object) && - (m = vm_reserv_extend(req, object, pindex, domain, mpred)) - != NULL) { + ((m = vm_reserv_extend(req, object, pindex, domain, mpred)) != NULL || + (m = vm_reserv_alloc_page(req, object, pindex, domain, mpred)) != NULL)) { domain = vm_phys_domain(m); vmd = VM_DOMAIN(domain); goto found; } #endif vmd = VM_DOMAIN(domain); - vm_domain_free_lock(vmd); - if (vm_domain_available(vmd, req, 1)) { + if (vm_domain_allocate(vmd, req, 1)) { /* - * Can we allocate the page from a reservation? + * If not, allocate it from the free page queues. */ + vm_domain_free_lock(vmd); + m = vm_phys_alloc_pages(domain, object != NULL ? + VM_FREEPOOL_DEFAULT : VM_FREEPOOL_DIRECT, 0); + vm_domain_free_unlock(vmd); + if (m == NULL) { + vm_domain_freecnt_inc(vmd, 1); #if VM_NRESERVLEVEL > 0 - if (!vm_object_reserv(object) || - (m = vm_reserv_alloc_page(object, pindex, - domain, mpred)) == NULL) + if (vm_reserv_reclaim_inactive(domain)) + goto again; #endif - { - /* - * If not, allocate it from the free page queues. - */ - m = vm_phys_alloc_pages(domain, object != NULL ? - VM_FREEPOOL_DEFAULT : VM_FREEPOOL_DIRECT, 0); -#if VM_NRESERVLEVEL > 0 - if (m == NULL && vm_reserv_reclaim_inactive(domain)) { - m = vm_phys_alloc_pages(domain, - object != NULL ? - VM_FREEPOOL_DEFAULT : VM_FREEPOOL_DIRECT, - 0); - } -#endif } } - if (m != NULL) - vm_domain_freecnt_dec(vmd, 1); - vm_domain_free_unlock(vmd); if (m == NULL) { /* * Not allocatable, give up. 
@@ -1775,9 +1783,7 @@ */ KASSERT(m != NULL, ("missing page")); -#if VM_NRESERVLEVEL > 0 found: -#endif vm_page_alloc_check(m); /* @@ -1934,9 +1940,14 @@ */ again: #if VM_NRESERVLEVEL > 0 + /* + * Can we allocate the pages from a reservation? + */ if (vm_object_reserv(object) && - (m_ret = vm_reserv_extend_contig(req, object, pindex, domain, - npages, low, high, alignment, boundary, mpred)) != NULL) { + ((m_ret = vm_reserv_extend_contig(req, object, pindex, domain, + npages, low, high, alignment, boundary, mpred)) != NULL || + (m_ret = vm_reserv_alloc_contig(req, object, pindex, domain, + npages, low, high, alignment, boundary, mpred)) != NULL)) { domain = vm_phys_domain(m_ret); vmd = VM_DOMAIN(domain); goto found; @@ -1944,31 +1955,23 @@ #endif m_ret = NULL; vmd = VM_DOMAIN(domain); - vm_domain_free_lock(vmd); - if (vm_domain_available(vmd, req, npages)) { + if (vm_domain_allocate(vmd, req, npages)) { /* - * Can we allocate the pages from a reservation? + * allocate them from the free page queues. */ + vm_domain_free_lock(vmd); + m_ret = vm_phys_alloc_contig(domain, npages, low, high, + alignment, boundary); + vm_domain_free_unlock(vmd); + if (m_ret == NULL) { + vm_domain_freecnt_inc(vmd, npages); #if VM_NRESERVLEVEL > 0 -retry: - if (!vm_object_reserv(object) || - (m_ret = vm_reserv_alloc_contig(object, pindex, domain, - npages, low, high, alignment, boundary, mpred)) == NULL) + if (vm_reserv_reclaim_contig(domain, npages, low, + high, alignment, boundary)) + goto again; #endif - /* - * If not, allocate them from the free page queues. 
- */ - m_ret = vm_phys_alloc_contig(domain, npages, low, high, - alignment, boundary); -#if VM_NRESERVLEVEL > 0 - if (m_ret == NULL && vm_reserv_reclaim_contig( - domain, npages, low, high, alignment, boundary)) - goto retry; -#endif + } } - if (m_ret != NULL) - vm_domain_freecnt_dec(vmd, npages); - vm_domain_free_unlock(vmd); if (m_ret == NULL) { if (vm_domain_alloc_fail(vmd, object, req)) goto again; @@ -2109,13 +2112,14 @@ */ vmd = VM_DOMAIN(domain); again: - vm_domain_free_lock(vmd); - if (vm_domain_available(vmd, req, 1)) + if (vm_domain_allocate(vmd, req, 1)) { + vm_domain_free_lock(vmd); m = vm_phys_alloc_freelist_pages(domain, freelist, VM_FREEPOOL_DIRECT, 0); - if (m != NULL) - vm_domain_freecnt_dec(vmd, 1); - vm_domain_free_unlock(vmd); + vm_domain_free_unlock(vmd); + if (m == NULL) + vm_domain_freecnt_inc(vmd, 1); + } if (m == NULL) { if (vm_domain_alloc_fail(vmd, NULL, req)) goto again; @@ -2491,8 +2495,9 @@ vm_page_remque(m); vm_page_replace_checked(m_new, object, m->pindex, m); - m->valid = 0; - vm_page_undirty(m); + if (vm_page_free_prep(m, false)) + SLIST_INSERT_HEAD(&free, m, + plinks.s.ss); /* * The new page must be deactivated @@ -2504,10 +2509,12 @@ m->flags &= ~PG_ZERO; vm_page_remque(m); vm_page_remove(m); + if (vm_page_free_prep(m, false)) + SLIST_INSERT_HEAD(&free, m, + plinks.s.ss); KASSERT(m->dirty == 0, ("page %p is dirty", m)); } - SLIST_INSERT_HEAD(&free, m, plinks.s.ss); } else error = EBUSY; unlock: @@ -2548,7 +2555,7 @@ do { MPASS(vm_phys_domain(m) == domain); SLIST_REMOVE_HEAD(&free, plinks.s.ss); - vm_page_free_phys(vmd, m); + vm_phys_free_pages(m, 0); cnt++; } while ((m = SLIST_FIRST(&free)) != NULL); vm_domain_free_unlock(vmd); @@ -3159,24 +3166,12 @@ if (pmap_page_get_memattr(m) != VM_MEMATTR_DEFAULT) pmap_page_set_memattr(m, VM_MEMATTR_DEFAULT); - return (true); -} - -/* - * Insert the page into the physical memory allocator's free page - * queues. This is the last step to free a page. 
The caller is - * responsible for adjusting the free page count. - */ -static void -vm_page_free_phys(struct vm_domain *vmd, vm_page_t m) -{ - - vm_domain_free_assert_locked(vmd); - #if VM_NRESERVLEVEL > 0 - if (!vm_reserv_free_page(m)) + if (vm_reserv_free_page(m)) + return (false); #endif - vm_phys_free_pages(m, 0); + + return (true); } void @@ -3200,7 +3195,7 @@ vmd = vm_pagequeue_domain(m); vm_domain_free_lock(vmd); } - vm_page_free_phys(vmd, m); + vm_phys_free_pages(m, 0); cnt++; } if (vmd != NULL) { @@ -3227,7 +3222,7 @@ return; vmd = vm_pagequeue_domain(m); vm_domain_free_lock(vmd); - vm_page_free_phys(vmd, m); + vm_phys_free_pages(m, 0); vm_domain_free_unlock(vmd); vm_domain_freecnt_inc(vmd, 1); } Index: head/sys/vm/vm_pagequeue.h =================================================================== --- head/sys/vm/vm_pagequeue.h +++ head/sys/vm/vm_pagequeue.h @@ -180,7 +180,7 @@ void vm_domain_set(struct vm_domain *vmd); void vm_domain_clear(struct vm_domain *vmd); -int vm_domain_available(struct vm_domain *vmd, int req, int npages); +int vm_domain_allocate(struct vm_domain *vmd, int req, int npages); /* * vm_pagequeue_domain: @@ -265,23 +265,6 @@ new >= vmd->vmd_pageout_free_min))) vm_domain_clear(vmd); } - -static inline void -vm_domain_freecnt_dec(struct vm_domain *vmd, int adj) -{ - u_int old, new; - - old = atomic_fetchadd_int(&vmd->vmd_free_count, -adj); - new = old - adj; - KASSERT(new >= 0, ("vm_domain_freecnt_dec: free count underflow")); - if (vm_paging_needed(vmd, new) && !vm_paging_needed(vmd, old)) - pagedaemon_wakeup(vmd->vmd_domain); - /* Only update bitsets on transitions. 
*/ - if ((old >= vmd->vmd_free_min && new < vmd->vmd_free_min) || - (old >= vmd->vmd_free_severe && new < vmd->vmd_free_severe)) - vm_domain_set(vmd); -} - #endif /* _KERNEL */ #endif /* !_VM_PAGEQUEUE_ */ Index: head/sys/vm/vm_reserv.h =================================================================== --- head/sys/vm/vm_reserv.h +++ head/sys/vm/vm_reserv.h @@ -47,14 +47,14 @@ /* * The following functions are only to be used by the virtual memory system. */ -vm_page_t vm_reserv_alloc_contig(vm_object_t object, vm_pindex_t pindex, +vm_page_t vm_reserv_alloc_contig(int req, vm_object_t object, vm_pindex_t pindex, int domain, u_long npages, vm_paddr_t low, vm_paddr_t high, u_long alignment, vm_paddr_t boundary, vm_page_t mpred); vm_page_t vm_reserv_extend_contig(int req, vm_object_t object, vm_pindex_t pindex, int domain, u_long npages, vm_paddr_t low, vm_paddr_t high, u_long alignment, vm_paddr_t boundary, vm_page_t mpred); -vm_page_t vm_reserv_alloc_page(vm_object_t object, vm_pindex_t pindex, +vm_page_t vm_reserv_alloc_page(int req, vm_object_t object, vm_pindex_t pindex, int domain, vm_page_t mpred); vm_page_t vm_reserv_extend(int req, vm_object_t object, vm_pindex_t pindex, int domain, vm_page_t mpred); Index: head/sys/vm/vm_reserv.c =================================================================== --- head/sys/vm/vm_reserv.c +++ head/sys/vm/vm_reserv.c @@ -45,6 +45,8 @@ #include #include +#include +#include #include #include #include @@ -54,6 +56,7 @@ #include #include #include +#include #include #include @@ -166,22 +169,37 @@ * * A partially populated reservation can be broken and reclaimed at any time. * - * f - vm_domain_free_lock + * r - vm_reserv_lock + * d - vm_reserv_domain_lock * o - vm_reserv_object_lock * c - constant after boot */ struct vm_reserv { - TAILQ_ENTRY(vm_reserv) partpopq; /* (f) per-domain queue. 
*/ - LIST_ENTRY(vm_reserv) objq; /* (o, f) object queue */ - vm_object_t object; /* (o, f) containing object */ - vm_pindex_t pindex; /* (o, f) offset in object */ + struct mtx lock; /* reservation lock. */ + TAILQ_ENTRY(vm_reserv) partpopq; /* (d) per-domain queue. */ + LIST_ENTRY(vm_reserv) objq; /* (o, r) object queue */ + vm_object_t object; /* (o, r) containing object */ + vm_pindex_t pindex; /* (o, r) offset in object */ vm_page_t pages; /* (c) first page */ - int domain; /* (c) NUMA domain. */ - int popcnt; /* (f) # of pages in use */ - char inpartpopq; /* (f) */ - popmap_t popmap[NPOPMAP]; /* (f) bit vector, used pages */ + uint16_t domain; /* (c) NUMA domain. */ + uint16_t popcnt; /* (r) # of pages in use */ + char inpartpopq; /* (d) */ + popmap_t popmap[NPOPMAP]; /* (r) bit vector, used pages */ }; +#define vm_reserv_lockptr(rv) (&(rv)->lock) +#define vm_reserv_assert_locked(rv) \ + mtx_assert(vm_reserv_lockptr(rv), MA_OWNED) +#define vm_reserv_lock(rv) mtx_lock(vm_reserv_lockptr(rv)) +#define vm_reserv_trylock(rv) mtx_trylock(vm_reserv_lockptr(rv)) +#define vm_reserv_unlock(rv) mtx_unlock(vm_reserv_lockptr(rv)) + +static struct mtx_padalign vm_reserv_domain_locks[MAXMEMDOM]; + +#define vm_reserv_domain_lockptr(d) (&vm_reserv_domain_locks[(d)]) +#define vm_reserv_domain_lock(d) mtx_lock(vm_reserv_domain_lockptr(d)) +#define vm_reserv_domain_unlock(d) mtx_unlock(vm_reserv_domain_lockptr(d)) + /* * The reservation array * @@ -218,13 +236,13 @@ static SYSCTL_NODE(_vm, OID_AUTO, reserv, CTLFLAG_RD, 0, "Reservation Info"); -static long vm_reserv_broken; -SYSCTL_LONG(_vm_reserv, OID_AUTO, broken, CTLFLAG_RD, - &vm_reserv_broken, 0, "Cumulative number of broken reservations"); +static counter_u64_t vm_reserv_broken = EARLY_COUNTER; +SYSCTL_COUNTER_U64(_vm_reserv, OID_AUTO, broken, CTLFLAG_RD, + &vm_reserv_broken, "Cumulative number of broken reservations"); -static long vm_reserv_freed; -SYSCTL_LONG(_vm_reserv, OID_AUTO, freed, CTLFLAG_RD, - &vm_reserv_freed, 0, 
"Cumulative number of freed reservations"); +static counter_u64_t vm_reserv_freed = EARLY_COUNTER; +SYSCTL_COUNTER_U64(_vm_reserv, OID_AUTO, freed, CTLFLAG_RD, + &vm_reserv_freed, "Cumulative number of freed reservations"); static int sysctl_vm_reserv_fullpop(SYSCTL_HANDLER_ARGS); @@ -236,9 +254,9 @@ SYSCTL_OID(_vm_reserv, OID_AUTO, partpopq, CTLTYPE_STRING | CTLFLAG_RD, NULL, 0, sysctl_vm_reserv_partpopq, "A", "Partially populated reservation queues"); -static long vm_reserv_reclaimed; -SYSCTL_LONG(_vm_reserv, OID_AUTO, reclaimed, CTLFLAG_RD, - &vm_reserv_reclaimed, 0, "Cumulative number of reclaimed reservations"); +static counter_u64_t vm_reserv_reclaimed = EARLY_COUNTER; +SYSCTL_COUNTER_U64(_vm_reserv, OID_AUTO, reclaimed, CTLFLAG_RD, + &vm_reserv_reclaimed, "Cumulative number of reclaimed reservations"); /* * The object lock pool is used to synchronize the rvq. We can not use a @@ -313,12 +331,12 @@ for (level = -1; level <= VM_NRESERVLEVEL - 2; level++) { counter = 0; unused_pages = 0; - vm_domain_free_lock(VM_DOMAIN(domain)); + vm_reserv_domain_lock(domain); TAILQ_FOREACH(rv, &vm_rvq_partpop[domain], partpopq) { counter++; unused_pages += VM_LEVEL_0_NPAGES - rv->popcnt; } - vm_domain_free_unlock(VM_DOMAIN(domain)); + vm_reserv_domain_unlock(domain); sbuf_printf(&sbuf, "%6d, %7d, %6dK, %6d\n", domain, level, unused_pages * ((int)PAGE_SIZE / 1024), counter); @@ -337,6 +355,9 @@ { vm_object_t object; + vm_reserv_assert_locked(rv); + CTR5(KTR_VM, "%s: rv %p object %p popcnt %d inpartpop %d", + __FUNCTION__, rv, rv->object, rv->popcnt, rv->inpartpopq); KASSERT(rv->object != NULL, ("vm_reserv_remove: reserv %p is free", rv)); KASSERT(!rv->inpartpopq, @@ -356,6 +377,11 @@ { int i; + vm_reserv_assert_locked(rv); + CTR6(KTR_VM, + "%s: rv %p(%p) object %p new %p popcnt %d", + __FUNCTION__, rv, rv->pages, rv->object, object, + rv->popcnt); KASSERT(rv->object == NULL, ("vm_reserv_insert: reserv %p isn't free", rv)); KASSERT(rv->popcnt == 0, @@ -377,14 +403,15 @@ * 
becomes zero, the reservation is destroyed. Additionally, moves the * reservation to the tail of the partially populated reservation queue if the * population count is non-zero. - * - * The free page queue lock must be held. */ static void vm_reserv_depopulate(vm_reserv_t rv, int index) { + struct vm_domain *vmd; - vm_domain_free_assert_locked(VM_DOMAIN(rv->domain)); + vm_reserv_assert_locked(rv); + CTR5(KTR_VM, "%s: rv %p object %p popcnt %d inpartpop %d", + __FUNCTION__, rv, rv->object, rv->popcnt, rv->inpartpopq); KASSERT(rv->object != NULL, ("vm_reserv_depopulate: reserv %p is free", rv)); KASSERT(popmap_is_set(rv->popmap, index), @@ -395,10 +422,7 @@ KASSERT(rv->domain >= 0 && rv->domain < vm_ndomains, ("vm_reserv_depopulate: reserv %p's domain is corrupted %d", rv, rv->domain)); - if (rv->inpartpopq) { - TAILQ_REMOVE(&vm_rvq_partpop[rv->domain], rv, partpopq); - rv->inpartpopq = FALSE; - } else { + if (rv->popcnt == VM_LEVEL_0_NPAGES) { KASSERT(rv->pages->psind == 1, ("vm_reserv_depopulate: reserv %p is already demoted", rv)); @@ -406,14 +430,25 @@ } popmap_clear(rv->popmap, index); rv->popcnt--; + vm_reserv_domain_lock(rv->domain); + if (rv->inpartpopq) { + TAILQ_REMOVE(&vm_rvq_partpop[rv->domain], rv, partpopq); + rv->inpartpopq = FALSE; + } + if (rv->popcnt != 0) { + rv->inpartpopq = TRUE; + TAILQ_INSERT_TAIL(&vm_rvq_partpop[rv->domain], rv, partpopq); + } + vm_reserv_domain_unlock(rv->domain); + vmd = VM_DOMAIN(rv->domain); if (rv->popcnt == 0) { vm_reserv_remove(rv); + vm_domain_free_lock(vmd); vm_phys_free_pages(rv->pages, VM_LEVEL_0_ORDER); - vm_reserv_freed++; - } else { - rv->inpartpopq = TRUE; - TAILQ_INSERT_TAIL(&vm_rvq_partpop[rv->domain], rv, partpopq); + vm_domain_free_unlock(vmd); + counter_u64_add(vm_reserv_freed, 1); } + vm_domain_freecnt_inc(vmd, 1); } /* @@ -484,7 +519,9 @@ vm_reserv_populate(vm_reserv_t rv, int index) { - vm_domain_free_assert_locked(VM_DOMAIN(rv->domain)); + vm_reserv_assert_locked(rv); + CTR5(KTR_VM, "%s: rv %p object %p 
popcnt %d inpartpop %d", + __FUNCTION__, rv, rv->object, rv->popcnt, rv->inpartpopq); KASSERT(rv->object != NULL, ("vm_reserv_populate: reserv %p is free", rv)); KASSERT(popmap_is_clear(rv->popmap, index), @@ -497,17 +534,23 @@ KASSERT(rv->domain >= 0 && rv->domain < vm_ndomains, ("vm_reserv_populate: reserv %p's domain is corrupted %d", rv, rv->domain)); + popmap_set(rv->popmap, index); + rv->popcnt++; + vm_reserv_domain_lock(rv->domain); if (rv->inpartpopq) { TAILQ_REMOVE(&vm_rvq_partpop[rv->domain], rv, partpopq); rv->inpartpopq = FALSE; } - popmap_set(rv->popmap, index); - rv->popcnt++; if (rv->popcnt < VM_LEVEL_0_NPAGES) { rv->inpartpopq = TRUE; TAILQ_INSERT_TAIL(&vm_rvq_partpop[rv->domain], rv, partpopq); - } else + } else { + KASSERT(rv->pages->psind == 0, + ("vm_reserv_populate: reserv %p is already promoted", + rv)); rv->pages->psind = 1; + } + vm_reserv_domain_unlock(rv->domain); } /* @@ -572,31 +615,29 @@ return (NULL); domain = rv->domain; vmd = VM_DOMAIN(domain); - vm_domain_free_lock(vmd); - if (rv->object != object || !vm_domain_available(vmd, req, npages)) { - m = NULL; + vm_reserv_lock(rv); + if (rv->object != object) goto out; - } m = &rv->pages[index]; pa = VM_PAGE_TO_PHYS(m); if (pa < low || pa + size > high || (pa & (alignment - 1)) != 0 || - ((pa ^ (pa + size - 1)) & ~(boundary - 1)) != 0) { - m = NULL; + ((pa ^ (pa + size - 1)) & ~(boundary - 1)) != 0) goto out; - } /* Handle vm_page_rename(m, new_object, ...). */ for (i = 0; i < npages; i++) { - if (popmap_is_set(rv->popmap, index + i)) { - m = NULL; + if (popmap_is_set(rv->popmap, index + i)) goto out; - } } + if (!vm_domain_allocate(vmd, req, npages)) + goto out; for (i = 0; i < npages; i++) vm_reserv_populate(rv, index + i); - vm_domain_freecnt_dec(vmd, npages); -out: - vm_domain_free_unlock(vmd); + vm_reserv_unlock(rv); return (m); + +out: + vm_reserv_unlock(rv); + return (NULL); } /* @@ -618,10 +659,11 @@ * The object and free page queue must be locked. 
*/ vm_page_t -vm_reserv_alloc_contig(vm_object_t object, vm_pindex_t pindex, int domain, +vm_reserv_alloc_contig(int req, vm_object_t object, vm_pindex_t pindex, int domain, u_long npages, vm_paddr_t low, vm_paddr_t high, u_long alignment, vm_paddr_t boundary, vm_page_t mpred) { + struct vm_domain *vmd; vm_paddr_t pa, size; vm_page_t m, m_ret, msucc; vm_pindex_t first, leftcap, rightcap; @@ -629,7 +671,6 @@ u_long allocpages, maxpages, minpages; int i, index, n; - vm_domain_free_assert_locked(VM_DOMAIN(domain)); VM_OBJECT_ASSERT_WLOCKED(object); KASSERT(npages != 0, ("vm_reserv_alloc_contig: npages is 0")); @@ -737,9 +778,19 @@ * specified index may not be the first page within the first new * reservation. */ - m = vm_phys_alloc_contig(domain, allocpages, low, high, ulmax(alignment, - VM_LEVEL_0_SIZE), boundary > VM_LEVEL_0_SIZE ? boundary : 0); - if (m == NULL) + m = NULL; + vmd = VM_DOMAIN(domain); + if (vm_domain_allocate(vmd, req, npages)) { + vm_domain_free_lock(vmd); + m = vm_phys_alloc_contig(domain, allocpages, low, high, + ulmax(alignment, VM_LEVEL_0_SIZE), + boundary > VM_LEVEL_0_SIZE ? boundary : 0); + vm_domain_free_unlock(vmd); + if (m == NULL) { + vm_domain_freecnt_inc(vmd, npages); + return (NULL); + } + } else return (NULL); KASSERT(vm_phys_domain(m) == domain, ("vm_reserv_alloc_contig: Page domain does not match requested.")); @@ -757,6 +808,7 @@ KASSERT(rv->pages == m, ("vm_reserv_alloc_contig: reserv %p's pages is corrupted", rv)); + vm_reserv_lock(rv); vm_reserv_insert(rv, object, first); n = ulmin(VM_LEVEL_0_NPAGES - index, npages); for (i = 0; i < n; i++) @@ -766,6 +818,7 @@ m_ret = &rv->pages[index]; index = 0; } + vm_reserv_unlock(rv); m += VM_LEVEL_0_NPAGES; first += VM_LEVEL_0_NPAGES; allocpages -= VM_LEVEL_0_NPAGES; @@ -813,18 +866,20 @@ vmd = VM_DOMAIN(domain); index = VM_RESERV_INDEX(object, pindex); m = &rv->pages[index]; - vm_domain_free_lock(vmd); - if (vm_domain_available(vmd, req, 1) == 0 || - /* Handle reclaim race. 
*/ - rv->object != object || + vm_reserv_lock(rv); + /* Handle reclaim race. */ + if (rv->object != object || /* Handle vm_page_rename(m, new_object, ...). */ - popmap_is_set(rv->popmap, index)) + popmap_is_set(rv->popmap, index)) { m = NULL; - if (m != NULL) { - vm_reserv_populate(rv, index); - vm_domain_freecnt_dec(vmd, 1); + goto out; } - vm_domain_free_unlock(vmd); + if (vm_domain_allocate(vmd, req, 1) == 0) + m = NULL; + else + vm_reserv_populate(rv, index); +out: + vm_reserv_unlock(rv); return (m); } @@ -840,15 +895,15 @@ * The object and free page queue must be locked. */ vm_page_t -vm_reserv_alloc_page(vm_object_t object, vm_pindex_t pindex, int domain, +vm_reserv_alloc_page(int req, vm_object_t object, vm_pindex_t pindex, int domain, vm_page_t mpred) { + struct vm_domain *vmd; vm_page_t m, msucc; vm_pindex_t first, leftcap, rightcap; vm_reserv_t rv; int index; - vm_domain_free_assert_locked(VM_DOMAIN(domain)); VM_OBJECT_ASSERT_WLOCKED(object); /* @@ -917,15 +972,28 @@ /* * Allocate and populate the new reservation. 
*/ - m = vm_phys_alloc_pages(domain, VM_FREEPOOL_DEFAULT, VM_LEVEL_0_ORDER); - if (m == NULL) + m = NULL; + vmd = VM_DOMAIN(domain); + if (vm_domain_allocate(vmd, req, 1)) { + vm_domain_free_lock(vmd); + m = vm_phys_alloc_pages(domain, VM_FREEPOOL_DEFAULT, + VM_LEVEL_0_ORDER); + vm_domain_free_unlock(vmd); + if (m == NULL) { + vm_domain_freecnt_inc(vmd, 1); + return (NULL); + } + } else return (NULL); rv = vm_reserv_from_page(m); + vm_reserv_lock(rv); KASSERT(rv->pages == m, ("vm_reserv_alloc_page: reserv %p's pages is corrupted", rv)); vm_reserv_insert(rv, object, first); index = VM_RESERV_INDEX(object, pindex); vm_reserv_populate(rv, index); + vm_reserv_unlock(rv); + return (&rv->pages[index]); } @@ -942,7 +1010,9 @@ { int begin_zeroes, hi, i, lo; - vm_domain_free_assert_locked(VM_DOMAIN(rv->domain)); + vm_reserv_assert_locked(rv); + CTR5(KTR_VM, "%s: rv %p object %p popcnt %d inpartpop %d", + __FUNCTION__, rv, rv->object, rv->popcnt, rv->inpartpopq); vm_reserv_remove(rv); rv->pages->psind = 0; i = hi = 0; @@ -981,12 +1051,14 @@ if (i != NPOPMAP) /* Convert from ffsl() to ordinary bit numbering. */ hi--; + vm_domain_free_lock(VM_DOMAIN(rv->domain)); vm_phys_free_contig(&rv->pages[begin_zeroes], NBPOPMAP * i + hi - begin_zeroes); + vm_domain_free_unlock(VM_DOMAIN(rv->domain)); } while (i < NPOPMAP); KASSERT(rv->popcnt == 0, ("vm_reserv_break: reserv %p's popcnt is corrupted", rv)); - vm_reserv_broken++; + counter_u64_add(vm_reserv_broken, 1); } /* @@ -996,7 +1068,6 @@ vm_reserv_break_all(vm_object_t object) { vm_reserv_t rv; - struct vm_domain *vmd; /* * This access of object->rvq is unsynchronized so that the @@ -1005,27 +1076,22 @@ * lock prevents new additions, so we are guaranteed that when * it returns NULL the object is properly empty. 
*/ - vmd = NULL; while ((rv = LIST_FIRST(&object->rvq)) != NULL) { - if (vmd != VM_DOMAIN(rv->domain)) { - if (vmd != NULL) - vm_domain_free_unlock(vmd); - vmd = VM_DOMAIN(rv->domain); - vm_domain_free_lock(vmd); - } + vm_reserv_lock(rv); /* Reclaim race. */ - if (rv->object != object) + if (rv->object != object) { + vm_reserv_unlock(rv); continue; - KASSERT(rv->object == object, - ("vm_reserv_break_all: reserv %p is corrupted", rv)); + } + vm_reserv_domain_lock(rv->domain); if (rv->inpartpopq) { TAILQ_REMOVE(&vm_rvq_partpop[rv->domain], rv, partpopq); rv->inpartpopq = FALSE; } + vm_reserv_domain_unlock(rv->domain); vm_reserv_break(rv); + vm_reserv_unlock(rv); } - if (vmd != NULL) - vm_domain_free_unlock(vmd); } /* @@ -1038,13 +1104,21 @@ vm_reserv_free_page(vm_page_t m) { vm_reserv_t rv; + boolean_t ret; rv = vm_reserv_from_page(m); if (rv->object == NULL) return (FALSE); - vm_domain_free_assert_locked(VM_DOMAIN(rv->domain)); - vm_reserv_depopulate(rv, m - rv->pages); - return (TRUE); + vm_reserv_lock(rv); + /* Re-validate after lock. 
*/ + if (rv->object != NULL) { + vm_reserv_depopulate(rv, m - rv->pages); + ret = TRUE; + } else + ret = FALSE; + vm_reserv_unlock(rv); + + return (ret); } /* @@ -1058,6 +1132,7 @@ { vm_paddr_t paddr; struct vm_phys_seg *seg; + struct vm_reserv *rv; int i, segind; /* @@ -1068,15 +1143,22 @@ seg = &vm_phys_segs[segind]; paddr = roundup2(seg->start, VM_LEVEL_0_SIZE); while (paddr + VM_LEVEL_0_SIZE <= seg->end) { - vm_reserv_array[paddr >> VM_LEVEL_0_SHIFT].pages = - PHYS_TO_VM_PAGE(paddr); - vm_reserv_array[paddr >> VM_LEVEL_0_SHIFT].domain = - seg->domain; + rv = &vm_reserv_array[paddr >> VM_LEVEL_0_SHIFT]; + rv->pages = PHYS_TO_VM_PAGE(paddr); + rv->domain = seg->domain; + mtx_init(&rv->lock, "vm reserv", NULL, MTX_DEF); paddr += VM_LEVEL_0_SIZE; } } - for (i = 0; i < MAXMEMDOM; i++) + for (i = 0; i < MAXMEMDOM; i++) { + mtx_init(&vm_reserv_domain_locks[i], "VM reserv domain", NULL, + MTX_DEF); TAILQ_INIT(&vm_rvq_partpop[i]); + } + + for (i = 0; i < VM_RESERV_OBJ_LOCK_COUNT; i++) + mtx_init(&vm_reserv_object_mtx[i], "resv obj lock", NULL, + MTX_DEF); } /* @@ -1091,7 +1173,6 @@ rv = vm_reserv_from_page(m); if (rv->object == NULL) return (false); - vm_domain_free_assert_locked(VM_DOMAIN(rv->domain)); return (popmap_is_clear(rv->popmap, m - rv->pages)); } @@ -1131,7 +1212,10 @@ vm_reserv_reclaim(vm_reserv_t rv) { - vm_domain_free_assert_locked(VM_DOMAIN(rv->domain)); + vm_reserv_assert_locked(rv); + CTR5(KTR_VM, "%s: rv %p object %p popcnt %d inpartpop %d", + __FUNCTION__, rv, rv->object, rv->popcnt, rv->inpartpopq); + vm_reserv_domain_lock(rv->domain); KASSERT(rv->inpartpopq, ("vm_reserv_reclaim: reserv %p's inpartpopq is FALSE", rv)); KASSERT(rv->domain >= 0 && rv->domain < vm_ndomains, @@ -1139,8 +1223,9 @@ rv, rv->domain)); TAILQ_REMOVE(&vm_rvq_partpop[rv->domain], rv, partpopq); rv->inpartpopq = FALSE; + vm_reserv_domain_unlock(rv->domain); vm_reserv_break(rv); - vm_reserv_reclaimed++; + counter_u64_add(vm_reserv_reclaimed, 1); } /* @@ -1155,9 +1240,14 @@ { 
vm_reserv_t rv; - vm_domain_free_assert_locked(VM_DOMAIN(domain)); - if ((rv = TAILQ_FIRST(&vm_rvq_partpop[domain])) != NULL) { + while ((rv = TAILQ_FIRST(&vm_rvq_partpop[domain])) != NULL) { + vm_reserv_lock(rv); + if (rv != TAILQ_FIRST(&vm_rvq_partpop[domain])) { + vm_reserv_unlock(rv); + continue; + } vm_reserv_reclaim(rv); + vm_reserv_unlock(rv); return (TRUE); } return (FALSE); @@ -1176,14 +1266,16 @@ vm_paddr_t high, u_long alignment, vm_paddr_t boundary) { vm_paddr_t pa, size; - vm_reserv_t rv; + vm_reserv_t rv, rvn; int hi, i, lo, low_index, next_free; - vm_domain_free_assert_locked(VM_DOMAIN(domain)); if (npages > VM_LEVEL_0_NPAGES - 1) return (FALSE); size = npages << PAGE_SHIFT; - TAILQ_FOREACH(rv, &vm_rvq_partpop[domain], partpopq) { + vm_reserv_domain_lock(domain); +again: + for (rv = TAILQ_FIRST(&vm_rvq_partpop[domain]); rv != NULL; rv = rvn) { + rvn = TAILQ_NEXT(rv, partpopq); pa = VM_PAGE_TO_PHYS(&rv->pages[VM_LEVEL_0_NPAGES - 1]); if (pa + PAGE_SIZE - size < low) { /* This entire reservation is too low; go to next. */ @@ -1194,6 +1286,19 @@ /* This entire reservation is too high; go to next. */ continue; } + if (vm_reserv_trylock(rv) == 0) { + vm_reserv_domain_unlock(domain); + vm_reserv_lock(rv); + if (!rv->inpartpopq) { + /* Dequeued while we waited; release rv. */ + vm_reserv_unlock(rv); + vm_reserv_domain_lock(domain); + if (rvn != NULL && !rvn->inpartpopq) + goto again; + continue; + } + } else + vm_reserv_domain_unlock(domain); if (pa < low) { /* Start the search for free pages at "low". 
*/ low_index = (low + PAGE_MASK - pa) >> PAGE_SHIFT; @@ -1239,6 +1342,7 @@ if ((NBPOPMAP * i - next_free) * PAGE_SIZE >= size) { vm_reserv_reclaim(rv); + vm_reserv_unlock(rv); return (TRUE); } hi = ffsl(rv->popmap[i]); @@ -1249,10 +1353,16 @@ if ((NBPOPMAP * i + hi - next_free) * PAGE_SIZE >= size) { vm_reserv_reclaim(rv); + vm_reserv_unlock(rv); return (TRUE); } } while (i < NPOPMAP); + vm_reserv_unlock(rv); + vm_reserv_domain_lock(domain); + if (rvn != NULL && !rvn->inpartpopq) + goto again; } + vm_reserv_domain_unlock(domain); return (FALSE); } @@ -1270,7 +1380,11 @@ VM_OBJECT_ASSERT_WLOCKED(new_object); rv = vm_reserv_from_page(m); if (rv->object == old_object) { - vm_domain_free_lock(VM_DOMAIN(rv->domain)); + vm_reserv_lock(rv); + CTR6(KTR_VM, + "%s: rv %p object %p new %p popcnt %d inpartpop %d", + __FUNCTION__, rv, rv->object, new_object, rv->popcnt, + rv->inpartpopq); if (rv->object == old_object) { vm_reserv_object_lock(old_object); rv->object = NULL; @@ -1282,7 +1396,7 @@ LIST_INSERT_HEAD(&new_object->rvq, rv, objq); vm_reserv_object_unlock(new_object); } - vm_domain_free_unlock(VM_DOMAIN(rv->domain)); + vm_reserv_unlock(rv); } } @@ -1312,7 +1426,6 @@ { vm_paddr_t new_end; size_t size; - int i; /* * Calculate the size (in bytes) of the reservation array. Round up @@ -1332,10 +1445,6 @@ VM_PROT_READ | VM_PROT_WRITE); bzero(vm_reserv_array, size); - for (i = 0; i < VM_RESERV_OBJ_LOCK_COUNT; i++) - mtx_init(&vm_reserv_object_mtx[i], "resv obj lock", NULL, - MTX_DEF); - /* * Return the next available physical address. */ @@ -1343,6 +1452,21 @@ } /* + * Initializes the reservation management system. Specifically, initializes + * the reservation counters. 
+ */ +static void +vm_reserv_counter_init(void *unused) +{ + + vm_reserv_freed = counter_u64_alloc(M_WAITOK); + vm_reserv_broken = counter_u64_alloc(M_WAITOK); + vm_reserv_reclaimed = counter_u64_alloc(M_WAITOK); +} +SYSINIT(vm_reserv_counter_init, SI_SUB_CPU, SI_ORDER_ANY, + vm_reserv_counter_init, NULL); + +/* * Returns the superpage containing the given page. */ vm_page_t @@ -1352,8 +1476,12 @@ VM_OBJECT_ASSERT_LOCKED(m->object); rv = vm_reserv_from_page(m); - return (rv->object == m->object && rv->popcnt == VM_LEVEL_0_NPAGES ? - rv->pages : NULL); + if (rv->object == m->object && rv->popcnt == VM_LEVEL_0_NPAGES) + m = rv->pages; + else + m = NULL; + + return (m); } #endif /* VM_NRESERVLEVEL > 0 */