Index: sys/kern/subr_witness.c
===================================================================
--- sys/kern/subr_witness.c
+++ sys/kern/subr_witness.c
@@ -136,7 +136,7 @@
 #define	WITNESS_COUNT		1536
 #endif
 #define	WITNESS_HASH_SIZE	251	/* Prime, gives load factor < 2 */
-#define	WITNESS_PENDLIST	(1280 + MAXCPU)
+#define	WITNESS_PENDLIST	(1536 + MAXCPU)
 
 /* Allocate 256 KB of stack data space */
 #define	WITNESS_LO_DATA_COUNT	2048
Index: sys/sys/vmmeter.h
===================================================================
--- sys/sys/vmmeter.h
+++ sys/sys/vmmeter.h
@@ -96,6 +96,7 @@
 	u_int v_active_count;	/* (q) pages active */
 	u_int v_inactive_target; /* (c) pages desired inactive */
 	u_int v_inactive_count;	/* (q) pages inactive */
+	u_int v_inactive_deferred_count; /* (a) pages on PA deferred inactive queues */
 	u_int v_cache_count;	/* (f) pages on cache queue */
 	u_int v_cache_min;	/* (c) min pages desired on cache queue */
 	u_int v_cache_max;	/* (c) max pages in cached obj (unused) */
Index: sys/vm/vm_meter.c
===================================================================
--- sys/vm/vm_meter.c
+++ sys/vm/vm_meter.c
@@ -315,6 +315,7 @@
 VM_STATS_VM(v_active_count, "Active pages");
 VM_STATS_VM(v_inactive_target, "Desired inactive pages");
 VM_STATS_VM(v_inactive_count, "Inactive pages");
+VM_STATS_VM(v_inactive_deferred_count, "Inactive pages on a PA deferred queue");
 VM_STATS_VM(v_cache_count, "Pages on cache queue");
 VM_STATS_VM(v_cache_min, "Min pages on cache queue");
 VM_STATS_VM(v_cache_max, "Max pages on cached queue");
Index: sys/vm/vm_page.h
===================================================================
--- sys/vm/vm_page.h
+++ sys/vm/vm_page.h
@@ -185,7 +185,7 @@
  * The algorithm is taken mostly by rwlock(9) and sx(9) locks implementation,
  * even if the support for owner identity is removed because of size
  * constraints.  Checks on lock recursion are then not possible, while the
- * lock assertions effectiveness is someway reduced.
+ * lock assertions effectiveness is somewhat reduced.
  */
 #define	VPB_BIT_SHARED		0x01
 #define	VPB_BIT_EXCLUSIVE	0x02
@@ -208,6 +208,10 @@
 #define	PQ_ACTIVE	1
 #define	PQ_COUNT	2
 
+#include <machine/param.h>	/* PA_LOCK_COUNT */
+
+#define	VM_ASSERT(exp)	KASSERT(exp, (#exp))
+
 TAILQ_HEAD(pglist, vm_page);
 SLIST_HEAD(spglist, vm_page);
 
@@ -221,7 +225,7 @@
 
 struct vm_domain {
-	struct vm_pagequeue vmd_pagequeues[PQ_COUNT];
+	struct vm_pagequeue vmd_pagequeues[PQ_COUNT + PA_LOCK_COUNT];
 	u_int vmd_page_count;
 	u_int vmd_free_count;
 	long vmd_segs;	/* bitmask of the segments */
@@ -229,6 +233,8 @@
 	int vmd_pass;	/* local pagedaemon pass */
 	struct vm_page vmd_marker; /* marker for pagedaemon private use */
 };
+#define	vm_page_queue_idx(m)	(PQ_COUNT + (pa_index(VM_PAGE_TO_PHYS((m))) % PA_LOCK_COUNT))
+
 extern struct vm_domain vm_dom[MAXMEMDOM];
 
@@ -241,9 +247,6 @@
 vm_pagequeue_cnt_add(struct vm_pagequeue *pq, int addend)
 {
 
-#ifdef notyet
-	vm_pagequeue_assert_locked(pq);
-#endif
 	pq->pq_cnt += addend;
 	atomic_add_int(pq->pq_vcnt, addend);
 }
@@ -321,7 +324,7 @@
  * Page flags.  If changed at any other time than page allocation or
  * freeing, the modification must be protected by the vm_page lock.
  */
-#define	PG_UNUSED1	0x0001		/* unused - was PG_CACHED */
+#define	PG_PAQUEUE	0x0001		/* page has inactivation pending */
 #define	PG_FICTITIOUS	0x0004		/* physical page doesn't exist */
 #define	PG_ZERO		0x0008		/* page is zeroed */
 #define	PG_MARKER	0x0010		/* special queue marker page */
@@ -457,6 +460,7 @@
 vm_page_t vm_page_next(vm_page_t m);
 int vm_page_pa_tryrelock(pmap_t, vm_paddr_t, vm_paddr_t *);
 struct vm_pagequeue *vm_page_pagequeue(vm_page_t m);
+struct vm_pagequeue *vm_page_pagequeue_deferred(vm_page_t m);
 vm_page_t vm_page_prev(vm_page_t m);
 boolean_t vm_page_ps_is_valid(vm_page_t m);
 void vm_page_putfake(vm_page_t m);
@@ -489,6 +493,7 @@
 void vm_page_free_toq(vm_page_t m);
 void vm_page_zero_idle_wakeup(void);
+int vm_page_queue_fixup(struct vm_domain *vmd);
 
 void vm_page_dirty_KBI(vm_page_t m);
 void vm_page_lock_KBI(vm_page_t m, const char *file, int line);
 void vm_page_unlock_KBI(vm_page_t m, const char *file, int line);
Index: sys/vm/vm_page.c
===================================================================
--- sys/vm/vm_page.c
+++ sys/vm/vm_page.c
@@ -169,6 +169,7 @@
 #endif
 #endif
 
+#define	PAQLENTHRESH	16
 
 struct vm_page_percpu {
 	struct mtx vpp_lock;
@@ -360,11 +361,18 @@
 	vmd->vmd_segs = 0;
 	vmd->vmd_oom = FALSE;
 	vmd->vmd_pass = 0;
-	for (i = 0; i < PQ_COUNT; i++) {
+	for (i = 0; i < PQ_COUNT + PA_LOCK_COUNT; i++) {
 		pq = &vmd->vmd_pagequeues[i];
+		if (i >= PQ_COUNT) {
+			/* Name the PA deferred queues before mtx_init() uses pq_name. */
+			pq->pq_cnt = 0;
+			*__DECONST(char **, &vmd->vmd_pagequeues[i].pq_name) =
+			    "vm inactive pagequeue";
+			*__DECONST(int **, &vmd->vmd_pagequeues[i].pq_vcnt) =
+			    &vm_cnt.v_inactive_count;
+		}
 		TAILQ_INIT(&pq->pq_pl);
 		mtx_init(&pq->pq_mutex, pq->pq_name, "vm pagequeue",
 		    MTX_DEF | MTX_DUPOK);
 	}
 }
@@ -1933,8 +1941,23 @@
 struct vm_pagequeue *
 vm_page_pagequeue(vm_page_t m)
 {
+	int queue = m->queue;
+	struct vm_domain *dom = vm_phys_domain(m);
+
+	return (&dom->vmd_pagequeues[queue]);
+}
+
+struct vm_pagequeue *
+vm_page_pagequeue_deferred(vm_page_t m)
+{
+	int queue = m->queue;
+	struct vm_domain *dom = vm_phys_domain(m);
 
-	return (&vm_phys_domain(m)->vmd_pagequeues[m->queue]);
+	vm_page_lock_assert(m, MA_OWNED);
+	if ((queue == PQ_INACTIVE) && (m->flags & PG_PAQUEUE))
+		return (&dom->vmd_pagequeues[vm_page_queue_idx(m)]);
+	else
+		return (&dom->vmd_pagequeues[queue]);
 }
 
 /*
@@ -1952,12 +1975,19 @@
 	vm_page_assert_locked(m);
 	KASSERT(m->queue < PQ_COUNT,
 	    ("vm_page_dequeue: page %p is not queued", m));
-	pq = vm_page_pagequeue(m);
-	vm_pagequeue_lock(pq);
+	pq = vm_page_pagequeue_deferred(m);
+	if (m->flags & PG_PAQUEUE) {
+		TAILQ_REMOVE(&pq->pq_pl, m, plinks.q);
+		vm_pagequeue_cnt_dec(pq);
+		m->flags &= ~PG_PAQUEUE;
+		atomic_add_int(&vm_cnt.v_inactive_deferred_count, -1);
+	} else {
+		vm_pagequeue_lock(pq);
+		TAILQ_REMOVE(&pq->pq_pl, m, plinks.q);
+		vm_pagequeue_cnt_dec(pq);
+		vm_pagequeue_unlock(pq);
+	}
 	m->queue = PQ_NONE;
-	TAILQ_REMOVE(&pq->pq_pl, m, plinks.q);
-	vm_pagequeue_cnt_dec(pq);
-	vm_pagequeue_unlock(pq);
 }
 
 /*
@@ -1973,8 +2003,12 @@
 	struct vm_pagequeue *pq;
 
 	vm_page_lock_assert(m, MA_OWNED);
-	pq = vm_page_pagequeue(m);
-	vm_pagequeue_assert_locked(pq);
+	pq = vm_page_pagequeue_deferred(m);
+	if (m->flags & PG_PAQUEUE) {
+		m->flags &= ~PG_PAQUEUE;
+		atomic_add_int(&vm_cnt.v_inactive_deferred_count, -1);
+	} else
+		vm_pagequeue_assert_locked(pq);
 	m->queue = PQ_NONE;
 	TAILQ_REMOVE(&pq->pq_pl, m, plinks.q);
 	vm_pagequeue_cnt_dec(pq);
@@ -1996,12 +2030,23 @@
 	KASSERT(queue < PQ_COUNT,
 	    ("vm_page_enqueue: invalid queue %u request for page %p",
 	    queue, m));
-	pq = &vm_phys_domain(m)->vmd_pagequeues[queue];
-	vm_pagequeue_lock(pq);
-	m->queue = queue;
-	TAILQ_INSERT_TAIL(&pq->pq_pl, m, plinks.q);
-	vm_pagequeue_cnt_inc(pq);
-	vm_pagequeue_unlock(pq);
+	if (queue == PQ_INACTIVE) {
+		/* Look up the PA deferred queue for this page. */
+		pq = &vm_phys_domain(m)->vmd_pagequeues[vm_page_queue_idx(m)];
+		m->queue = queue;
+		/* Mark the page as being on a physically addressed deferred queue. */
+		m->flags |= PG_PAQUEUE;
+		TAILQ_INSERT_TAIL(&pq->pq_pl, m, plinks.q);
+		vm_pagequeue_cnt_inc(pq);
+		atomic_add_int(&vm_cnt.v_inactive_deferred_count, 1);
+	} else {
+		pq = &vm_phys_domain(m)->vmd_pagequeues[queue];
+		vm_pagequeue_lock(pq);
+		m->queue = queue;
+		TAILQ_INSERT_TAIL(&pq->pq_pl, m, plinks.q);
+		vm_pagequeue_cnt_inc(pq);
+		vm_pagequeue_unlock(pq);
+	}
 }
 
 /*
@@ -2019,11 +2064,16 @@
 	vm_page_lock_assert(m, MA_OWNED);
 	KASSERT(m->queue != PQ_NONE,
 	    ("vm_page_requeue: page %p is not queued", m));
-	pq = vm_page_pagequeue(m);
-	vm_pagequeue_lock(pq);
-	TAILQ_REMOVE(&pq->pq_pl, m, plinks.q);
-	TAILQ_INSERT_TAIL(&pq->pq_pl, m, plinks.q);
-	vm_pagequeue_unlock(pq);
+	pq = vm_page_pagequeue_deferred(m);
+	if (m->flags & PG_PAQUEUE) {
+		TAILQ_REMOVE(&pq->pq_pl, m, plinks.q);
+		TAILQ_INSERT_TAIL(&pq->pq_pl, m, plinks.q);
+	} else {
+		vm_pagequeue_lock(pq);
+		TAILQ_REMOVE(&pq->pq_pl, m, plinks.q);
+		TAILQ_INSERT_TAIL(&pq->pq_pl, m, plinks.q);
+		vm_pagequeue_unlock(pq);
+	}
 }
 
 /*
@@ -2039,13 +2089,57 @@
 	struct vm_pagequeue *pq;
 
 	KASSERT(m->queue != PQ_NONE,
 	    ("vm_page_requeue_locked: page %p is not queued", m));
+	/*
+	 * The page lock is not necessarily held here, and the page may be
+	 * on a PA deferred queue rather than the inactive queue; leave it
+	 * alone and let vm_page_queue_fixup() move it later.
+	 */
+	if (m->flags & PG_PAQUEUE)
+		return;
 	pq = vm_page_pagequeue(m);
 	vm_pagequeue_assert_locked(pq);
 	TAILQ_REMOVE(&pq->pq_pl, m, plinks.q);
 	TAILQ_INSERT_TAIL(&pq->pq_pl, m, plinks.q);
 }
 
+int
+vm_page_queue_fixup(struct vm_domain *vmd)
+{
+	int i, merged;
+#ifdef INVARIANTS
+	int _cnt;
+#endif
+	struct vm_pagequeue *vpq, *lvpq;
+	struct mtx *qlock;
+	vm_page_t m1;
+
+	vpq = &vmd->vmd_pagequeues[PQ_INACTIVE];
+	merged = 0;
+	for (i = 0, lvpq = &vmd->vmd_pagequeues[PQ_COUNT]; i < PA_LOCK_COUNT;
+	    lvpq++, i++) {
+		qlock = (struct mtx *)&pa_lock[i];
+		if (lvpq->pq_cnt < PAQLENTHRESH)
+			continue;
+		if (!mtx_trylock(qlock))
+			continue;
+#ifdef INVARIANTS
+		_cnt = 0;
+#endif
+		TAILQ_FOREACH(m1, &lvpq->pq_pl, plinks.q) {
+#ifdef INVARIANTS
+			_cnt++;
+			VM_ASSERT(m1->queue == PQ_INACTIVE);
+			VM_ASSERT((m1->flags & PG_PAQUEUE) != 0);
+#endif
+			m1->flags &= ~PG_PAQUEUE;
+		}
+		VM_ASSERT(_cnt == lvpq->pq_cnt);
+		TAILQ_CONCAT(&vpq->pq_pl, &lvpq->pq_pl, plinks.q);
+		vpq->pq_cnt += lvpq->pq_cnt;
+		merged += lvpq->pq_cnt;
+		atomic_add_int(&vm_cnt.v_inactive_deferred_count, -lvpq->pq_cnt);
+		lvpq->pq_cnt = 0;
+		mtx_unlock(qlock);
+	}
+	return (merged);
+}
+
 /*
  * vm_page_activate:
  *
@@ -2319,16 +2413,15 @@
 		if (queue != PQ_NONE)
 			vm_page_dequeue(m);
 		m->flags &= ~PG_WINATCFLS;
-		pq = &vm_phys_domain(m)->vmd_pagequeues[PQ_INACTIVE];
-		vm_pagequeue_lock(pq);
+		m->flags |= PG_PAQUEUE;
 		m->queue = PQ_INACTIVE;
-
+		pq = &vm_phys_domain(m)->vmd_pagequeues[vm_page_queue_idx(m)];
 		if (athead)
 			TAILQ_INSERT_HEAD(&pq->pq_pl, m, plinks.q);
 		else
 			TAILQ_INSERT_TAIL(&pq->pq_pl, m, plinks.q);
 		vm_pagequeue_cnt_inc(pq);
-		vm_pagequeue_unlock(pq);
+		atomic_add_int(&vm_cnt.v_inactive_deferred_count, 1);
 	}
 }
Index: sys/vm/vm_pageout.c
===================================================================
--- sys/vm/vm_pageout.c
+++ sys/vm/vm_pageout.c
@@ -271,6 +271,7 @@
 	u_short queue;
 	vm_object_t object;
 
+	VM_ASSERT((m->flags & PG_PAQUEUE) == 0);
 	queue = m->queue;
 	vm_pageout_init_marker(&marker, queue);
 	pq = vm_page_pagequeue(m);
@@ -313,6 +314,7 @@
 	if (vm_page_trylock(m))
 		return (TRUE);
 
+	VM_ASSERT((m->flags & PG_PAQUEUE) == 0);
 	queue = m->queue;
 	vm_pageout_init_marker(&marker, queue);
 	pq = vm_page_pagequeue(m);
@@ -1080,7 +1082,7 @@
 	vm_page_t m, next;
 	struct vm_pagequeue *pq;
 	vm_object_t object;
-	int act_delta, addl_page_shortage, deficit, maxscan, page_shortage;
+	int act_delta, addl_page_shortage, deficit, maxscan, page_shortage, merged;
 	int vnodes_skipped = 0;
 	int maxlaunder;
 	boolean_t queues_locked;
@@ -1147,6 +1149,11 @@
 	pq = &vmd->vmd_pagequeues[PQ_INACTIVE];
 	maxscan = pq->pq_cnt;
 	vm_pagequeue_lock(pq);
+rescan:
+	/* Try to move as many pages as possible from the PA deferred
+	 * queues to the inactive queue.
+	 */
+	merged = vm_page_queue_fixup(vmd);
 	queues_locked = TRUE;
 	for (m = TAILQ_FIRST(&pq->pq_pl);
 	     m != NULL && maxscan-- > 0 && page_shortage > 0;
@@ -1348,6 +1355,11 @@
 		next = TAILQ_NEXT(&vmd->vmd_marker, plinks.q);
 		TAILQ_REMOVE(&pq->pq_pl, &vmd->vmd_marker, plinks.q);
 	}
+	/* We still need pages and the last fixup collected inactive pages,
+	 * so try the scan again.
+	 */
+	if (page_shortage && maxscan && merged)
+		goto rescan;
 	vm_pagequeue_unlock(pq);
 
 #if !defined(NO_SWAPPING)
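
Not part of the patch, only an illustration: below is a minimal userspace sketch of the scheme the diff implements. Deactivated pages are appended to one of PA_LOCK_COUNT deferred inactive queues chosen by hashing the physical address, and the pagedaemon later splices those queues into the real inactive queue under trylock, as vm_page_queue_fixup() does. All names here (struct page, deferred_enqueue, queue_fixup, NDEFERRED, QLENTHRESH) are invented for the sketch, and it assumes a BSD-style <sys/queue.h> that provides TAILQ_CONCAT.

#include <pthread.h>
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>
#include <sys/queue.h>

#define	NDEFERRED	8	/* stands in for PA_LOCK_COUNT */
#define	QLENTHRESH	4	/* stands in for PAQLENTHRESH */

struct page {
	uint64_t		phys;		/* fake physical address */
	int			deferred;	/* stands in for PG_PAQUEUE */
	TAILQ_ENTRY(page)	link;
};
TAILQ_HEAD(pageq, page);

static struct pageq inactive = TAILQ_HEAD_INITIALIZER(inactive);
static pthread_mutex_t inactive_lock = PTHREAD_MUTEX_INITIALIZER;

static struct pageq deferq[NDEFERRED];
static pthread_mutex_t deferlock[NDEFERRED];	/* stands in for pa_lock[] */
static int defercnt[NDEFERRED];

/* Hash a physical address to a deferred queue, like vm_page_queue_idx(). */
static size_t
defer_idx(uint64_t phys)
{
	return ((phys >> 21) % NDEFERRED);
}

/*
 * Deactivate a page: only the per-address lock is taken, so callers do not
 * contend on the global inactive queue lock.
 */
static void
deferred_enqueue(struct page *p)
{
	size_t i = defer_idx(p->phys);

	pthread_mutex_lock(&deferlock[i]);
	p->deferred = 1;
	TAILQ_INSERT_TAIL(&deferq[i], p, link);
	defercnt[i]++;
	pthread_mutex_unlock(&deferlock[i]);
}

/*
 * Splice long-enough deferred queues into the inactive queue, the analogue
 * of vm_page_queue_fixup(); trylock keeps this pass from blocking on a
 * queue that is currently being appended to.
 */
static int
queue_fixup(void)
{
	int i, merged = 0;
	struct page *p;

	pthread_mutex_lock(&inactive_lock);
	for (i = 0; i < NDEFERRED; i++) {
		if (defercnt[i] < QLENTHRESH)
			continue;
		if (pthread_mutex_trylock(&deferlock[i]) != 0)
			continue;
		TAILQ_FOREACH(p, &deferq[i], link)
			p->deferred = 0;
		TAILQ_CONCAT(&inactive, &deferq[i], link);
		merged += defercnt[i];
		defercnt[i] = 0;
		pthread_mutex_unlock(&deferlock[i]);
	}
	pthread_mutex_unlock(&inactive_lock);
	return (merged);
}

int
main(void)
{
	struct page pages[64];
	int i;

	for (i = 0; i < NDEFERRED; i++) {
		TAILQ_INIT(&deferq[i]);
		pthread_mutex_init(&deferlock[i], NULL);
	}
	for (i = 0; i < 64; i++) {
		pages[i].phys = (uint64_t)i << 21;	/* spread across queues */
		deferred_enqueue(&pages[i]);
	}
	printf("merged %d pages into the inactive queue\n", queue_fixup());
	return (0);
}

The point mirrored by the patch is that vm_page_deactivate() then only needs the page (pa) lock it already holds, while the global inactive queue lock is taken by the pagedaemon when it splices whole deferred queues, so deactivation no longer serializes on a single queue lock.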