Index: sys/sys/vmmeter.h
===================================================================
--- sys/sys/vmmeter.h
+++ sys/sys/vmmeter.h
@@ -96,6 +96,7 @@
 	u_int v_active_count;	/* (q) pages active */
 	u_int v_inactive_target; /* (c) pages desired inactive */
 	u_int v_inactive_count;	/* (q) pages inactive */
+	u_int v_inactive_deferred_count; /* (a) inactive pages on deferred queues */
 	u_int v_cache_count;	/* (f) pages on cache queue */
 	u_int v_cache_min;	/* (c) min pages desired on cache queue */
 	u_int v_cache_max;	/* (c) max pages in cached obj (unused) */
Index: sys/vm/vm_meter.c
===================================================================
--- sys/vm/vm_meter.c
+++ sys/vm/vm_meter.c
@@ -315,6 +315,7 @@
 VM_STATS_VM(v_active_count, "Active pages");
 VM_STATS_VM(v_inactive_target, "Desired inactive pages");
 VM_STATS_VM(v_inactive_count, "Inactive pages");
+VM_STATS_VM(v_inactive_deferred_count, "Inactive pages on a PA deferred queue");
 VM_STATS_VM(v_cache_count, "Pages on cache queue");
 VM_STATS_VM(v_cache_min, "Min pages on cache queue");
 VM_STATS_VM(v_cache_max, "Max pages on cached queue");
Index: sys/vm/vm_page.h
===================================================================
--- sys/vm/vm_page.h
+++ sys/vm/vm_page.h
@@ -185,7 +185,7 @@
  * The algorithm is taken mostly by rwlock(9) and sx(9) locks implementation,
  * even if the support for owner identity is removed because of size
  * constraints.  Checks on lock recursion are then not possible, while the
- * lock assertions effectiveness is someway reduced.
+ * lock assertions effectiveness is somewhat reduced.
  */
 #define	VPB_BIT_SHARED		0x01
 #define	VPB_BIT_EXCLUSIVE	0x02
@@ -208,6 +208,10 @@
 #define	PQ_ACTIVE	1
 #define	PQ_COUNT	2
 
+#include	/* PA_LOCK_COUNT */
+
+#define	VM_ASSERT(exp)	KASSERT(exp, (#exp))
+
 TAILQ_HEAD(pglist, vm_page);
 SLIST_HEAD(spglist, vm_page);
@@ -221,7 +225,7 @@
 
 struct vm_domain {
-	struct vm_pagequeue vmd_pagequeues[PQ_COUNT];
+	struct vm_pagequeue vmd_pagequeues[PQ_COUNT + PA_LOCK_COUNT];
 	u_int vmd_page_count;
 	u_int vmd_free_count;
 	long vmd_segs;	/* bitmask of the segments */
@@ -229,11 +233,14 @@
 	int vmd_pass;	/* local pagedaemon pass */
 	struct vm_page vmd_marker; /* marker for pagedaemon private use */
 };
+#define	vm_page_queue_idx(m)						\
+	(PQ_COUNT + (pa_index(VM_PAGE_TO_PHYS((m))) % PA_LOCK_COUNT))
 
 extern struct vm_domain vm_dom[MAXMEMDOM];
 
 #define	vm_pagequeue_assert_locked(pq)	mtx_assert(&(pq)->pq_mutex, MA_OWNED)
 #define	vm_pagequeue_lock(pq)		mtx_lock(&(pq)->pq_mutex)
+#define	vm_pagequeue_trylock(pq)	mtx_trylock(&(pq)->pq_mutex)
 #define	vm_pagequeue_unlock(pq)		mtx_unlock(&(pq)->pq_mutex)
 
 #ifdef _KERNEL
@@ -241,9 +248,6 @@
 vm_pagequeue_cnt_add(struct vm_pagequeue *pq, int addend)
 {
 
-#ifdef notyet
-	vm_pagequeue_assert_locked(pq);
-#endif
 	pq->pq_cnt += addend;
 	atomic_add_int(pq->pq_vcnt, addend);
 }
@@ -322,6 +326,7 @@
  * freeing, the modification must be protected by the vm_page lock.
  */
 #define	PG_CACHED	0x0001		/* page is cached */
+#define	PG_PAQUEUE	0x0002		/* page has inactivation pending */
 #define	PG_FICTITIOUS	0x0004		/* physical page doesn't exist */
 #define	PG_ZERO		0x0008		/* page is zeroed */
 #define	PG_MARKER	0x0010		/* special queue marker page */
@@ -461,6 +466,7 @@
 vm_page_t vm_page_next(vm_page_t m);
 int vm_page_pa_tryrelock(pmap_t, vm_paddr_t, vm_paddr_t *);
 struct vm_pagequeue *vm_page_pagequeue(vm_page_t m);
+struct vm_pagequeue *vm_page_pagequeue_deferred(vm_page_t m);
 vm_page_t vm_page_prev(vm_page_t m);
 boolean_t vm_page_ps_is_valid(vm_page_t m);
 void vm_page_putfake(vm_page_t m);
@@ -493,6 +499,7 @@
 void vm_page_free_toq(vm_page_t m);
 void vm_page_zero_idle_wakeup(void);
+int vm_page_queue_fixup(struct vm_page *m);
 void vm_page_dirty_KBI(vm_page_t m);
 void vm_page_lock_KBI(vm_page_t m, const char *file, int line);
 void vm_page_unlock_KBI(vm_page_t m, const char *file, int line);
Index: sys/vm/vm_page.c
===================================================================
--- sys/vm/vm_page.c
+++ sys/vm/vm_page.c
@@ -264,14 +264,35 @@
 	vmd->vmd_segs = 0;
 	vmd->vmd_oom = FALSE;
 	vmd->vmd_pass = 0;
-	for (i = 0; i < PQ_COUNT; i++) {
+	for (i = 0; i < PQ_COUNT + PA_LOCK_COUNT; i++) {
 		pq = &vmd->vmd_pagequeues[i];
 		TAILQ_INIT(&pq->pq_pl);
 		mtx_init(&pq->pq_mutex, pq->pq_name, "vm pagequeue",
-		    MTX_DEF | MTX_DUPOK);
+		    MTX_DEF | MTX_DUPOK);
+		if (i >= PQ_COUNT) {
+			pq->pq_cnt = 0;
+			*__DECONST(char **, &vmd->vmd_pagequeues[i].pq_name) =
+			    "vm inactive pagequeue";
+			*__DECONST(int **, &vmd->vmd_pagequeues[i].pq_vcnt) =
+			    &vm_cnt.v_inactive_count;
+		}
 	}
 }
+
+#define	PAQLENTHRESH_SMALL_LWM	4
+#define	PAQLENTHRESH_MEDIUM_LWM	16
+#define	PAQLENTHRESH_LARGE_LWM	64
+
+#define	PAQLENTHRESH_SMALL_HWM	6
+#define	PAQLENTHRESH_MEDIUM_HWM	24
+#define	PAQLENTHRESH_LARGE_HWM	80
+
+#define	VM_PAGES_SMALL	(1<<18)
+#define	VM_PAGES_MEDIUM	(1<<21)
+
+static int vm_paqlenthresh_lwm;
+static int vm_paqlenthresh_hwm;
+
 /*
  * vm_page_startup:
  *
@@ -497,6 +518,16 @@
 	 * Initialize the reservation management system.
 	 */
 	vm_reserv_init();
+	if (vm_page_array_size < VM_PAGES_SMALL) {
+		vm_paqlenthresh_lwm = PAQLENTHRESH_SMALL_LWM;
+		vm_paqlenthresh_hwm = PAQLENTHRESH_SMALL_HWM;
+	} else if (vm_page_array_size < VM_PAGES_MEDIUM) {
+		vm_paqlenthresh_lwm = PAQLENTHRESH_MEDIUM_LWM;
+		vm_paqlenthresh_hwm = PAQLENTHRESH_MEDIUM_HWM;
+	} else {
+		vm_paqlenthresh_lwm = PAQLENTHRESH_LARGE_LWM;
+		vm_paqlenthresh_hwm = PAQLENTHRESH_LARGE_HWM;
+	}
 #endif
 	return (vaddr);
 }
@@ -2035,8 +2066,75 @@
 struct vm_pagequeue *
 vm_page_pagequeue(vm_page_t m)
 {
+	int queue = m->queue;
+	struct vm_domain *dom = vm_phys_domain(m);
+
+	return (&dom->vmd_pagequeues[queue]);
+}
+
+struct vm_pagequeue *
+vm_page_pagequeue_deferred(vm_page_t m)
+{
+	int queue = m->queue;
+	struct vm_domain *dom = vm_phys_domain(m);
+
+	vm_page_lock_assert(m, MA_OWNED);
+	if ((queue == PQ_INACTIVE) && (m->flags & PG_PAQUEUE))
+		return (&dom->vmd_pagequeues[vm_page_queue_idx(m)]);
+	else
+		return (&dom->vmd_pagequeues[queue]);
+}
+
+int
+vm_page_queue_fixup(struct vm_page *m)
+{
+	int merged, _cnt, was_locked;
+	struct vm_pagequeue *vpq, *lvpq;
+	struct vm_domain *vmd;
+	vm_page_t m1;
+
+	vmd = vm_phys_domain(m);
+	vm_page_lock_assert(m, MA_OWNED);
+
+	vpq = &vmd->vmd_pagequeues[PQ_INACTIVE];
+	was_locked = mtx_owned(&vpq->pq_mutex);
+
+	merged = 0;
+	lvpq = &vmd->vmd_pagequeues[vm_page_queue_idx(m)];
+
+	if (lvpq->pq_cnt < vm_paqlenthresh_lwm) {
+		return (0);
+	} else if (lvpq->pq_cnt < vm_paqlenthresh_hwm && !was_locked) {
+		if (!vm_pagequeue_trylock(vpq))
+			return (0);
+	} else if (!was_locked)
+		vm_pagequeue_lock(vpq);
+
+	_cnt = 0;
+	TAILQ_FOREACH(m1, &lvpq->pq_pl, plinks.q) {
+#ifdef INVARIANTS
+		_cnt++;
+		VM_ASSERT(m1->queue == PQ_INACTIVE);
+		VM_ASSERT((m1->flags & PG_PAQUEUE) != 0);
+#endif
+		m1->flags &= ~PG_PAQUEUE;
+	}
+#ifdef INVARIANTS
+	VM_ASSERT(_cnt == lvpq->pq_cnt);
+#endif
+
+	TAILQ_CONCAT(&vpq->pq_pl, &lvpq->pq_pl, plinks.q);
+	vpq->pq_cnt += lvpq->pq_cnt;
+	merged += lvpq->pq_cnt;
+	atomic_add_int(&vm_cnt.v_inactive_deferred_count,
+	    -lvpq->pq_cnt);
+
+	if (!was_locked)
+		vm_pagequeue_unlock(vpq);
-	return (&vm_phys_domain(m)->vmd_pagequeues[m->queue]);
+	lvpq->pq_cnt = 0;
+
+	return (merged);
 }
 
 /*
@@ -2054,12 +2152,19 @@
 	vm_page_assert_locked(m);
 	KASSERT(m->queue < PQ_COUNT,
 	    ("vm_page_dequeue: page %p is not queued", m));
-	pq = vm_page_pagequeue(m);
-	vm_pagequeue_lock(pq);
+	pq = vm_page_pagequeue_deferred(m);
+	if (m->flags & PG_PAQUEUE) {
+		TAILQ_REMOVE(&pq->pq_pl, m, plinks.q);
+		vm_pagequeue_cnt_dec(pq);
+		m->flags &= ~PG_PAQUEUE;
+		atomic_add_int(&vm_cnt.v_inactive_deferred_count, -1);
+	} else {
+		vm_pagequeue_lock(pq);
+		TAILQ_REMOVE(&pq->pq_pl, m, plinks.q);
+		vm_pagequeue_cnt_dec(pq);
+		vm_pagequeue_unlock(pq);
+	}
 	m->queue = PQ_NONE;
-	TAILQ_REMOVE(&pq->pq_pl, m, plinks.q);
-	vm_pagequeue_cnt_dec(pq);
-	vm_pagequeue_unlock(pq);
 }
 
 /*
@@ -2075,8 +2180,12 @@
 	struct vm_pagequeue *pq;
 
 	vm_page_lock_assert(m, MA_OWNED);
-	pq = vm_page_pagequeue(m);
-	vm_pagequeue_assert_locked(pq);
+	pq = vm_page_pagequeue_deferred(m);
+	if (m->flags & PG_PAQUEUE) {
+		m->flags &= ~PG_PAQUEUE;
+		atomic_add_int(&vm_cnt.v_inactive_deferred_count, -1);
+	} else
+		vm_pagequeue_assert_locked(pq);
 	m->queue = PQ_NONE;
 	TAILQ_REMOVE(&pq->pq_pl, m, plinks.q);
 	vm_pagequeue_cnt_dec(pq);
@@ -2098,12 +2207,25 @@
 	KASSERT(queue < PQ_COUNT,
 	    ("vm_page_enqueue: invalid queue %u request for page %p",
 	    queue, m));
-	pq = &vm_phys_domain(m)->vmd_pagequeues[queue];
-	vm_pagequeue_lock(pq);
-	m->queue = queue;
-	TAILQ_INSERT_TAIL(&pq->pq_pl, m, plinks.q);
-	vm_pagequeue_cnt_inc(pq);
-	vm_pagequeue_unlock(pq);
+	if (queue == PQ_INACTIVE) {
+		/* Look up the physically addressed deferred queue. */
+		pq = &vm_phys_domain(m)->vmd_pagequeues[vm_page_queue_idx(m)];
+		m->queue = queue;
+		/* Mark the page as being on a PA deferred queue. */
+		m->flags |= PG_PAQUEUE;
+		TAILQ_INSERT_TAIL(&pq->pq_pl, m, plinks.q);
+		vm_pagequeue_cnt_inc(pq);
+		atomic_fetchadd_int(&vm_cnt.v_inactive_deferred_count, 1);
+		if (pq->pq_cnt >= vm_paqlenthresh_lwm)
+			vm_page_queue_fixup(m);
+	} else {
+		pq = &vm_phys_domain(m)->vmd_pagequeues[queue];
+		vm_pagequeue_lock(pq);
+		m->queue = queue;
+		TAILQ_INSERT_TAIL(&pq->pq_pl, m, plinks.q);
+		vm_pagequeue_cnt_inc(pq);
+		vm_pagequeue_unlock(pq);
+	}
 }
 
 /*
@@ -2121,11 +2243,16 @@
 	vm_page_lock_assert(m, MA_OWNED);
 	KASSERT(m->queue != PQ_NONE,
 	    ("vm_page_requeue: page %p is not queued", m));
-	pq = vm_page_pagequeue(m);
-	vm_pagequeue_lock(pq);
-	TAILQ_REMOVE(&pq->pq_pl, m, plinks.q);
-	TAILQ_INSERT_TAIL(&pq->pq_pl, m, plinks.q);
-	vm_pagequeue_unlock(pq);
+	pq = vm_page_pagequeue_deferred(m);
+	if (m->flags & PG_PAQUEUE) {
+		TAILQ_REMOVE(&pq->pq_pl, m, plinks.q);
+		TAILQ_INSERT_TAIL(&pq->pq_pl, m, plinks.q);
+	} else {
+		vm_pagequeue_lock(pq);
+		TAILQ_REMOVE(&pq->pq_pl, m, plinks.q);
+		TAILQ_INSERT_TAIL(&pq->pq_pl, m, plinks.q);
+		vm_pagequeue_unlock(pq);
+	}
 }
 
 /*
@@ -2141,7 +2268,12 @@
 	struct vm_pagequeue *pq;
 
 	KASSERT(m->queue != PQ_NONE,
-	    ("vm_page_requeue_locked: page %p is not queued", m));
+	    ("vm_page_requeue_locked: page %p is not queued", m));
+	/* The page lock isn't held and the page isn't on the
+	 * inactive queue; it should be moved by fixup.
+	 */
+	if (m->flags & PG_PAQUEUE)
+		return;
 	pq = vm_page_pagequeue(m);
 	vm_pagequeue_assert_locked(pq);
 	TAILQ_REMOVE(&pq->pq_pl, m, plinks.q);
@@ -2431,15 +2563,23 @@
 		if (queue != PQ_NONE)
 			vm_page_dequeue(m);
 		m->flags &= ~PG_WINATCFLS;
-		pq = &vm_phys_domain(m)->vmd_pagequeues[PQ_INACTIVE];
-		vm_pagequeue_lock(pq);
 		m->queue = PQ_INACTIVE;
-		if (athead)
+		if (athead) {
+			pq = &vm_phys_domain(m)->vmd_pagequeues[PQ_INACTIVE];
+			vm_pagequeue_lock(pq);
 			TAILQ_INSERT_HEAD(&pq->pq_pl, m, plinks.q);
-		else
+			vm_pagequeue_cnt_inc(pq);
+			vm_pagequeue_unlock(pq);
+		} else {
+			m->flags |= PG_PAQUEUE;
+			pq = &vm_phys_domain(m)->vmd_pagequeues[
+			    vm_page_queue_idx(m)];
 			TAILQ_INSERT_TAIL(&pq->pq_pl, m, plinks.q);
-		vm_pagequeue_cnt_inc(pq);
-		vm_pagequeue_unlock(pq);
+			vm_pagequeue_cnt_inc(pq);
+			atomic_add_int(&vm_cnt.v_inactive_deferred_count, 1);
+			if (pq->pq_cnt > vm_paqlenthresh_lwm)
+				vm_page_queue_fixup(m);
+		}
 	}
 }
Index: sys/vm/vm_pageout.c
===================================================================
--- sys/vm/vm_pageout.c
+++ sys/vm/vm_pageout.c
@@ -280,6 +280,7 @@
 	u_short queue;
 	vm_object_t object;
 
+	VM_ASSERT((m->flags & PG_PAQUEUE) == 0);
 	queue = m->queue;
 	vm_pageout_init_marker(&marker, queue);
 	pq = vm_page_pagequeue(m);
@@ -322,6 +323,7 @@
 	if (vm_page_trylock(m))
 		return (TRUE);
 
+	VM_ASSERT((m->flags & PG_PAQUEUE) == 0);
 	queue = m->queue;
 	vm_pageout_init_marker(&marker, queue);
 	pq = vm_page_pagequeue(m);
@@ -1127,6 +1129,7 @@
 			vm_page_unlock(m);
 			continue;
 		}
+		vm_page_queue_fixup(m);
 		object = m->object;
 		if (!VM_OBJECT_TRYWLOCK(object) &&
 		    !vm_pageout_fallback_object_lock(m, &next)) {
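
The shape of the change, for review: a page headed for PQ_INACTIVE is first linked onto one of PA_LOCK_COUNT per-domain deferred queues selected by vm_page_queue_idx() (a hash of the page's physical address), under the page lock alone; once that bucket reaches vm_paqlenthresh_lwm, vm_page_queue_fixup() concatenates the whole bucket onto the real inactive queue under a single pagequeue lock acquisition (and only try-locks below the high-water mark). The standalone user-space sketch below illustrates only this batching pattern; struct page, struct queue, bucket(), deactivate(), fixup(), NBUCKETS and LWM are simplified stand-ins invented for illustration, not kernel structures or API.

#include <stdio.h>
#include <stdlib.h>

#define	NBUCKETS	8	/* stands in for PA_LOCK_COUNT */
#define	LWM		4	/* stands in for vm_paqlenthresh_lwm */

struct page {
	unsigned long	pa;	/* fake physical address */
	struct page	*next;
};

struct queue {
	struct page	*head;
	struct page	**tailp;
	int		cnt;
};

static struct queue inactive;			/* shared inactive queue */
static struct queue deferred[NBUCKETS];		/* per-PA deferred buckets */

static void
queue_init(struct queue *q)
{

	q->head = NULL;
	q->tailp = &q->head;
	q->cnt = 0;
}

/* Pick a deferred bucket from the physical address, like vm_page_queue_idx(). */
static int
bucket(unsigned long pa)
{

	/* Coarse shift so neighboring pages share a bucket. */
	return ((int)((pa >> 21) % NBUCKETS));
}

/* Merge one bucket into the shared queue, like vm_page_queue_fixup(). */
static void
fixup(struct queue *d)
{

	/* A single inactive-queue lock acquisition would cover this merge. */
	*inactive.tailp = d->head;
	if (d->head != NULL)
		inactive.tailp = d->tailp;
	inactive.cnt += d->cnt;
	queue_init(d);
}

/* Deactivate a page: append to its bucket, merge once the bucket is long enough. */
static void
deactivate(struct page *p)
{
	struct queue *d = &deferred[bucket(p->pa)];

	p->next = NULL;
	*d->tailp = p;
	d->tailp = &p->next;
	d->cnt++;
	if (d->cnt >= LWM)
		fixup(d);
}

int
main(void)
{
	struct page *pages = calloc(64, sizeof(*pages));
	int i;

	queue_init(&inactive);
	for (i = 0; i < NBUCKETS; i++)
		queue_init(&deferred[i]);
	for (i = 0; i < 64; i++) {
		pages[i].pa = (unsigned long)i << 21;
		deactivate(&pages[i]);
	}
	for (i = 0; i < NBUCKETS; i++)
		fixup(&deferred[i]);	/* pagedaemon-style final sweep */
	printf("inactive queue length: %d\n", inactive.cnt);
	free(pages);
	return (0);
}

With these toy parameters every bucket reaches the low-water mark, so the final sweep finds nothing left to merge and the program prints an inactive queue length of 64; the sweep mirrors how the pagedaemon's call to vm_page_queue_fixup() drains buckets that never grew long enough on their own.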