Page MenuHomeFreeBSD

D14893.id40876.diff
No OneTemporary

D14893.id40876.diff

Index: sys/amd64/include/vmparam.h
===================================================================
--- sys/amd64/include/vmparam.h
+++ sys/amd64/include/vmparam.h
@@ -227,4 +227,10 @@
#define ZERO_REGION_SIZE (2 * 1024 * 1024) /* 2MB */
+/*
+ * Use a fairly large batch size since we expect amd64 systems to have lots of
+ * memory.
+ */
+#define VM_BATCHQUEUE_SIZE 31
+
#endif /* _MACHINE_VMPARAM_H_ */
Index: sys/kern/subr_witness.c
===================================================================
--- sys/kern/subr_witness.c
+++ sys/kern/subr_witness.c
@@ -601,7 +601,6 @@
* CDEV
*/
{ "vm map (system)", &lock_class_mtx_sleep },
- { "vm pagequeue", &lock_class_mtx_sleep },
{ "vnode interlock", &lock_class_mtx_sleep },
{ "cdev", &lock_class_mtx_sleep },
{ NULL, NULL },
@@ -611,11 +610,11 @@
{ "vm map (user)", &lock_class_sx },
{ "vm object", &lock_class_rw },
{ "vm page", &lock_class_mtx_sleep },
- { "vm pagequeue", &lock_class_mtx_sleep },
{ "pmap pv global", &lock_class_rw },
{ "pmap", &lock_class_mtx_sleep },
{ "pmap pv list", &lock_class_rw },
{ "vm page free queue", &lock_class_mtx_sleep },
+ { "vm pagequeue", &lock_class_mtx_sleep },
{ NULL, NULL },
/*
* kqueue/VFS interaction
Index: sys/vm/vm_object.c
===================================================================
--- sys/vm/vm_object.c
+++ sys/vm/vm_object.c
@@ -721,14 +721,11 @@
vm_object_terminate_pages(vm_object_t object)
{
vm_page_t p, p_next;
- struct mtx *mtx, *mtx1;
- struct vm_pagequeue *pq, *pq1;
- int dequeued;
+ struct mtx *mtx;
VM_OBJECT_ASSERT_WLOCKED(object);
mtx = NULL;
- pq = NULL;
/*
* Free any remaining pageable pages. This also removes them from the
@@ -738,60 +735,23 @@
*/
TAILQ_FOREACH_SAFE(p, &object->memq, listq, p_next) {
vm_page_assert_unbusied(p);
- if ((object->flags & OBJ_UNMANAGED) == 0) {
+ if ((object->flags & OBJ_UNMANAGED) == 0)
/*
* vm_page_free_prep() only needs the page
* lock for managed pages.
*/
- mtx1 = vm_page_lockptr(p);
- if (mtx1 != mtx) {
- if (mtx != NULL)
- mtx_unlock(mtx);
- if (pq != NULL) {
- vm_pagequeue_cnt_add(pq, dequeued);
- vm_pagequeue_unlock(pq);
- pq = NULL;
- }
- mtx = mtx1;
- mtx_lock(mtx);
- }
- }
+ vm_page_change_lock(p, &mtx);
p->object = NULL;
if (p->wire_count != 0)
- goto unlist;
+ continue;
VM_CNT_INC(v_pfree);
p->flags &= ~PG_ZERO;
- if (p->queue != PQ_NONE) {
- KASSERT(p->queue < PQ_COUNT, ("vm_object_terminate: "
- "page %p is not queued", p));
- pq1 = vm_page_pagequeue(p);
- if (pq != pq1) {
- if (pq != NULL) {
- vm_pagequeue_cnt_add(pq, dequeued);
- vm_pagequeue_unlock(pq);
- }
- pq = pq1;
- vm_pagequeue_lock(pq);
- dequeued = 0;
- }
- p->queue = PQ_NONE;
- TAILQ_REMOVE(&pq->pq_pl, p, plinks.q);
- dequeued--;
- }
- if (vm_page_free_prep(p, true))
- continue;
-unlist:
- TAILQ_REMOVE(&object->memq, p, listq);
- }
- if (pq != NULL) {
- vm_pagequeue_cnt_add(pq, dequeued);
- vm_pagequeue_unlock(pq);
+
+ vm_page_free(p);
}
if (mtx != NULL)
mtx_unlock(mtx);
- vm_page_free_phys_pglist(&object->memq);
-
/*
* If the object contained any pages, then reset it to an empty state.
* None of the object's fields, including "resident_page_count", were
@@ -1974,7 +1934,6 @@
{
vm_page_t p, next;
struct mtx *mtx;
- struct pglist pgl;
VM_OBJECT_ASSERT_WLOCKED(object);
KASSERT((object->flags & OBJ_UNMANAGED) == 0 ||
@@ -1983,7 +1942,6 @@
if (object->resident_page_count == 0)
return;
vm_object_pip_add(object, 1);
- TAILQ_INIT(&pgl);
again:
p = vm_page_find_least(object, start);
mtx = NULL;
@@ -2038,12 +1996,10 @@
if ((options & OBJPR_NOTMAPPED) == 0 && object->ref_count != 0)
pmap_remove_all(p);
p->flags &= ~PG_ZERO;
- if (vm_page_free_prep(p, false))
- TAILQ_INSERT_TAIL(&pgl, p, listq);
+ vm_page_free(p);
}
if (mtx != NULL)
mtx_unlock(mtx);
- vm_page_free_phys_pglist(&pgl);
vm_object_pip_wakeup(object);
}
Index: sys/vm/vm_page.h
===================================================================
--- sys/vm/vm_page.h
+++ sys/vm/vm_page.h
@@ -94,7 +94,9 @@
* In general, operations on this structure's mutable fields are
* synchronized using either one of or a combination of the lock on the
* object that the page belongs to (O), the pool lock for the page (P),
- * or the lock for either the free or paging queue (Q). If a field is
+ * the per-domain lock for the free queues (F), or the page's queue
+ * lock (Q). The queue lock for a page depends on the value of its
+ * queue field and is described in detail below. If a field is
* annotated below with two of these locks, then holding either lock is
* sufficient for read access, but both locks are required for write
* access. An annotation of (C) indicates that the field is immutable.
@@ -143,6 +145,28 @@
* causing the thread to block. vm_page_sleep_if_busy() can be used to
* sleep until the page's busy state changes, after which the caller
* must re-lookup the page and re-evaluate its state.
+ *
+ * The queue field is the index of the page queue containing the
+ * page, or PQ_NONE if the page is not enqueued. The queue lock of a
+ * page is the page queue lock corresponding to the page queue index,
+ * or the page lock (P) for the page if its queue field is PQ_NONE. To
+ * modify the queue field, the queue lock for the old value of the field
+ * must be held. It is invalid for a page's queue field to transition
+ * between two distinct page queue indices. That is, when updating the
+ * queue field, either the new value or the old value must be PQ_NONE.
+ *
+ * To avoid contention on page queue locks, page queue operations
+ * (enqueue, dequeue, requeue) are batched using per-CPU queues.
+ * A deferred operation is requested by inserting an entry into a
+ * batch queue; the entry is simply a pointer to the page, and the
+ * request type is encoded in the page's aflags field using the values
+ * in PGA_QUEUE_STATE_MASK. The type-stability of struct vm_page is
+ * crucial to this scheme since the processing of entries in a given
+ * batch queue may be deferred indefinitely. In particular, a page
+ * may be freed before its pending batch queue entries have been
+ * processed. The page lock (P) must be held to schedule a batched
+ * queue operation, and the page queue lock must be held in order to
+ * process batch queue entries for the page queue.
*/
#if PAGE_SIZE == 4096
@@ -174,7 +198,7 @@
TAILQ_ENTRY(vm_page) listq; /* pages in same object (O) */
vm_object_t object; /* which object am I in (O,P) */
vm_pindex_t pindex; /* offset into object (O,P) */
- vm_paddr_t phys_addr; /* physical address of page */
+ vm_paddr_t phys_addr; /* physical address of page (C) */
struct md_page md; /* machine dependent stuff */
u_int wire_count; /* wired down maps refs (P) */
volatile u_int busy_lock; /* busy owners lock */
@@ -182,11 +206,11 @@
uint16_t flags; /* page PG_* flags (P) */
uint8_t aflags; /* access is atomic */
uint8_t oflags; /* page VPO_* flags (O) */
- uint8_t queue; /* page queue index (P,Q) */
+ uint8_t queue; /* page queue index (Q) */
int8_t psind; /* pagesizes[] index (O) */
int8_t segind; /* vm_phys segment index (C) */
- uint8_t order; /* index of the buddy queue */
- uint8_t pool; /* vm_phys freepool index (Q) */
+ uint8_t order; /* index of the buddy queue (F) */
+ uint8_t pool; /* vm_phys freepool index (F) */
u_char act_count; /* page usage count (P) */
/* NOTE that these must support one bit per DEV_BSIZE in a page */
/* so, on normal X86 kernels, they must be at least 8 bits wide */
@@ -314,10 +338,32 @@
*
* PGA_EXECUTABLE may be set by pmap routines, and indicates that a page has
* at least one executable mapping. It is not consumed by the MI VM layer.
+ *
+ * PGA_ENQUEUED is set and cleared when a page is inserted into or removed
+ * from a page queue, respectively. It determines whether the plinks.q field
+ * of the page is valid. To set or clear this flag, the queue lock for the
+ * page must be held: the page queue lock corresponding to the page's "queue"
+ * field if its value is not PQ_NONE, and the page lock otherwise.
+ *
+ * PGA_DEQUEUE is set when the page is scheduled to be dequeued from a page
+ * queue, and cleared when the dequeue request is processed. A page may
+ * have PGA_DEQUEUE set and PGA_ENQUEUED cleared, for instance if a dequeue
+ * is requested after the page is scheduled to be enqueued but before it is
+ * actually inserted into the page queue. The page lock must be held to set
+ * this flag, and the queue lock for the page must be held to clear it.
+ *
+ * PGA_REQUEUE is set when the page is scheduled to be requeued in its page
+ * queue. The page lock must be held to set this flag, and the queue lock
+ * for the page must be held to clear it.
*/
#define PGA_WRITEABLE 0x01 /* page may be mapped writeable */
#define PGA_REFERENCED 0x02 /* page has been referenced */
#define PGA_EXECUTABLE 0x04 /* page may be mapped executable */
+#define PGA_ENQUEUED 0x08 /* page is enqueued in a page queue */
+#define PGA_DEQUEUE 0x10 /* page is due to be dequeued */
+#define PGA_REQUEUE 0x20 /* page is due to be requeued */
+
+#define PGA_QUEUE_STATE_MASK (PGA_ENQUEUED | PGA_DEQUEUE | PGA_REQUEUE)
/*
* Page flags. If changed at any other time than page allocation or
@@ -483,10 +529,10 @@
void vm_page_deactivate(vm_page_t);
void vm_page_deactivate_noreuse(vm_page_t);
void vm_page_dequeue(vm_page_t m);
+void vm_page_dequeue_lazy(vm_page_t m);
void vm_page_dequeue_locked(vm_page_t m);
vm_page_t vm_page_find_least(vm_object_t, vm_pindex_t);
-void vm_page_free_phys_pglist(struct pglist *tq);
-bool vm_page_free_prep(vm_page_t m, bool pagequeue_locked);
+bool vm_page_free_prep(vm_page_t m);
vm_page_t vm_page_getfake(vm_paddr_t paddr, vm_memattr_t memattr);
void vm_page_initfake(vm_page_t m, vm_paddr_t paddr, vm_memattr_t memattr);
int vm_page_insert (vm_page_t, vm_object_t, vm_pindex_t);
Index: sys/vm/vm_page.c
===================================================================
--- sys/vm/vm_page.c
+++ sys/vm/vm_page.c
@@ -131,13 +131,11 @@
extern void uma_startup(void *, int);
extern int vmem_startup_count(void);
-/*
- * Associated with page of user-allocatable memory is a
- * page structure.
- */
-
struct vm_domain vm_dom[MAXMEMDOM];
+static DPCPU_DEFINE(struct vm_batchqueue, pqbatch[MAXMEMDOM][PQ_COUNT]);
+static DPCPU_DEFINE(struct vm_batchqueue, freeqbatch[MAXMEMDOM]);
+
struct mtx_padalign __exclusive_cache_line pa_lock[PA_LOCK_COUNT];
struct mtx_padalign __exclusive_cache_line vm_domainset_lock;
@@ -176,7 +174,7 @@
static void vm_page_alloc_check(vm_page_t m);
static void vm_page_clear_dirty_mask(vm_page_t m, vm_page_bits_t pagebits);
-static void vm_page_enqueue(uint8_t queue, vm_page_t m);
+static void vm_page_enqueue_lazy(vm_page_t m, uint8_t queue);
static void vm_page_init(void *dummy);
static int vm_page_insert_after(vm_page_t m, vm_object_t object,
vm_pindex_t pindex, vm_page_t mpred);
@@ -1813,9 +1811,8 @@
*/
KASSERT(m != NULL, ("missing page"));
-#if VM_NRESERVLEVEL > 0
found:
-#endif
+ vm_page_dequeue(m);
vm_page_alloc_check(m);
/*
@@ -2012,8 +2009,10 @@
#if VM_NRESERVLEVEL > 0
found:
#endif
- for (m = m_ret; m < &m_ret[npages]; m++)
+ for (m = m_ret; m < &m_ret[npages]; m++) {
+ vm_page_dequeue(m);
vm_page_alloc_check(m);
+ }
/*
* Initialize the pages. Only the PG_ZERO flag is inherited.
@@ -2157,6 +2156,7 @@
goto again;
return (NULL);
}
+ vm_page_dequeue(m);
vm_page_alloc_check(m);
/*
@@ -2349,7 +2349,8 @@
vm_reserv_size(level)) - pa);
#endif
} else if (object->memattr == VM_MEMATTR_DEFAULT &&
- m->queue != PQ_NONE && !vm_page_busied(m)) {
+ m->queue != PQ_NONE &&
+ (m->aflags & PGA_DEQUEUE) == 0 && !vm_page_busied(m)) {
/*
* The page is allocated but eligible for
* relocation. Extend the current run by one
@@ -2500,7 +2501,9 @@
error = EINVAL;
else if (object->memattr != VM_MEMATTR_DEFAULT)
error = EINVAL;
- else if (m->queue != PQ_NONE && !vm_page_busied(m)) {
+ else if (m->queue != PQ_NONE &&
+ (m->aflags & PGA_DEQUEUE) == 0 &&
+ !vm_page_busied(m)) {
KASSERT(pmap_page_get_memattr(m) ==
VM_MEMATTR_DEFAULT,
("page %p has an unexpected memattr", m));
@@ -2550,7 +2553,7 @@
goto unlock;
}
KASSERT(m_new->wire_count == 0,
- ("page %p is wired", m_new));
+ ("page %p is wired", m));
/*
* Replace "m" with the new page. For
@@ -2560,9 +2563,10 @@
*/
if (object->ref_count != 0)
pmap_remove_all(m);
- m_new->aflags = m->aflags;
+ m_new->aflags = m->aflags &
+ ~PGA_QUEUE_STATE_MASK;
KASSERT(m_new->oflags == VPO_UNMANAGED,
- ("page %p is managed", m_new));
+ ("page %p is managed", m));
m_new->oflags = m->oflags & VPO_NOSYNC;
pmap_copy_page(m, m_new);
m_new->valid = m->valid;
@@ -2572,7 +2576,7 @@
vm_page_remque(m);
vm_page_replace_checked(m_new, object,
m->pindex, m);
- if (vm_page_free_prep(m, false))
+ if (vm_page_free_prep(m))
SLIST_INSERT_HEAD(&free, m,
plinks.s.ss);
@@ -2586,7 +2590,7 @@
m->flags &= ~PG_ZERO;
vm_page_remque(m);
vm_page_remove(m);
- if (vm_page_free_prep(m, false))
+ if (vm_page_free_prep(m))
SLIST_INSERT_HEAD(&free, m,
plinks.s.ss);
KASSERT(m->dirty == 0,
@@ -3029,113 +3033,288 @@
return (&vm_pagequeue_domain(m)->vmd_pagequeues[m->queue]);
}
+static struct mtx *
+vm_page_pagequeue_lockptr(vm_page_t m)
+{
+
+ if (m->queue == PQ_NONE)
+ return (NULL);
+ return (&vm_page_pagequeue(m)->pq_mutex);
+}
+
+static void
+vm_pqbatch_process(struct vm_pagequeue *pq, struct vm_batchqueue *bq,
+ uint8_t queue)
+{
+ vm_page_t m;
+ int delta;
+ uint8_t aflags;
+
+ vm_pagequeue_assert_locked(pq);
+
+ delta = 0;
+ VM_BATCHQ_FOREACH(bq, m) {
+ if (__predict_false(m->queue != queue))
+ continue;
+
+ aflags = m->aflags;
+ if ((aflags & PGA_DEQUEUE) != 0) {
+ if (__predict_true((aflags & PGA_ENQUEUED) != 0)) {
+ TAILQ_REMOVE(&pq->pq_pl, m, plinks.q);
+ delta--;
+ }
+
+ /*
+ * Synchronize with the page daemon, which may be
+ * simultaneously scanning this page with only the page
+ * lock held. We must be careful to avoid leaving the
+ * page in a state where it appears to belong to a page
+ * queue.
+ */
+ m->queue = PQ_NONE;
+ atomic_thread_fence_rel();
+ vm_page_aflag_clear(m, PGA_QUEUE_STATE_MASK);
+ } else if ((aflags & PGA_ENQUEUED) == 0) {
+ TAILQ_INSERT_TAIL(&pq->pq_pl, m, plinks.q);
+ delta++;
+ vm_page_aflag_set(m, PGA_ENQUEUED);
+ if (__predict_false((aflags & PGA_REQUEUE) != 0))
+ vm_page_aflag_clear(m, PGA_REQUEUE);
+ } else if ((aflags & PGA_REQUEUE) != 0) {
+ TAILQ_REMOVE(&pq->pq_pl, m, plinks.q);
+ TAILQ_INSERT_TAIL(&pq->pq_pl, m, plinks.q);
+ vm_page_aflag_clear(m, PGA_REQUEUE);
+ }
+ }
+ vm_batchqueue_init(bq);
+ vm_pagequeue_cnt_add(pq, delta);
+}
+
/*
- * vm_page_dequeue:
+ * vm_page_dequeue_lazy: [ internal use only ]
*
- * Remove the given page from its current page queue.
+ * Request removal of the given page from its current page
+ * queue. Physical removal from the queue may be deferred
+ * arbitrarily, and may be cancelled by later queue operations
+ * on that page.
*
* The page must be locked.
*/
void
-vm_page_dequeue(vm_page_t m)
+vm_page_dequeue_lazy(vm_page_t m)
{
+ struct vm_batchqueue *bq;
struct vm_pagequeue *pq;
+ int domain, queue;
vm_page_assert_locked(m);
- KASSERT(m->queue < PQ_COUNT, ("vm_page_dequeue: page %p is not queued",
- m));
- pq = vm_page_pagequeue(m);
- vm_pagequeue_lock(pq);
- m->queue = PQ_NONE;
- TAILQ_REMOVE(&pq->pq_pl, m, plinks.q);
- vm_pagequeue_cnt_dec(pq);
+
+ queue = m->queue;
+ if (queue == PQ_NONE)
+ return;
+ domain = vm_phys_domain(m);
+ pq = &VM_DOMAIN(domain)->vmd_pagequeues[queue];
+
+ vm_page_aflag_set(m, PGA_DEQUEUE);
+
+ critical_enter();
+ bq = DPCPU_PTR(pqbatch[domain][queue]);
+ if (vm_batchqueue_insert(bq, m)) {
+ critical_exit();
+ return;
+ }
+ if (!vm_pagequeue_trylock(pq)) {
+ critical_exit();
+ vm_pagequeue_lock(pq);
+ critical_enter();
+ bq = DPCPU_PTR(pqbatch[domain][queue]);
+ }
+ vm_pqbatch_process(pq, bq, queue);
+
+ /*
+ * The page may have been dequeued by another thread before we
+ * acquired the page queue lock. However, since we hold the
+ * page lock, the page's queue field cannot change a second
+ * time and we can safely clear PGA_DEQUEUE.
+ */
+ KASSERT(m->queue == queue || m->queue == PQ_NONE,
+ ("%s: page %p migrated between queues", __func__, m));
+ if (m->queue == queue) {
+ (void)vm_batchqueue_insert(bq, m);
+ vm_pqbatch_process(pq, bq, queue);
+ } else
+ vm_page_aflag_clear(m, PGA_DEQUEUE);
vm_pagequeue_unlock(pq);
+ critical_exit();
}
/*
* vm_page_dequeue_locked:
*
- * Remove the given page from its current page queue.
+ * Remove the page from its page queue, which must be locked.
+ * If the page lock is not held, there is no guarantee that the
+ * page will not be enqueued by another thread before this function
+ * returns. In this case, it is up to the caller to ensure that
+ * no other threads hold a reference to the page.
*
- * The page and page queue must be locked.
+ * The page queue lock must be held. If the page is not already
+ * logically dequeued, the page lock must be held as well.
*/
void
vm_page_dequeue_locked(vm_page_t m)
{
struct vm_pagequeue *pq;
- vm_page_lock_assert(m, MA_OWNED);
- pq = vm_page_pagequeue(m);
- vm_pagequeue_assert_locked(pq);
+ KASSERT(m->queue != PQ_NONE,
+ ("%s: page %p queue field is PQ_NONE", __func__, m));
+ vm_pagequeue_assert_locked(vm_page_pagequeue(m));
+ KASSERT((m->aflags & PGA_DEQUEUE) != 0 ||
+ mtx_owned(vm_page_lockptr(m)),
+ ("%s: queued unlocked page %p", __func__, m));
+
+ if ((m->aflags & PGA_ENQUEUED) != 0) {
+ pq = vm_page_pagequeue(m);
+ TAILQ_REMOVE(&pq->pq_pl, m, plinks.q);
+ vm_pagequeue_cnt_dec(pq);
+ }
+
+ /*
+ * Synchronize with the page daemon, which may be simultaneously
+ * scanning this page with only the page lock held. We must be careful
+ * to avoid leaving the page in a state where it appears to belong to a
+ * page queue.
+ */
m->queue = PQ_NONE;
- TAILQ_REMOVE(&pq->pq_pl, m, plinks.q);
- vm_pagequeue_cnt_dec(pq);
+ atomic_thread_fence_rel();
+ vm_page_aflag_clear(m, PGA_QUEUE_STATE_MASK);
+}
+
+/*
+ * vm_page_dequeue:
+ *
+ * Remove the page from whichever page queue it's in, if any.
+ * If the page lock is not held, there is no guarantee that the
+ * page will not be enqueued by another thread before this function
+ * returns. In this case, it is up to the caller to ensure that
+ * no other threads hold a reference to the page.
+ */
+void
+vm_page_dequeue(vm_page_t m)
+{
+ struct mtx *lock, *lock1;
+
+ lock = vm_page_pagequeue_lockptr(m);
+ for (;;) {
+ if (lock == NULL)
+ return;
+ mtx_lock(lock);
+ if ((lock1 = vm_page_pagequeue_lockptr(m)) == lock)
+ break;
+ mtx_unlock(lock);
+ lock = lock1;
+ }
+ KASSERT(lock == vm_page_pagequeue_lockptr(m),
+ ("%s: page %p migrated directly between queues", __func__, m));
+ vm_page_dequeue_locked(m);
+ mtx_unlock(lock);
}
/*
- * vm_page_enqueue:
+ * vm_page_enqueue_lazy:
*
- * Add the given page to the specified page queue.
+ * Schedule the given page for insertion into the specified page queue.
+ * Physical insertion of the page may be deferred indefinitely.
*
* The page must be locked.
*/
static void
-vm_page_enqueue(uint8_t queue, vm_page_t m)
+vm_page_enqueue_lazy(vm_page_t m, uint8_t queue)
{
+ struct vm_batchqueue *bq;
struct vm_pagequeue *pq;
+ int domain;
- vm_page_lock_assert(m, MA_OWNED);
- KASSERT(queue < PQ_COUNT,
- ("vm_page_enqueue: invalid queue %u request for page %p",
- queue, m));
+ vm_page_assert_locked(m);
+ KASSERT(m->queue == PQ_NONE && (m->aflags & PGA_QUEUE_STATE_MASK) == 0,
+ ("%s: page %p is already enqueued", __func__, m));
+
+ domain = vm_phys_domain(m);
pq = &vm_pagequeue_domain(m)->vmd_pagequeues[queue];
- vm_pagequeue_lock(pq);
+
+ /*
+ * The queue field might be changed back to PQ_NONE by a concurrent
+ * call to vm_page_dequeue(). In that case the batch queue entry will
+ * be a no-op.
+ */
m->queue = queue;
- TAILQ_INSERT_TAIL(&pq->pq_pl, m, plinks.q);
- vm_pagequeue_cnt_inc(pq);
+
+ critical_enter();
+ bq = DPCPU_PTR(pqbatch[domain][queue]);
+ if (__predict_true(vm_batchqueue_insert(bq, m))) {
+ critical_exit();
+ return;
+ }
+ if (!vm_pagequeue_trylock(pq)) {
+ critical_exit();
+ vm_pagequeue_lock(pq);
+ critical_enter();
+ bq = DPCPU_PTR(pqbatch[domain][queue]);
+ }
+ vm_pqbatch_process(pq, bq, queue);
+ (void)vm_batchqueue_insert(bq, m);
+ vm_pqbatch_process(pq, bq, queue);
vm_pagequeue_unlock(pq);
+ critical_exit();
}
/*
* vm_page_requeue:
*
- * Move the given page to the tail of its current page queue.
+ * Schedule a requeue of the given page.
*
* The page must be locked.
*/
void
vm_page_requeue(vm_page_t m)
{
+ struct vm_batchqueue *bq;
struct vm_pagequeue *pq;
+ int domain, queue;
vm_page_lock_assert(m, MA_OWNED);
KASSERT(m->queue != PQ_NONE,
- ("vm_page_requeue: page %p is not queued", m));
+ ("%s: page %p is not enqueued", __func__, m));
+
+ domain = vm_phys_domain(m);
+ queue = m->queue;
pq = vm_page_pagequeue(m);
- vm_pagequeue_lock(pq);
- TAILQ_REMOVE(&pq->pq_pl, m, plinks.q);
- TAILQ_INSERT_TAIL(&pq->pq_pl, m, plinks.q);
- vm_pagequeue_unlock(pq);
-}
-/*
- * vm_page_requeue_locked:
- *
- * Move the given page to the tail of its current page queue.
- *
- * The page queue must be locked.
- */
-void
-vm_page_requeue_locked(vm_page_t m)
-{
- struct vm_pagequeue *pq;
+ if (queue == PQ_NONE)
+ return;
- KASSERT(m->queue != PQ_NONE,
- ("vm_page_requeue_locked: page %p is not queued", m));
- pq = vm_page_pagequeue(m);
- vm_pagequeue_assert_locked(pq);
- TAILQ_REMOVE(&pq->pq_pl, m, plinks.q);
- TAILQ_INSERT_TAIL(&pq->pq_pl, m, plinks.q);
+ vm_page_aflag_set(m, PGA_REQUEUE);
+ critical_enter();
+ bq = DPCPU_PTR(pqbatch[domain][queue]);
+ if (__predict_true(vm_batchqueue_insert(bq, m))) {
+ critical_exit();
+ return;
+ }
+ if (!vm_pagequeue_trylock(pq)) {
+ critical_exit();
+ vm_pagequeue_lock(pq);
+ critical_enter();
+ bq = DPCPU_PTR(pqbatch[domain][queue]);
+ }
+ vm_pqbatch_process(pq, bq, queue);
+ KASSERT(m->queue == queue || m->queue == PQ_NONE,
+ ("%s: page %p migrated between queues", __func__, m));
+ if (m->queue == queue) {
+ (void)vm_batchqueue_insert(bq, m);
+ vm_pqbatch_process(pq, bq, queue);
+ } else
+ vm_page_aflag_clear(m, PGA_REQUEUE);
+ vm_pagequeue_unlock(pq);
+ critical_exit();
}
/*
@@ -3153,18 +3332,18 @@
int queue;
vm_page_lock_assert(m, MA_OWNED);
- if ((queue = m->queue) != PQ_ACTIVE) {
- if (m->wire_count == 0 && (m->oflags & VPO_UNMANAGED) == 0) {
- if (m->act_count < ACT_INIT)
- m->act_count = ACT_INIT;
- if (queue != PQ_NONE)
- vm_page_dequeue(m);
- vm_page_enqueue(PQ_ACTIVE, m);
- }
- } else {
- if (m->act_count < ACT_INIT)
+
+ if ((queue = m->queue) == PQ_ACTIVE || m->wire_count > 0 ||
+ (m->oflags & VPO_UNMANAGED) != 0) {
+ if (queue == PQ_ACTIVE && m->act_count < ACT_INIT)
m->act_count = ACT_INIT;
+ return;
}
+
+ vm_page_remque(m);
+ if (m->act_count < ACT_INIT)
+ m->act_count = ACT_INIT;
+ vm_page_enqueue_lazy(m, PQ_ACTIVE);
}
/*
@@ -3175,11 +3354,10 @@
* the page to the free list only if this function returns true.
*
* The object must be locked. The page must be locked if it is
- * managed. For a queued managed page, the pagequeue_locked
- * argument specifies whether the page queue is already locked.
+ * managed.
*/
bool
-vm_page_free_prep(vm_page_t m, bool pagequeue_locked)
+vm_page_free_prep(vm_page_t m)
{
#if defined(DIAGNOSTIC) && defined(PHYS_TO_DMAP)
@@ -3195,14 +3373,14 @@
if ((m->oflags & VPO_UNMANAGED) == 0) {
vm_page_lock_assert(m, MA_OWNED);
KASSERT(!pmap_page_is_mapped(m),
- ("vm_page_free_toq: freeing mapped page %p", m));
+ ("vm_page_free_prep: freeing mapped page %p", m));
} else
KASSERT(m->queue == PQ_NONE,
- ("vm_page_free_toq: unmanaged page %p is queued", m));
+ ("vm_page_free_prep: unmanaged page %p is queued", m));
VM_CNT_INC(v_tfree);
if (vm_page_sbusied(m))
- panic("vm_page_free: freeing busy page %p", m);
+ panic("vm_page_free_prep: freeing busy page %p", m);
vm_page_remove(m);
@@ -3218,21 +3396,23 @@
return (false);
}
- if (m->queue != PQ_NONE) {
- if (pagequeue_locked)
- vm_page_dequeue_locked(m);
- else
- vm_page_dequeue(m);
- }
+ /*
+ * Pages need not be dequeued before they are returned to the physical
+ * memory allocator, but they must at least be marked for a deferred
+ * dequeue.
+ */
+ if ((m->oflags & VPO_UNMANAGED) == 0)
+ vm_page_dequeue_lazy(m);
+
m->valid = 0;
vm_page_undirty(m);
if (m->wire_count != 0)
- panic("vm_page_free: freeing wired page %p", m);
+ panic("vm_page_free_prep: freeing wired page %p", m);
if (m->hold_count != 0) {
m->flags &= ~PG_ZERO;
KASSERT((m->flags & PG_UNHOLDFREE) == 0,
- ("vm_page_free: freeing PG_UNHOLDFREE page %p", m));
+ ("vm_page_free_prep: freeing PG_UNHOLDFREE page %p", m));
m->flags |= PG_UNHOLDFREE;
return (false);
}
@@ -3251,36 +3431,6 @@
return (true);
}
-void
-vm_page_free_phys_pglist(struct pglist *tq)
-{
- struct vm_domain *vmd;
- vm_page_t m;
- int cnt;
-
- if (TAILQ_EMPTY(tq))
- return;
- vmd = NULL;
- cnt = 0;
- TAILQ_FOREACH(m, tq, listq) {
- if (vmd != vm_pagequeue_domain(m)) {
- if (vmd != NULL) {
- vm_domain_free_unlock(vmd);
- vm_domain_freecnt_inc(vmd, cnt);
- cnt = 0;
- }
- vmd = vm_pagequeue_domain(m);
- vm_domain_free_lock(vmd);
- }
- vm_phys_free_pages(m, 0);
- cnt++;
- }
- if (vmd != NULL) {
- vm_domain_free_unlock(vmd);
- vm_domain_freecnt_inc(vmd, cnt);
- }
-}
-
/*
* vm_page_free_toq:
*
@@ -3293,15 +3443,35 @@
void
vm_page_free_toq(vm_page_t m)
{
+ struct vm_batchqueue *cpubq, bq;
struct vm_domain *vmd;
+ int domain, freed;
+
+ if (!vm_page_free_prep(m))
+ return;
+
+ domain = vm_phys_domain(m);
+ vmd = VM_DOMAIN(domain);
- if (!vm_page_free_prep(m, false))
+ critical_enter();
+ cpubq = DPCPU_PTR(freeqbatch[domain]);
+ if (vm_batchqueue_insert(cpubq, m)) {
+ critical_exit();
return;
- vmd = vm_pagequeue_domain(m);
+ }
+ memcpy(&bq, cpubq, sizeof(bq));
+ vm_batchqueue_init(cpubq);
+ critical_exit();
+
vm_domain_free_lock(vmd);
vm_phys_free_pages(m, 0);
+ freed = 1;
+ VM_BATCHQ_FOREACH(&bq, m) {
+ vm_phys_free_pages(m, 0);
+ freed++;
+ }
vm_domain_free_unlock(vmd);
- vm_domain_freecnt_inc(vmd, 1);
+ vm_domain_freecnt_inc(vmd, freed);
}
/*
@@ -3318,23 +3488,18 @@
vm_page_free_pages_toq(struct spglist *free, bool update_wire_count)
{
vm_page_t m;
- struct pglist pgl;
int count;
if (SLIST_EMPTY(free))
return;
count = 0;
- TAILQ_INIT(&pgl);
while ((m = SLIST_FIRST(free)) != NULL) {
count++;
SLIST_REMOVE_HEAD(free, plinks.s.ss);
- if (vm_page_free_prep(m, false))
- TAILQ_INSERT_TAIL(&pgl, m, listq);
+ vm_page_free_toq(m);
}
- vm_page_free_phys_pglist(&pgl);
-
if (update_wire_count)
vm_wire_sub(count);
}
@@ -3393,22 +3558,25 @@
KASSERT(queue < PQ_COUNT || queue == PQ_NONE,
("vm_page_unwire: invalid queue %u request for page %p",
queue, m));
+ if ((m->oflags & VPO_UNMANAGED) == 0)
+ vm_page_assert_locked(m);
unwired = vm_page_unwire_noq(m);
- if (unwired && (m->oflags & VPO_UNMANAGED) == 0 && m->object != NULL) {
- if (m->queue == queue) {
+ if (!unwired || (m->oflags & VPO_UNMANAGED) != 0 || m->object == NULL)
+ return (unwired);
+
+ if (m->queue == queue) {
+ if (queue == PQ_ACTIVE)
+ vm_page_reference(m);
+ else if (queue != PQ_NONE)
+ vm_page_requeue(m);
+ } else {
+ vm_page_dequeue(m);
+ if (queue != PQ_NONE) {
+ vm_page_enqueue_lazy(m, queue);
if (queue == PQ_ACTIVE)
- vm_page_reference(m);
- else if (queue != PQ_NONE)
- vm_page_requeue(m);
- } else {
- vm_page_remque(m);
- if (queue != PQ_NONE) {
- vm_page_enqueue(queue, m);
- if (queue == PQ_ACTIVE)
- /* Initialize act_count. */
- vm_page_activate(m);
- }
+ /* Initialize act_count. */
+ vm_page_activate(m);
}
}
return (unwired);
@@ -3456,35 +3624,33 @@
* The page must be locked.
*/
static inline void
-_vm_page_deactivate(vm_page_t m, boolean_t noreuse)
+_vm_page_deactivate(vm_page_t m, bool noreuse)
{
struct vm_pagequeue *pq;
- int queue;
vm_page_assert_locked(m);
- if (m->wire_count == 0 && (m->oflags & VPO_UNMANAGED) == 0) {
+ if (m->wire_count > 0 || (m->oflags & VPO_UNMANAGED) != 0)
+ return;
+
+ if (noreuse) {
+ /* This is slower than it could be. */
+ vm_page_remque(m);
pq = &vm_pagequeue_domain(m)->vmd_pagequeues[PQ_INACTIVE];
- /* Avoid multiple acquisitions of the inactive queue lock. */
- queue = m->queue;
- if (queue == PQ_INACTIVE) {
- vm_pagequeue_lock(pq);
- vm_page_dequeue_locked(m);
- } else {
- if (queue != PQ_NONE)
- vm_page_dequeue(m);
- vm_pagequeue_lock(pq);
- }
+ vm_pagequeue_lock(pq);
m->queue = PQ_INACTIVE;
- if (noreuse)
- TAILQ_INSERT_BEFORE(
- &vm_pagequeue_domain(m)->vmd_inacthead, m,
- plinks.q);
- else
- TAILQ_INSERT_TAIL(&pq->pq_pl, m, plinks.q);
+ TAILQ_INSERT_BEFORE(&vm_pagequeue_domain(m)->vmd_inacthead, m,
+ plinks.q);
vm_pagequeue_cnt_inc(pq);
+ vm_page_aflag_set(m, PGA_ENQUEUED);
+ if ((m->aflags & PGA_REQUEUE) != 0)
+ vm_page_aflag_clear(m, PGA_REQUEUE);
vm_pagequeue_unlock(pq);
- }
+ } else if (!vm_page_inactive(m)) {
+ vm_page_remque(m);
+ vm_page_enqueue_lazy(m, PQ_INACTIVE);
+ } else
+ vm_page_requeue(m);
}
/*
@@ -3497,7 +3663,7 @@
vm_page_deactivate(vm_page_t m)
{
- _vm_page_deactivate(m, FALSE);
+ _vm_page_deactivate(m, false);
}
/*
@@ -3510,7 +3676,7 @@
vm_page_deactivate_noreuse(vm_page_t m)
{
- _vm_page_deactivate(m, TRUE);
+ _vm_page_deactivate(m, true);
}
/*
@@ -3523,13 +3689,14 @@
{
vm_page_assert_locked(m);
- if (m->wire_count == 0 && (m->oflags & VPO_UNMANAGED) == 0) {
- if (m->queue == PQ_LAUNDRY)
- vm_page_requeue(m);
- else {
- vm_page_remque(m);
- vm_page_enqueue(PQ_LAUNDRY, m);
- }
+ if (m->wire_count > 0 || (m->oflags & VPO_UNMANAGED) != 0)
+ return;
+
+ if (m->queue == PQ_LAUNDRY)
+ vm_page_requeue(m);
+ else {
+ vm_page_remque(m);
+ vm_page_enqueue_lazy(m, PQ_LAUNDRY);
}
}
@@ -3545,9 +3712,9 @@
vm_page_assert_locked(m);
KASSERT(m->wire_count == 0 && (m->oflags & VPO_UNMANAGED) == 0,
("page %p already unswappable", m));
- if (m->queue != PQ_NONE)
- vm_page_dequeue(m);
- vm_page_enqueue(PQ_UNSWAPPABLE, m);
+
+ vm_page_remque(m);
+ vm_page_enqueue_lazy(m, PQ_UNSWAPPABLE);
}
/*
Index: sys/vm/vm_pageout.c
===================================================================
--- sys/vm/vm_pageout.c
+++ sys/vm/vm_pageout.c
@@ -201,11 +201,9 @@
CTLFLAG_RW, &vm_page_max_wired, 0, "System-wide limit to wired page count");
static u_int isqrt(u_int num);
-static boolean_t vm_pageout_fallback_object_lock(vm_page_t, vm_page_t *);
static int vm_pageout_launder(struct vm_domain *vmd, int launder,
bool in_shortfall);
static void vm_pageout_laundry_worker(void *arg);
-static boolean_t vm_pageout_page_lock(vm_page_t, vm_page_t *);
/*
* Initialize a dummy page for marking the caller's place in the specified
@@ -222,99 +220,68 @@
marker->busy_lock = VPB_SINGLE_EXCLUSIVER;
marker->queue = queue;
marker->hold_count = 1;
+ if (queue != PQ_NONE)
+ marker->aflags = PGA_ENQUEUED;
}
-/*
- * vm_pageout_fallback_object_lock:
- *
- * Lock vm object currently associated with `m'. VM_OBJECT_TRYWLOCK is
- * known to have failed and page queue must be either PQ_ACTIVE or
- * PQ_INACTIVE. To avoid lock order violation, unlock the page queue
- * while locking the vm object. Use marker page to detect page queue
- * changes and maintain notion of next page on page queue. Return
- * TRUE if no changes were detected, FALSE otherwise. vm object is
- * locked on return.
- *
- * This function depends on both the lock portion of struct vm_object
- * and normal struct vm_page being type stable.
- */
-static boolean_t
-vm_pageout_fallback_object_lock(vm_page_t m, vm_page_t *next)
+/*
+ * Report whether the page is still logically on the given page queue. A
+ * page with PGA_DEQUEUE set is reported as not queued regardless of the value
+ * of m->queue. The page lock must be held by the caller.
+ */
+static inline bool
+vm_pageout_page_queued(vm_page_t m, int queue)
{
- struct vm_page marker;
- struct vm_pagequeue *pq;
- boolean_t unchanged;
- u_short queue;
- vm_object_t object;
- queue = m->queue;
- vm_pageout_init_marker(&marker, queue);
- pq = vm_page_pagequeue(m);
- object = m->object;
-
- TAILQ_INSERT_AFTER(&pq->pq_pl, m, &marker, plinks.q);
- vm_pagequeue_unlock(pq);
- vm_page_unlock(m);
- VM_OBJECT_WLOCK(object);
- vm_page_lock(m);
- vm_pagequeue_lock(pq);
+ vm_page_assert_locked(m);
- /*
- * The page's object might have changed, and/or the page might
- * have moved from its original position in the queue. If the
- * page's object has changed, then the caller should abandon
- * processing the page because the wrong object lock was
- * acquired. Use the marker's plinks.q, not the page's, to
- * determine if the page has been moved. The state of the
- * page's plinks.q can be indeterminate; whereas, the marker's
- * plinks.q must be valid.
- */
- *next = TAILQ_NEXT(&marker, plinks.q);
- unchanged = m->object == object &&
- m == TAILQ_PREV(&marker, pglist, plinks.q);
- KASSERT(!unchanged || m->queue == queue,
- ("page %p queue %d %d", m, queue, m->queue));
- TAILQ_REMOVE(&pq->pq_pl, &marker, plinks.q);
- return (unchanged);
+ if ((m->aflags & PGA_DEQUEUE) != 0)
+ return (false);
+ /* Order the aflags load above before the m->queue load below. */
+ atomic_thread_fence_acq();
+ return (m->queue == queue);
}
/*
- * Lock the page while holding the page queue lock. Use marker page
- * to detect page queue changes and maintain notion of next page on
- * page queue. Return TRUE if no changes were detected, FALSE
- * otherwise. The page is locked on return. The page queue lock might
- * be dropped and reacquired.
+ * Add a small number of queued pages to a batch queue for later processing
+ * without the corresponding queue lock held. The caller must have enqueued a
+ * marker page at the desired start point for the scan.
*
- * This function depends on normal struct vm_page being type stable.
+ * When processing the batch queue, vm_pageout_page_queued() must be used to
+ * determine whether the page was logically dequeued by another thread. Once
+ * this check is performed, the page lock guarantees that the page will not be
+ * disassociated from the queue.
+ *
+ * At most maxcollect queue entries are visited, and collection stops early
+ * once the batch queue is full. If dequeue is true, each collected page is
+ * also unlinked from the queue, its PGA_ENQUEUED flag is cleared, and the
+ * queue's page count is reduced by the number of pages collected. On return
+ * the marker has been moved to just before the first entry that was not
+ * processed, or to the tail of the queue if the end was reached.
*/
-static boolean_t
-vm_pageout_page_lock(vm_page_t m, vm_page_t *next)
+static inline void
+vm_pageout_collect_batch(struct vm_pagequeue *pq, struct vm_batchqueue *bq,
+ vm_page_t marker, int maxcollect, const bool dequeue)
{
- struct vm_page marker;
- struct vm_pagequeue *pq;
- boolean_t unchanged;
- u_short queue;
+ vm_page_t m, n;
- vm_page_lock_assert(m, MA_NOTOWNED);
- if (vm_page_trylock(m))
- return (TRUE);
+ vm_pagequeue_assert_locked(pq);
- queue = m->queue;
- vm_pageout_init_marker(&marker, queue);
- pq = vm_page_pagequeue(m);
+ vm_batchqueue_init(bq);
+ for (m = TAILQ_NEXT(marker, plinks.q); m != NULL && maxcollect > 0;
+ m = n, maxcollect--) {
+ /*
+ * Fetch the successor now: the page may be unlinked below, and
+ * the link fields of a removed element must not be read (they
+ * are overwritten when queue macro debugging is enabled).
+ */
+ n = TAILQ_NEXT(m, plinks.q);
+ VM_CNT_INC(v_pdpages);
+ if (__predict_false((m->flags & PG_MARKER) != 0))
+ continue;
- TAILQ_INSERT_AFTER(&pq->pq_pl, m, &marker, plinks.q);
- vm_pagequeue_unlock(pq);
- vm_page_lock(m);
- vm_pagequeue_lock(pq);
+ KASSERT((m->aflags & PGA_ENQUEUED) != 0,
+ ("page %p not enqueued", m));
+ KASSERT((m->flags & PG_FICTITIOUS) == 0,
+ ("Fictitious page %p cannot be in page queue", m));
+ KASSERT((m->oflags & VPO_UNMANAGED) == 0,
+ ("Unmanaged page %p cannot be in page queue", m));
- /* Page queue might have changed. */
- *next = TAILQ_NEXT(&marker, plinks.q);
- unchanged = m == TAILQ_PREV(&marker, pglist, plinks.q);
- KASSERT(!unchanged || m->queue == queue,
- ("page %p queue %d %d", m, queue, m->queue));
- TAILQ_REMOVE(&pq->pq_pl, &marker, plinks.q);
- return (unchanged);
+ /* A full batch leaves m queued; the marker is parked before it. */
+ if (!vm_batchqueue_insert(bq, m))
+ break;
+ if (dequeue) {
+ TAILQ_REMOVE(&pq->pq_pl, m, plinks.q);
+ vm_page_aflag_clear(m, PGA_ENQUEUED);
+ }
+ }
+ TAILQ_REMOVE(&pq->pq_pl, marker, plinks.q);
+ if (__predict_true(m != NULL))
+ TAILQ_INSERT_BEFORE(m, marker, plinks.q);
+ else
+ TAILQ_INSERT_TAIL(&pq->pq_pl, marker, plinks.q);
+ if (dequeue)
+ vm_pagequeue_cnt_add(pq, -bq->bq_cnt);
}
/*
@@ -370,12 +337,12 @@
break;
}
vm_page_test_dirty(p);
- if (p->dirty == 0) {
+ if (p->dirty == 0 || !vm_page_in_laundry(p)) {
ib = 0;
break;
}
vm_page_lock(p);
- if (!vm_page_in_laundry(p) || vm_page_held(p)) {
+ if (vm_page_held(p)) {
vm_page_unlock(p);
ib = 0;
break;
@@ -398,10 +365,10 @@
if ((p = vm_page_next(ps)) == NULL || vm_page_busied(p))
break;
vm_page_test_dirty(p);
- if (p->dirty == 0)
+ if (p->dirty == 0 || !vm_page_in_laundry(p))
break;
vm_page_lock(p);
- if (!vm_page_in_laundry(p) || vm_page_held(p)) {
+ if (vm_page_held(p)) {
vm_page_unlock(p);
break;
}
@@ -692,12 +659,14 @@
static int
vm_pageout_launder(struct vm_domain *vmd, int launder, bool in_shortfall)
{
+ struct vm_batchqueue bq;
struct vm_pagequeue *pq;
+ struct mtx *mtx;
vm_object_t object;
- vm_page_t m, next;
- int act_delta, error, maxscan, numpagedout, starting_target;
+ vm_page_t m;
+ int act_delta, error, maxscan, numpagedout, queue, starting_target;
int vnodes_skipped;
- bool pageout_ok, queue_locked;
+ bool obj_locked, pageout_ok;
starting_target = launder;
vnodes_skipped = 0;
@@ -716,186 +685,210 @@
* swap devices are configured.
*/
if (atomic_load_acq_int(&swapdev_enabled))
- pq = &vmd->vmd_pagequeues[PQ_UNSWAPPABLE];
+ queue = PQ_UNSWAPPABLE;
else
- pq = &vmd->vmd_pagequeues[PQ_LAUNDRY];
+ queue = PQ_LAUNDRY;
+ pq = &vmd->vmd_pagequeues[queue];
scan:
vm_pagequeue_lock(pq);
- maxscan = pq->pq_cnt;
- queue_locked = true;
- for (m = TAILQ_FIRST(&pq->pq_pl);
- m != NULL && maxscan-- > 0 && launder > 0;
- m = next) {
- vm_pagequeue_assert_locked(pq);
- KASSERT(queue_locked, ("unlocked laundry queue"));
- KASSERT(vm_page_in_laundry(m),
- ("page %p has an inconsistent queue", m));
- next = TAILQ_NEXT(m, plinks.q);
- if ((m->flags & PG_MARKER) != 0)
- continue;
- KASSERT((m->flags & PG_FICTITIOUS) == 0,
- ("PG_FICTITIOUS page %p cannot be in laundry queue", m));
- KASSERT((m->oflags & VPO_UNMANAGED) == 0,
- ("VPO_UNMANAGED page %p cannot be in laundry queue", m));
- if (!vm_pageout_page_lock(m, &next) || m->hold_count != 0) {
- vm_page_unlock(m);
- continue;
- }
- if (m->wire_count != 0) {
- vm_page_dequeue_locked(m);
- vm_page_unlock(m);
- continue;
- }
- object = m->object;
- if ((!VM_OBJECT_TRYWLOCK(object) &&
- (!vm_pageout_fallback_object_lock(m, &next) ||
- vm_page_held(m))) || vm_page_busied(m)) {
- VM_OBJECT_WUNLOCK(object);
- if (m->wire_count != 0 && vm_page_pagequeue(m) == pq)
- vm_page_dequeue_locked(m);
- vm_page_unlock(m);
- continue;
- }
-
- /*
- * Unlock the laundry queue, invalidating the 'next' pointer.
- * Use a marker to remember our place in the laundry queue.
- */
- TAILQ_INSERT_AFTER(&pq->pq_pl, m, &vmd->vmd_laundry_marker,
- plinks.q);
+ TAILQ_INSERT_HEAD(&pq->pq_pl, &vmd->vmd_laundry_marker, plinks.q);
+ for (maxscan = pq->pq_cnt; maxscan > 0 && launder > 0 &&
+ TAILQ_NEXT(&vmd->vmd_laundry_marker, plinks.q) != NULL;
+ maxscan -= bq.bq_cnt) {
+ vm_pageout_collect_batch(pq, &bq, &vmd->vmd_laundry_marker,
+ min(maxscan, launder), false);
vm_pagequeue_unlock(pq);
- queue_locked = false;
- /*
- * Invalid pages can be easily freed. They cannot be
- * mapped; vm_page_free() asserts this.
- */
- if (m->valid == 0)
- goto free_page;
+ mtx = NULL;
+ obj_locked = false;
+ object = NULL;
+ VM_BATCHQ_FOREACH(&bq, m) {
+ vm_page_change_lock(m, &mtx);
- /*
- * If the page has been referenced and the object is not dead,
- * reactivate or requeue the page depending on whether the
- * object is mapped.
- */
- if ((m->aflags & PGA_REFERENCED) != 0) {
- vm_page_aflag_clear(m, PGA_REFERENCED);
- act_delta = 1;
- } else
- act_delta = 0;
- if (object->ref_count != 0)
- act_delta += pmap_ts_referenced(m);
- else {
- KASSERT(!pmap_page_is_mapped(m),
- ("page %p is mapped", m));
- }
- if (act_delta != 0) {
- if (object->ref_count != 0) {
- VM_CNT_INC(v_reactivated);
- vm_page_activate(m);
+recheck:
+ /*
+ * The page may have been disassociated from the queue
+ * while locks were dropped.
+ */
+ if (!vm_pageout_page_queued(m, queue))
+ continue;
- /*
- * Increase the activation count if the page
- * was referenced while in the laundry queue.
- * This makes it less likely that the page will
- * be returned prematurely to the inactive
- * queue.
- */
- m->act_count += act_delta + ACT_ADVANCE;
+ /*
+ * A requeue was requested, so this page gets a second
+ * chance.
+ */
+ if ((m->aflags & PGA_REQUEUE) != 0) {
+ vm_page_requeue(m);
+ continue;
+ }
- /*
- * If this was a background laundering, count
- * activated pages towards our target. The
- * purpose of background laundering is to ensure
- * that pages are eventually cycled through the
- * laundry queue, and an activation is a valid
- * way out.
- */
- if (!in_shortfall)
- launder--;
- goto drop_page;
- } else if ((object->flags & OBJ_DEAD) == 0)
- goto requeue_page;
- }
+ /*
+ * Held pages are essentially stuck in the queue.
+ *
+ * Wired pages may not be freed. Complete their removal
+ * from the queue now to avoid needless revisits during
+ * future scans.
+ */
+ if (m->hold_count != 0)
+ continue;
+ if (m->wire_count != 0) {
+ vm_page_dequeue_lazy(m);
+ continue;
+ }
- /*
- * If the page appears to be clean at the machine-independent
- * layer, then remove all of its mappings from the pmap in
- * anticipation of freeing it. If, however, any of the page's
- * mappings allow write access, then the page may still be
- * modified until the last of those mappings are removed.
- */
- if (object->ref_count != 0) {
- vm_page_test_dirty(m);
- if (m->dirty == 0)
- pmap_remove_all(m);
- }
+ if (object != m->object) {
+ if (obj_locked) {
+ VM_OBJECT_WUNLOCK(object);
+ obj_locked = false;
+ }
+ object = m->object;
+ }
+ if (!obj_locked) {
+ if (!VM_OBJECT_TRYWLOCK(object)) {
+ mtx_unlock(mtx);
+ VM_OBJECT_WLOCK(object);
+ obj_locked = true;
+ mtx_lock(mtx);
+ goto recheck;
+ } else
+ obj_locked = true;
+ }
- /*
- * Clean pages are freed, and dirty pages are paged out unless
- * they belong to a dead object. Requeueing dirty pages from
- * dead objects is pointless, as they are being paged out and
- * freed by the thread that destroyed the object.
- */
- if (m->dirty == 0) {
-free_page:
- vm_page_free(m);
- VM_CNT_INC(v_dfree);
- } else if ((object->flags & OBJ_DEAD) == 0) {
- if (object->type != OBJT_SWAP &&
- object->type != OBJT_DEFAULT)
- pageout_ok = true;
- else if (disable_swap_pageouts)
- pageout_ok = false;
- else
- pageout_ok = true;
- if (!pageout_ok) {
-requeue_page:
- vm_pagequeue_lock(pq);
- queue_locked = true;
- vm_page_requeue_locked(m);
- goto drop_page;
+ if (vm_page_busied(m))
+ continue;
+
+ /*
+ * Invalid pages can be easily freed. They cannot be
+ * mapped; vm_page_free() asserts this.
+ */
+ if (m->valid == 0)
+ goto free_page;
+
+ /*
+ * If the page has been referenced and the object is not
+ * dead, reactivate or requeue the page depending on
+ * whether the object is mapped.
+ */
+ if ((m->aflags & PGA_REFERENCED) != 0) {
+ vm_page_aflag_clear(m, PGA_REFERENCED);
+ act_delta = 1;
+ } else
+ act_delta = 0;
+ if (object->ref_count != 0)
+ act_delta += pmap_ts_referenced(m);
+ else {
+ KASSERT(!pmap_page_is_mapped(m),
+ ("page %p is mapped", m));
+ }
+ if (act_delta != 0) {
+ if (object->ref_count != 0) {
+ VM_CNT_INC(v_reactivated);
+ vm_page_activate(m);
+
+ /*
+ * Increase the activation count if the
+ * page was referenced while in the
+ * laundry queue. This makes it less
+ * likely that the page will be returned
+ * prematurely to the inactive queue.
+ */
+ m->act_count += act_delta + ACT_ADVANCE;
+
+ /*
+ * If this was a background laundering,
+ * count activated pages towards our
+ * target. The purpose of background
+ * laundering is to ensure that pages
+ * are eventually cycled through the
+ * laundry queue, and an activation is a
+ * valid way out.
+ */
+ if (!in_shortfall)
+ launder--;
+ continue;
+ } else if ((object->flags & OBJ_DEAD) == 0) {
+ vm_page_requeue(m);
+ continue;
+ }
}
/*
- * Form a cluster with adjacent, dirty pages from the
- * same object, and page out that entire cluster.
- *
- * The adjacent, dirty pages must also be in the
- * laundry. However, their mappings are not checked
- * for new references. Consequently, a recently
- * referenced page may be paged out. However, that
- * page will not be prematurely reclaimed. After page
- * out, the page will be placed in the inactive queue,
- * where any new references will be detected and the
- * page reactivated.
+ * If the page appears to be clean at the
+ * machine-independent layer, then remove all of its
+ * mappings from the pmap in anticipation of freeing it.
+ * If, however, any of the page's mappings allow write
+ * access, then the page may still be modified until the
+ * last of those mappings are removed.
*/
- error = vm_pageout_clean(m, &numpagedout);
- if (error == 0) {
- launder -= numpagedout;
- maxscan -= numpagedout - 1;
- } else if (error == EDEADLK) {
- pageout_lock_miss++;
- vnodes_skipped++;
+ if (object->ref_count != 0) {
+ vm_page_test_dirty(m);
+ if (m->dirty == 0)
+ pmap_remove_all(m);
+ }
+
+ /*
+ * Clean pages are freed, and dirty pages are paged out
+ * unless they belong to a dead object. Requeueing
+ * dirty pages from dead objects is pointless, as they
+ * are being paged out and freed by the thread that
+ * destroyed the object.
+ */
+ if (m->dirty == 0) {
+free_page:
+ vm_page_free(m);
+ VM_CNT_INC(v_dfree);
+ } else if ((object->flags & OBJ_DEAD) == 0) {
+ if (object->type != OBJT_SWAP &&
+ object->type != OBJT_DEFAULT)
+ pageout_ok = true;
+ else if (disable_swap_pageouts)
+ pageout_ok = false;
+ else
+ pageout_ok = true;
+ if (!pageout_ok) {
+ vm_page_requeue(m);
+ continue;
+ }
+
+ /*
+ * Form a cluster with adjacent, dirty pages from the
+ * same object, and page out that entire cluster.
+ *
+ * The adjacent, dirty pages must also be in the
+ * laundry. However, their mappings are not checked
+ * for new references. Consequently, a recently
+ * referenced page may be paged out. However, that
+ * page will not be prematurely reclaimed. After page
+ * out, the page will be placed in the inactive queue,
+ * where any new references will be detected and the
+ * page reactivated.
+ */
+ error = vm_pageout_clean(m, &numpagedout);
+ if (error == 0) {
+ launder -= numpagedout;
+ maxscan -= numpagedout - 1;
+ } else if (error == EDEADLK) {
+ pageout_lock_miss++;
+ vnodes_skipped++;
+ }
+ mtx = NULL;
+ obj_locked = false;
}
- goto relock_queue;
- }
-drop_page:
- vm_page_unlock(m);
- VM_OBJECT_WUNLOCK(object);
-relock_queue:
- if (!queue_locked) {
- vm_pagequeue_lock(pq);
- queue_locked = true;
}
- next = TAILQ_NEXT(&vmd->vmd_laundry_marker, plinks.q);
- TAILQ_REMOVE(&pq->pq_pl, &vmd->vmd_laundry_marker, plinks.q);
+ if (mtx != NULL)
+ mtx_unlock(mtx);
+ if (obj_locked)
+ VM_OBJECT_WUNLOCK(object);
+
+ vm_pagequeue_lock(pq);
}
+ TAILQ_REMOVE(&pq->pq_pl, &vmd->vmd_laundry_marker, plinks.q);
vm_pagequeue_unlock(pq);
if (launder > 0 && pq == &vmd->vmd_pagequeues[PQ_UNSWAPPABLE]) {
- pq = &vmd->vmd_pagequeues[PQ_LAUNDRY];
+ queue = PQ_LAUNDRY;
+ pq = &vmd->vmd_pagequeues[queue];
goto scan;
}
@@ -1091,6 +1084,65 @@
}
}
+/*
+ * Free the page "m" and, if the object's I/O size covers more than one page,
+ * also try to free the other pages of the pcount-aligned block containing it.
+ * A neighbouring page is only freed if it is not held or busied, is on the
+ * inactive queue, and is either invalid or clean and unreferenced; dirty
+ * neighbours are sent to the laundry unless the object is dead. Returns the
+ * number of pages freed (always at least one) after adding it to v_dfree.
+ */
+static int
+vm_pageout_free_pages(vm_object_t object, vm_page_t m, struct mtx **mtxp)
+{
+ vm_page_t first, next, prev;
+ vm_pindex_t start;
+ int freed, pcount;
+
+ pcount = MAX(object->iosize / PAGE_SIZE, 1);
+ freed = 1;
+ if (pcount == 1) {
+ vm_page_free(m);
+ goto out;
+ }
+
+ /* Find the first page in the block. */
+ start = m->pindex - (m->pindex % pcount);
+ first = m;
+ while (first->pindex > start) {
+ prev = vm_page_prev(first);
+ if (prev == NULL)
+ break;
+ first = prev;
+ }
+
+ /* Free the original page so we don't validate it twice. */
+ if (first == m)
+ first = vm_page_next(m);
+ vm_page_free(m);
+
+ /* Iterate through the block range and free compatible pages. */
+ /* XXX Fix cache miss on last page. */
+ for (m = first; m != NULL && m->pindex < start + pcount; m = next) {
+ next = TAILQ_NEXT(m, listq);
+ vm_page_change_lock(m, mtxp);
+ if (vm_page_held(m) || vm_page_busied(m) ||
+ m->queue != PQ_INACTIVE)
+ continue;
+
+ /* Invalid pages skip the reference and dirty checks. */
+ if (m->valid != 0) {
+ if ((m->aflags & PGA_REFERENCED) != 0)
+ continue;
+ if (object->ref_count != 0) {
+ if (pmap_ts_referenced(m) != 0) {
+ vm_page_aflag_set(m, PGA_REFERENCED);
+ continue;
+ }
+ vm_page_test_dirty(m);
+ if (m->dirty == 0)
+ pmap_remove_all(m);
+ }
+ if (m->dirty != 0) {
+ if ((object->flags & OBJ_DEAD) == 0)
+ vm_page_launder(m);
+ continue;
+ }
+ }
+ vm_page_free(m);
+ freed++;
+ }
+out:
+ VM_CNT_ADD(v_dfree, freed);
+
+ return (freed);
+}
+
/*
* vm_pageout_scan does the dirty work for the pageout daemon.
*
@@ -1103,13 +1155,15 @@
static bool
vm_pageout_scan(struct vm_domain *vmd, int pass, int shortage)
{
- vm_page_t m, next;
+ struct vm_batchqueue bq, rq;
+ struct mtx *mtx;
+ vm_page_t m;
struct vm_pagequeue *pq;
vm_object_t object;
long min_scan;
- int act_delta, addl_page_shortage, deficit, inactq_shortage, maxscan;
- int page_shortage, scan_tick, scanned, starting_page_shortage;
- boolean_t queue_locked;
+ int act_delta, addl_page_shortage, deficit, inactq_shortage;
+ int maxscan, page_shortage, scan_tick, scanned, starting_page_shortage;
+ bool obj_locked;
/*
* If we need to reclaim memory ask kernel caches to return
@@ -1157,186 +1211,179 @@
* decisions for the inactive queue, only for the active queue.)
*/
pq = &vmd->vmd_pagequeues[PQ_INACTIVE];
- maxscan = pq->pq_cnt;
vm_pagequeue_lock(pq);
- queue_locked = TRUE;
- for (m = TAILQ_FIRST(&pq->pq_pl);
- m != NULL && maxscan-- > 0 && page_shortage > 0;
- m = next) {
- vm_pagequeue_assert_locked(pq);
- KASSERT(queue_locked, ("unlocked inactive queue"));
- KASSERT(vm_page_inactive(m), ("Inactive queue %p", m));
-
- VM_CNT_INC(v_pdpages);
- next = TAILQ_NEXT(m, plinks.q);
+ TAILQ_INSERT_HEAD(&pq->pq_pl, &vmd->vmd_marker, plinks.q);
+ for (maxscan = pq->pq_cnt; maxscan > 0 && page_shortage > 0 &&
+ TAILQ_NEXT(&vmd->vmd_marker, plinks.q) != NULL;
+ maxscan -= bq.bq_cnt) {
+ vm_pageout_collect_batch(pq, &bq, &vmd->vmd_marker,
+ min(maxscan, page_shortage), true);
+ vm_pagequeue_unlock(pq);
- /*
- * skip marker pages
- */
- if (m->flags & PG_MARKER)
- continue;
+ mtx = NULL;
+ obj_locked = false;
+ object = NULL;
+ vm_batchqueue_init(&rq);
+ VM_BATCHQ_FOREACH(&bq, m) {
+ vm_page_change_lock(m, &mtx);
- KASSERT((m->flags & PG_FICTITIOUS) == 0,
- ("Fictitious page %p cannot be in inactive queue", m));
- KASSERT((m->oflags & VPO_UNMANAGED) == 0,
- ("Unmanaged page %p cannot be in inactive queue", m));
+recheck:
+ /*
+ * The page may have been disassociated from the queue
+ * while locks were dropped.
+ */
+ if (!vm_pageout_page_queued(m, PQ_INACTIVE)) {
+ addl_page_shortage++;
+ continue;
+ }
- /*
- * The page or object lock acquisitions fail if the
- * page was removed from the queue or moved to a
- * different position within the queue. In either
- * case, addl_page_shortage should not be incremented.
- */
- if (!vm_pageout_page_lock(m, &next))
- goto unlock_page;
- else if (m->wire_count != 0) {
/*
- * Wired pages may not be freed, and unwiring a queued
- * page will cause it to be requeued. Thus, remove them
- * from the queue now to avoid unnecessary revisits.
+ * A requeue was requested, so this page gets a second
+ * chance.
*/
- vm_page_dequeue_locked(m);
- addl_page_shortage++;
- goto unlock_page;
- } else if (m->hold_count != 0) {
+ if ((m->aflags & PGA_REQUEUE) != 0)
+ goto reinsert;
+
/*
- * Held pages are essentially stuck in the
- * queue. So, they ought to be discounted
- * from the inactive count. See the
- * calculation of inactq_shortage before the
+ * Held pages are essentially stuck in the queue. So,
+ * they ought to be discounted from the inactive count.
+ * See the calculation of inactq_shortage before the
* loop over the active queue below.
+ *
+ * Wired pages may not be freed. Complete their removal
+ * from the queue now to avoid needless revisits during
+ * future scans.
*/
- addl_page_shortage++;
- goto unlock_page;
- }
- object = m->object;
- if (!VM_OBJECT_TRYWLOCK(object)) {
- if (!vm_pageout_fallback_object_lock(m, &next))
- goto unlock_object;
- else if (m->wire_count != 0) {
- vm_page_dequeue_locked(m);
+ if (m->hold_count != 0) {
addl_page_shortage++;
- goto unlock_object;
- } else if (m->hold_count != 0) {
+ goto reinsert;
+ }
+ if (m->wire_count != 0) {
addl_page_shortage++;
- goto unlock_object;
+ vm_page_dequeue_lazy(m);
+ continue;
}
- }
- if (vm_page_busied(m)) {
- /*
- * Don't mess with busy pages. Leave them at
- * the front of the queue. Most likely, they
- * are being paged out and will leave the
- * queue shortly after the scan finishes. So,
- * they ought to be discounted from the
- * inactive count.
- */
- addl_page_shortage++;
-unlock_object:
- VM_OBJECT_WUNLOCK(object);
-unlock_page:
- vm_page_unlock(m);
- continue;
- }
- KASSERT(!vm_page_held(m), ("Held page %p", m));
- /*
- * Dequeue the inactive page and unlock the inactive page
- * queue, invalidating the 'next' pointer. Dequeueing the
- * page here avoids a later reacquisition (and release) of
- * the inactive page queue lock when vm_page_activate(),
- * vm_page_free(), or vm_page_launder() is called. Use a
- * marker to remember our place in the inactive queue.
- */
- TAILQ_INSERT_AFTER(&pq->pq_pl, m, &vmd->vmd_marker, plinks.q);
- vm_page_dequeue_locked(m);
- vm_pagequeue_unlock(pq);
- queue_locked = FALSE;
+ if (object != m->object) {
+ if (obj_locked) {
+ VM_OBJECT_WUNLOCK(object);
+ obj_locked = false;
+ }
+ object = m->object;
+ }
+ if (!obj_locked) {
+ if (!VM_OBJECT_TRYWLOCK(object)) {
+ mtx_unlock(mtx);
+ VM_OBJECT_WLOCK(object);
+ obj_locked = true;
+ mtx_lock(mtx);
+ goto recheck;
+ } else
+ obj_locked = true;
+ }
- /*
- * Invalid pages can be easily freed. They cannot be
- * mapped, vm_page_free() asserts this.
- */
- if (m->valid == 0)
- goto free_page;
+ if (vm_page_busied(m)) {
+ addl_page_shortage++;
+ goto reinsert;
+ }
- /*
- * If the page has been referenced and the object is not dead,
- * reactivate or requeue the page depending on whether the
- * object is mapped.
- */
- if ((m->aflags & PGA_REFERENCED) != 0) {
- vm_page_aflag_clear(m, PGA_REFERENCED);
- act_delta = 1;
- } else
- act_delta = 0;
- if (object->ref_count != 0) {
- act_delta += pmap_ts_referenced(m);
- } else {
- KASSERT(!pmap_page_is_mapped(m),
- ("vm_pageout_scan: page %p is mapped", m));
- }
- if (act_delta != 0) {
+ /*
+ * Invalid pages can be easily freed. They cannot be
+ * mapped, vm_page_free() asserts this.
+ */
+ if (m->valid == 0)
+ goto free_page;
+
+ /*
+ * If the page has been referenced and the object is not dead,
+ * reactivate or requeue the page depending on whether the
+ * object is mapped.
+ */
+ if ((m->aflags & PGA_REFERENCED) != 0) {
+ vm_page_aflag_clear(m, PGA_REFERENCED);
+ act_delta = 1;
+ } else
+ act_delta = 0;
if (object->ref_count != 0) {
- VM_CNT_INC(v_reactivated);
- vm_page_activate(m);
+ act_delta += pmap_ts_referenced(m);
+ } else {
+ KASSERT(!pmap_page_is_mapped(m),
+ ("vm_pageout_scan: page %p is mapped", m));
+ }
+ if (act_delta != 0) {
+ if (object->ref_count != 0) {
+ VM_CNT_INC(v_reactivated);
+ vm_page_activate(m);
+
+ /*
+ * Increase the activation count if the
+ * page was referenced while in the
+ * inactive queue. This makes it less
+ * likely that the page will be returned
+ * prematurely to the inactive queue.
+ */
+ m->act_count += act_delta + ACT_ADVANCE;
+ continue;
+ } else if ((object->flags & OBJ_DEAD) == 0) {
+ vm_page_aflag_set(m, PGA_REQUEUE);
+ goto reinsert;
+ }
+ }
- /*
- * Increase the activation count if the page
- * was referenced while in the inactive queue.
- * This makes it less likely that the page will
- * be returned prematurely to the inactive
- * queue.
- */
- m->act_count += act_delta + ACT_ADVANCE;
- goto drop_page;
- } else if ((object->flags & OBJ_DEAD) == 0) {
- vm_pagequeue_lock(pq);
- queue_locked = TRUE;
- m->queue = PQ_INACTIVE;
- TAILQ_INSERT_TAIL(&pq->pq_pl, m, plinks.q);
- vm_pagequeue_cnt_inc(pq);
- goto drop_page;
+ /*
+ * If the page appears to be clean at the
+ * machine-independent layer, then remove all of its
+ * mappings from the pmap in anticipation of freeing it.
+ * If, however, any of the page's mappings allow write
+ * access, then the page may still be modified until the
+ * last of those mappings are removed.
+ */
+ if (object->ref_count != 0) {
+ vm_page_test_dirty(m);
+ if (m->dirty == 0)
+ pmap_remove_all(m);
}
- }
- /*
- * If the page appears to be clean at the machine-independent
- * layer, then remove all of its mappings from the pmap in
- * anticipation of freeing it. If, however, any of the page's
- * mappings allow write access, then the page may still be
- * modified until the last of those mappings are removed.
- */
- if (object->ref_count != 0) {
- vm_page_test_dirty(m);
- if (m->dirty == 0)
- pmap_remove_all(m);
+ /*
+ * Clean pages can be freed, but dirty pages must be
+ * sent back to the laundry, unless they belong to a
+ * dead object. Requeueing dirty pages from dead
+ * objects is pointless, as they are being paged out
+ * and freed by the thread that destroyed the object.
+ */
+ if (m->dirty == 0) {
+free_page:
+ page_shortage -= vm_pageout_free_pages(object,
+ m, &mtx);
+ } else if ((object->flags & OBJ_DEAD) == 0)
+ vm_page_launder(m);
+ continue;
+reinsert:
+ if (!vm_batchqueue_insert(&rq, m))
+ panic("failed to requeue page %p", m);
}
+ if (mtx != NULL)
+ mtx_unlock(mtx);
+ if (obj_locked)
+ VM_OBJECT_WUNLOCK(object);
- /*
- * Clean pages can be freed, but dirty pages must be sent back
- * to the laundry, unless they belong to a dead object.
- * Requeueing dirty pages from dead objects is pointless, as
- * they are being paged out and freed by the thread that
- * destroyed the object.
- */
- if (m->dirty == 0) {
-free_page:
- vm_page_free(m);
- VM_CNT_INC(v_dfree);
- --page_shortage;
- } else if ((object->flags & OBJ_DEAD) == 0)
- vm_page_launder(m);
-drop_page:
- vm_page_unlock(m);
- VM_OBJECT_WUNLOCK(object);
- if (!queue_locked) {
- vm_pagequeue_lock(pq);
- queue_locked = TRUE;
+ vm_pagequeue_lock(pq);
+ VM_BATCHQ_FOREACH(&rq, m) {
+ if (vm_page_inactive(m) &&
+ (m->aflags & PGA_ENQUEUED) == 0) {
+ vm_page_aflag_set(m, PGA_ENQUEUED);
+ if ((m->aflags & PGA_REQUEUE) != 0) {
+ TAILQ_INSERT_TAIL(&pq->pq_pl, m,
+ plinks.q);
+ vm_page_aflag_clear(m, PGA_REQUEUE);
+ } else
+ TAILQ_INSERT_BEFORE(&vmd->vmd_marker, m,
+ plinks.q);
+ vm_pagequeue_cnt_inc(pq);
+ }
}
- next = TAILQ_NEXT(&vmd->vmd_marker, plinks.q);
- TAILQ_REMOVE(&pq->pq_pl, &vmd->vmd_marker, plinks.q);
}
+ TAILQ_REMOVE(&pq->pq_pl, &vmd->vmd_marker, plinks.q);
vm_pagequeue_unlock(pq);
/*
@@ -1401,7 +1448,6 @@
pq = &vmd->vmd_pagequeues[PQ_ACTIVE];
vm_pagequeue_lock(pq);
- maxscan = pq->pq_cnt;
/*
* If we're just idle polling attempt to visit every
@@ -1422,116 +1468,133 @@
* the per-page activity counter and use it to identify deactivation
* candidates. Held pages may be deactivated.
*/
- for (m = TAILQ_FIRST(&pq->pq_pl), scanned = 0; m != NULL && (scanned <
- min_scan || (inactq_shortage > 0 && scanned < maxscan)); m = next,
- scanned++) {
- KASSERT(m->queue == PQ_ACTIVE,
- ("vm_pageout_scan: page %p isn't active", m));
- next = TAILQ_NEXT(m, plinks.q);
- if ((m->flags & PG_MARKER) != 0)
- continue;
- KASSERT((m->flags & PG_FICTITIOUS) == 0,
- ("Fictitious page %p cannot be in active queue", m));
- KASSERT((m->oflags & VPO_UNMANAGED) == 0,
- ("Unmanaged page %p cannot be in active queue", m));
- if (!vm_pageout_page_lock(m, &next)) {
- vm_page_unlock(m);
- continue;
- }
+ TAILQ_INSERT_HEAD(&pq->pq_pl, &vmd->vmd_marker, plinks.q);
+ for (maxscan = pq->pq_cnt, scanned = 0;
+ TAILQ_NEXT(&vmd->vmd_marker, plinks.q) != NULL &&
+ (scanned < min_scan || (inactq_shortage > 0 && scanned < maxscan));
+ scanned += bq.bq_cnt) {
+ vm_pageout_collect_batch(pq, &bq, &vmd->vmd_marker,
+ (inactq_shortage > 0 ? maxscan : min_scan) - scanned,
+ false);
+ vm_pagequeue_unlock(pq);
- /*
- * The count for page daemon pages is updated after checking
- * the page for eligibility.
- */
- VM_CNT_INC(v_pdpages);
+ mtx = NULL;
+ vm_batchqueue_init(&rq);
+ VM_BATCHQ_FOREACH(&bq, m) {
+ vm_page_change_lock(m, &mtx);
- /*
- * Wired pages are dequeued lazily.
- */
- if (m->wire_count != 0) {
- vm_page_dequeue_locked(m);
- vm_page_unlock(m);
- continue;
- }
+ /*
+ * The page may have been disassociated from the queue
+ * while locks were dropped.
+ */
+ if (!vm_pageout_page_queued(m, PQ_ACTIVE))
+ continue;
- /*
- * Check to see "how much" the page has been used.
- */
- if ((m->aflags & PGA_REFERENCED) != 0) {
- vm_page_aflag_clear(m, PGA_REFERENCED);
- act_delta = 1;
- } else
- act_delta = 0;
+ /*
+ * Wired pages are dequeued lazily.
+ */
+ if (m->wire_count != 0) {
+ vm_page_dequeue_lazy(m);
+ continue;
+ }
- /*
- * Perform an unsynchronized object ref count check. While
- * the page lock ensures that the page is not reallocated to
- * another object, in particular, one with unmanaged mappings
- * that cannot support pmap_ts_referenced(), two races are,
- * nonetheless, possible:
- * 1) The count was transitioning to zero, but we saw a non-
- * zero value. pmap_ts_referenced() will return zero
- * because the page is not mapped.
- * 2) The count was transitioning to one, but we saw zero.
- * This race delays the detection of a new reference. At
- * worst, we will deactivate and reactivate the page.
- */
- if (m->object->ref_count != 0)
- act_delta += pmap_ts_referenced(m);
+ /*
+ * Check to see "how much" the page has been used.
+ */
+ if ((m->aflags & PGA_REFERENCED) != 0) {
+ vm_page_aflag_clear(m, PGA_REFERENCED);
+ act_delta = 1;
+ } else
+ act_delta = 0;
- /*
- * Advance or decay the act_count based on recent usage.
- */
- if (act_delta != 0) {
- m->act_count += ACT_ADVANCE + act_delta;
- if (m->act_count > ACT_MAX)
- m->act_count = ACT_MAX;
- } else
- m->act_count -= min(m->act_count, ACT_DECLINE);
+ /*
+ * Perform an unsynchronized object ref count check.
+ * While the page lock ensures that the page is not
+ * reallocated to another object, in particular, one
+ * with unmanaged mappings that cannot support
+ * pmap_ts_referenced(), two races are, nonetheless,
+ * possible:
+ * 1) The count was transitioning to zero, but we saw a
+ * non-zero value. pmap_ts_referenced() will return
+ * zero because the page is not mapped.
+ * 2) The count was transitioning to one, but we saw
+ * zero. This race delays the detection of a new
+ * reference. At worst, we will deactivate and
+ * reactivate the page.
+ */
+ if (m->object->ref_count != 0)
+ act_delta += pmap_ts_referenced(m);
- /*
- * Move this page to the tail of the active, inactive or laundry
- * queue depending on usage.
- */
- if (m->act_count == 0) {
- /* Dequeue to avoid later lock recursion. */
- vm_page_dequeue_locked(m);
+ /*
+ * Advance or decay the act_count based on recent usage.
+ */
+ if (act_delta != 0) {
+ m->act_count += ACT_ADVANCE + act_delta;
+ if (m->act_count > ACT_MAX)
+ m->act_count = ACT_MAX;
+ } else
+ m->act_count -= min(m->act_count, ACT_DECLINE);
/*
- * When not short for inactive pages, let dirty pages go
- * through the inactive queue before moving to the
- * laundry queues. This gives them some extra time to
- * be reactivated, potentially avoiding an expensive
- * pageout. During a page shortage, the inactive queue
- * is necessarily small, so we may move dirty pages
- * directly to the laundry queue.
+ * Move this page to the tail of the active, inactive or
+ * laundry queue depending on usage.
*/
- if (inactq_shortage <= 0)
- vm_page_deactivate(m);
- else {
+ if (m->act_count == 0) {
/*
- * Calling vm_page_test_dirty() here would
- * require acquisition of the object's write
- * lock. However, during a page shortage,
- * directing dirty pages into the laundry
- * queue is only an optimization and not a
- * requirement. Therefore, we simply rely on
- * the opportunistic updates to the page's
- * dirty field by the pmap.
+ * When not short for inactive pages, let dirty
+ * pages go through the inactive queue before
+ * moving to the laundry queues. This gives
+ * them some extra time to be reactivated,
+ * potentially avoiding an expensive pageout.
+ * During a page shortage, the inactive queue is
+ * necessarily small, so we may move dirty pages
+ * directly to the laundry queue.
*/
- if (m->dirty == 0) {
+ if (inactq_shortage <= 0)
vm_page_deactivate(m);
- inactq_shortage -=
- act_scan_laundry_weight;
- } else {
- vm_page_launder(m);
- inactq_shortage--;
+ else {
+ /*
+ * Calling vm_page_test_dirty() here
+ * would require acquisition of the
+ * object's write lock. However, during
+ * a page shortage, directing dirty
+ * pages into the laundry queue is only
+ * an optimization and not a
+ * requirement. Therefore, we simply
+ * rely on the opportunistic updates to
+ * the page's dirty field by the pmap.
+ */
+ if (m->dirty == 0) {
+ vm_page_deactivate(m);
+ inactq_shortage -=
+ act_scan_laundry_weight;
+ } else {
+ vm_page_launder(m);
+ inactq_shortage--;
+ }
}
+ } else if (!vm_batchqueue_insert(&rq, m))
+ panic("failed to requeue page %p", m);
+ }
+ if (mtx != NULL)
+ mtx_unlock(mtx);
+
+ vm_pagequeue_lock(pq);
+ /*
+ * XXXMJ this step could be avoided if we used a CLOCK scan for
+ * the active queue. This would involve modifying
+ * vm_page_enqueue() to insert after a marker page rather than
+ * at the tail of the queue.
+ */
+ VM_BATCHQ_FOREACH(&rq, m) {
+ if (vm_page_active(m) &&
+ (m->aflags & PGA_ENQUEUED) != 0) {
+ TAILQ_REMOVE(&pq->pq_pl, m, plinks.q);
+ TAILQ_INSERT_TAIL(&pq->pq_pl, m, plinks.q);
}
- } else
- vm_page_requeue_locked(m);
- vm_page_unlock(m);
+ }
}
+ TAILQ_REMOVE(&pq->pq_pl, &vmd->vmd_marker, plinks.q);
vm_pagequeue_unlock(pq);
if (pass > 0)
vm_swapout_run_idle();
Index: sys/vm/vm_pagequeue.h
===================================================================
--- sys/vm/vm_pagequeue.h
+++ sys/vm/vm_pagequeue.h
@@ -73,7 +73,23 @@
const char * const pq_name;
} __aligned(CACHE_LINE_SIZE);
+/*
+ * Capacity, in pages, of a single batch queue. The value below is only a
+ * fallback: a definition made before this point takes precedence (the amd64
+ * vmparam.h hunk of this diff defines it as 31).
+ */
+#ifndef VM_BATCHQUEUE_SIZE
+#define VM_BATCHQUEUE_SIZE 7
+#endif
+
+/*
+ * A fixed-capacity array of page pointers. bq_cnt counts the entries in
+ * use, which occupy bq_pa[0] through bq_pa[bq_cnt - 1]. The structure is
+ * aligned to CACHE_LINE_SIZE.
+ */
+struct vm_batchqueue {
+ vm_page_t bq_pa[VM_BATCHQUEUE_SIZE];
+ int bq_cnt;
+} __aligned(CACHE_LINE_SIZE);
+
+/*
+ * Iterate over the pages held in a batch queue, assigning each one to
+ * "page" in insertion order. The bounds check is made before the slot is
+ * read, so a full queue (bq_cnt == VM_BATCHQUEUE_SIZE) never loads from one
+ * element past the end of bq_pa[]. "page" is left unmodified when the
+ * queue is empty. "batchqp" is expanded more than once and must therefore
+ * be free of side effects.
+ */
+#define VM_BATCHQ_FOREACH(batchqp, page) \
+ for (vm_page_t *__mp = &(batchqp)->bq_pa[0]; \
+ __mp != &(batchqp)->bq_pa[(batchqp)->bq_cnt] && \
+ ((page) = *__mp, true); \
+ __mp++)
+
+#include <vm/uma.h>
#include <sys/pidctrl.h>
+
struct sysctl_oid;
/*
@@ -144,6 +160,7 @@
#define vm_pagequeue_assert_locked(pq) mtx_assert(&(pq)->pq_mutex, MA_OWNED)
#define vm_pagequeue_lock(pq) mtx_lock(&(pq)->pq_mutex)
#define vm_pagequeue_lockptr(pq) (&(pq)->pq_mutex)
+#define vm_pagequeue_trylock(pq) mtx_trylock(&(pq)->pq_mutex)
#define vm_pagequeue_unlock(pq) mtx_unlock(&(pq)->pq_mutex)
#define vm_domain_free_assert_locked(n) \
@@ -154,6 +171,8 @@
mtx_lock(vm_domain_free_lockptr((d)))
#define vm_domain_free_lockptr(d) \
(&(d)->vmd_free_mtx)
+#define vm_domain_free_trylock(d) \
+ mtx_trylock(vm_domain_free_lockptr((d)))
#define vm_domain_free_unlock(d) \
mtx_unlock(vm_domain_free_lockptr((d)))
@@ -172,14 +191,30 @@
vm_pagequeue_cnt_add(struct vm_pagequeue *pq, int addend)
{
-#ifdef notyet
vm_pagequeue_assert_locked(pq);
-#endif
pq->pq_cnt += addend;
}
#define vm_pagequeue_cnt_inc(pq) vm_pagequeue_cnt_add((pq), 1)
#define vm_pagequeue_cnt_dec(pq) vm_pagequeue_cnt_add((pq), -1)
+/*
+ * Reset a batch queue to the empty state. Only the entry count is cleared;
+ * the bq_pa[] slots themselves are not touched.
+ */
+static inline void
+vm_batchqueue_init(struct vm_batchqueue *bq)
+{

+ bq->bq_cnt = 0;
+}
+
+/*
+ * Append page "m" to the batch queue. Returns true when the page was
+ * stored, and false, leaving the queue unchanged, when every slot of
+ * bq_pa[] is already in use.
+ */
+static inline bool
+vm_batchqueue_insert(struct vm_batchqueue *bq, vm_page_t m)
+{

+ if (bq->bq_cnt >= nitems(bq->bq_pa))
+ return (false);
+ bq->bq_pa[bq->bq_cnt] = m;
+ bq->bq_cnt++;
+ return (true);
+}
+
void vm_domain_set(struct vm_domain *vmd);
void vm_domain_clear(struct vm_domain *vmd);
int vm_domain_allocate(struct vm_domain *vmd, int req, int npages);
Index: sys/vm/vm_phys.c
===================================================================
--- sys/vm/vm_phys.c
+++ sys/vm/vm_phys.c
@@ -354,9 +354,9 @@
m->order = order;
if (tail)
- TAILQ_INSERT_TAIL(&fl[order].pl, m, plinks.q);
+ TAILQ_INSERT_TAIL(&fl[order].pl, m, listq);
else
- TAILQ_INSERT_HEAD(&fl[order].pl, m, plinks.q);
+ TAILQ_INSERT_HEAD(&fl[order].pl, m, listq);
fl[order].lcnt++;
}
@@ -364,7 +364,7 @@
vm_freelist_rem(struct vm_freelist *fl, vm_page_t m, int order)
{
- TAILQ_REMOVE(&fl[order].pl, m, plinks.q);
+ TAILQ_REMOVE(&fl[order].pl, m, listq);
fl[order].lcnt--;
m->order = VM_NFREEORDER;
}
@@ -1196,7 +1196,7 @@
oind++) {
for (pind = 0; pind < VM_NFREEPOOL; pind++) {
fl = (*seg->free_queues)[pind];
- TAILQ_FOREACH(m_ret, &fl[oind].pl, plinks.q) {
+ TAILQ_FOREACH(m_ret, &fl[oind].pl, listq) {
/*
* Is the size of this allocation request
* larger than the largest block size?

File Metadata

Mime Type
text/plain
Expires
Fri, Apr 10, 11:35 PM (21 h, 38 m)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
31253847
Default Alt Text
D14893.id40876.diff (70 KB)

Event Timeline