Page Menu
Home
FreeBSD
Search
Configure Global Search
Log In
Files
F151679937
D14893.id40876.diff
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Flag For Later
Award Token
Size
70 KB
Referenced Files
None
Subscribers
None
D14893.id40876.diff
View Options
Index: sys/amd64/include/vmparam.h
===================================================================
--- sys/amd64/include/vmparam.h
+++ sys/amd64/include/vmparam.h
@@ -227,4 +227,10 @@
#define ZERO_REGION_SIZE (2 * 1024 * 1024) /* 2MB */
+/*
+ * Use a fairly large batch size since we expect amd64 systems to have lots of
+ * memory.
+ */
+#define VM_BATCHQUEUE_SIZE 31
+
#endif /* _MACHINE_VMPARAM_H_ */
Index: sys/kern/subr_witness.c
===================================================================
--- sys/kern/subr_witness.c
+++ sys/kern/subr_witness.c
@@ -601,7 +601,6 @@
* CDEV
*/
{ "vm map (system)", &lock_class_mtx_sleep },
- { "vm pagequeue", &lock_class_mtx_sleep },
{ "vnode interlock", &lock_class_mtx_sleep },
{ "cdev", &lock_class_mtx_sleep },
{ NULL, NULL },
@@ -611,11 +610,11 @@
{ "vm map (user)", &lock_class_sx },
{ "vm object", &lock_class_rw },
{ "vm page", &lock_class_mtx_sleep },
- { "vm pagequeue", &lock_class_mtx_sleep },
{ "pmap pv global", &lock_class_rw },
{ "pmap", &lock_class_mtx_sleep },
{ "pmap pv list", &lock_class_rw },
{ "vm page free queue", &lock_class_mtx_sleep },
+ { "vm pagequeue", &lock_class_mtx_sleep },
{ NULL, NULL },
/*
* kqueue/VFS interaction
Index: sys/vm/vm_object.c
===================================================================
--- sys/vm/vm_object.c
+++ sys/vm/vm_object.c
@@ -721,14 +721,11 @@
vm_object_terminate_pages(vm_object_t object)
{
vm_page_t p, p_next;
- struct mtx *mtx, *mtx1;
- struct vm_pagequeue *pq, *pq1;
- int dequeued;
+ struct mtx *mtx;
VM_OBJECT_ASSERT_WLOCKED(object);
mtx = NULL;
- pq = NULL;
/*
* Free any remaining pageable pages. This also removes them from the
@@ -738,60 +735,23 @@
*/
TAILQ_FOREACH_SAFE(p, &object->memq, listq, p_next) {
vm_page_assert_unbusied(p);
- if ((object->flags & OBJ_UNMANAGED) == 0) {
+ if ((object->flags & OBJ_UNMANAGED) == 0)
/*
* vm_page_free_prep() only needs the page
* lock for managed pages.
*/
- mtx1 = vm_page_lockptr(p);
- if (mtx1 != mtx) {
- if (mtx != NULL)
- mtx_unlock(mtx);
- if (pq != NULL) {
- vm_pagequeue_cnt_add(pq, dequeued);
- vm_pagequeue_unlock(pq);
- pq = NULL;
- }
- mtx = mtx1;
- mtx_lock(mtx);
- }
- }
+ vm_page_change_lock(p, &mtx);
p->object = NULL;
if (p->wire_count != 0)
- goto unlist;
+ continue;
VM_CNT_INC(v_pfree);
p->flags &= ~PG_ZERO;
- if (p->queue != PQ_NONE) {
- KASSERT(p->queue < PQ_COUNT, ("vm_object_terminate: "
- "page %p is not queued", p));
- pq1 = vm_page_pagequeue(p);
- if (pq != pq1) {
- if (pq != NULL) {
- vm_pagequeue_cnt_add(pq, dequeued);
- vm_pagequeue_unlock(pq);
- }
- pq = pq1;
- vm_pagequeue_lock(pq);
- dequeued = 0;
- }
- p->queue = PQ_NONE;
- TAILQ_REMOVE(&pq->pq_pl, p, plinks.q);
- dequeued--;
- }
- if (vm_page_free_prep(p, true))
- continue;
-unlist:
- TAILQ_REMOVE(&object->memq, p, listq);
- }
- if (pq != NULL) {
- vm_pagequeue_cnt_add(pq, dequeued);
- vm_pagequeue_unlock(pq);
+
+ vm_page_free(p);
}
if (mtx != NULL)
mtx_unlock(mtx);
- vm_page_free_phys_pglist(&object->memq);
-
/*
* If the object contained any pages, then reset it to an empty state.
* None of the object's fields, including "resident_page_count", were
@@ -1974,7 +1934,6 @@
{
vm_page_t p, next;
struct mtx *mtx;
- struct pglist pgl;
VM_OBJECT_ASSERT_WLOCKED(object);
KASSERT((object->flags & OBJ_UNMANAGED) == 0 ||
@@ -1983,7 +1942,6 @@
if (object->resident_page_count == 0)
return;
vm_object_pip_add(object, 1);
- TAILQ_INIT(&pgl);
again:
p = vm_page_find_least(object, start);
mtx = NULL;
@@ -2038,12 +1996,10 @@
if ((options & OBJPR_NOTMAPPED) == 0 && object->ref_count != 0)
pmap_remove_all(p);
p->flags &= ~PG_ZERO;
- if (vm_page_free_prep(p, false))
- TAILQ_INSERT_TAIL(&pgl, p, listq);
+ vm_page_free(p);
}
if (mtx != NULL)
mtx_unlock(mtx);
- vm_page_free_phys_pglist(&pgl);
vm_object_pip_wakeup(object);
}
Index: sys/vm/vm_page.h
===================================================================
--- sys/vm/vm_page.h
+++ sys/vm/vm_page.h
@@ -94,7 +94,9 @@
* In general, operations on this structure's mutable fields are
* synchronized using either one of or a combination of the lock on the
* object that the page belongs to (O), the pool lock for the page (P),
- * or the lock for either the free or paging queue (Q). If a field is
+ * the per-domain lock for the free queues (F), or the page's queue
+ * lock (Q). The queue lock for a page depends on the value of its
+ * queue field and is described in detail below. If a field is
* annotated below with two of these locks, then holding either lock is
* sufficient for read access, but both locks are required for write
* access. An annotation of (C) indicates that the field is immutable.
@@ -143,6 +145,28 @@
* causing the thread to block. vm_page_sleep_if_busy() can be used to
* sleep until the page's busy state changes, after which the caller
* must re-lookup the page and re-evaluate its state.
+ *
+ * The queue field is the index of the page queue containing the
+ * page, or PQ_NONE if the page is not enqueued. The queue lock of a
+ * page is the page queue lock corresponding to the page queue index,
+ * or the page lock (P) for the page if the index is PQ_NONE. To modify
+ * the queue field, the queue lock for its old value must be held. It is
+ * invalid for a page's queue field to transition between two distinct
+ * page queue indices. That is, when updating the queue field, either
+ * the new value or the old value must be PQ_NONE.
+ *
+ * To avoid contention on page queue locks, page queue operations
+ * (enqueue, dequeue, requeue) are batched using per-CPU queues.
+ * A deferred operation is requested by inserting an entry into a
+ * batch queue; the entry is simply a pointer to the page, and the
+ * request type is encoded in the page's aflags field using the values
+ * in PGA_QUEUE_STATE_MASK. The type-stability of struct vm_page is
+ * crucial to this scheme since the processing of entries in a given
+ * batch queue may be deferred indefinitely. In particular, a page
+ * may be freed before its pending batch queue entries have been
+ * processed. The page lock (P) must be held to schedule a batched
+ * queue operation, and the page queue lock must be held in order to
+ * process batch queue entries for the page queue.
*/
#if PAGE_SIZE == 4096
@@ -174,7 +198,7 @@
TAILQ_ENTRY(vm_page) listq; /* pages in same object (O) */
vm_object_t object; /* which object am I in (O,P) */
vm_pindex_t pindex; /* offset into object (O,P) */
- vm_paddr_t phys_addr; /* physical address of page */
+ vm_paddr_t phys_addr; /* physical address of page (C) */
struct md_page md; /* machine dependent stuff */
u_int wire_count; /* wired down maps refs (P) */
volatile u_int busy_lock; /* busy owners lock */
@@ -182,11 +206,11 @@
uint16_t flags; /* page PG_* flags (P) */
uint8_t aflags; /* access is atomic */
uint8_t oflags; /* page VPO_* flags (O) */
- uint8_t queue; /* page queue index (P,Q) */
+ uint8_t queue; /* page queue index (Q) */
int8_t psind; /* pagesizes[] index (O) */
int8_t segind; /* vm_phys segment index (C) */
- uint8_t order; /* index of the buddy queue */
- uint8_t pool; /* vm_phys freepool index (Q) */
+ uint8_t order; /* index of the buddy queue (F) */
+ uint8_t pool; /* vm_phys freepool index (F) */
u_char act_count; /* page usage count (P) */
/* NOTE that these must support one bit per DEV_BSIZE in a page */
/* so, on normal X86 kernels, they must be at least 8 bits wide */
@@ -314,10 +338,32 @@
*
* PGA_EXECUTABLE may be set by pmap routines, and indicates that a page has
* at least one executable mapping. It is not consumed by the MI VM layer.
+ *
+ * PGA_ENQUEUED is set and cleared when a page is inserted into or removed
+ * from a page queue, respectively. It determines whether the plinks.q field
+ * of the page is valid. To set or clear this flag, the queue lock for the
+ * page must be held: the page queue lock corresponding to the page's "queue"
+ * field if its value is not PQ_NONE, and the page lock otherwise.
+ *
+ * PGA_DEQUEUE is set when the page is scheduled to be dequeued from a page
+ * queue, and cleared when the dequeue request is processed. A page may
+ * have PGA_DEQUEUE set and PGA_ENQUEUED cleared, for instance if a dequeue
+ * is requested after the page is scheduled to be enqueued but before it is
+ * actually inserted into the page queue. The page lock must be held to set
+ * this flag, and the queue lock for the page must be held to clear it.
+ *
+ * PGA_REQUEUE is set when the page is scheduled to be requeued in its page
+ * queue. The page lock must be held to set this flag, and the queue lock
+ * for the page must be held to clear it.
*/
#define PGA_WRITEABLE 0x01 /* page may be mapped writeable */
#define PGA_REFERENCED 0x02 /* page has been referenced */
#define PGA_EXECUTABLE 0x04 /* page may be mapped executable */
+#define PGA_ENQUEUED 0x08 /* page is enqueued in a page queue */
+#define PGA_DEQUEUE 0x10 /* page is due to be dequeued */
+#define PGA_REQUEUE 0x20 /* page is due to be requeued */
+
+#define PGA_QUEUE_STATE_MASK (PGA_ENQUEUED | PGA_DEQUEUE | PGA_REQUEUE)
/*
* Page flags. If changed at any other time than page allocation or
@@ -483,10 +529,10 @@
void vm_page_deactivate(vm_page_t);
void vm_page_deactivate_noreuse(vm_page_t);
void vm_page_dequeue(vm_page_t m);
+void vm_page_dequeue_lazy(vm_page_t m);
void vm_page_dequeue_locked(vm_page_t m);
vm_page_t vm_page_find_least(vm_object_t, vm_pindex_t);
-void vm_page_free_phys_pglist(struct pglist *tq);
-bool vm_page_free_prep(vm_page_t m, bool pagequeue_locked);
+bool vm_page_free_prep(vm_page_t m);
vm_page_t vm_page_getfake(vm_paddr_t paddr, vm_memattr_t memattr);
void vm_page_initfake(vm_page_t m, vm_paddr_t paddr, vm_memattr_t memattr);
int vm_page_insert (vm_page_t, vm_object_t, vm_pindex_t);
Index: sys/vm/vm_page.c
===================================================================
--- sys/vm/vm_page.c
+++ sys/vm/vm_page.c
@@ -131,13 +131,11 @@
extern void uma_startup(void *, int);
extern int vmem_startup_count(void);
-/*
- * Associated with page of user-allocatable memory is a
- * page structure.
- */
-
struct vm_domain vm_dom[MAXMEMDOM];
+static DPCPU_DEFINE(struct vm_batchqueue, pqbatch[MAXMEMDOM][PQ_COUNT]);
+static DPCPU_DEFINE(struct vm_batchqueue, freeqbatch[MAXMEMDOM]);
+
struct mtx_padalign __exclusive_cache_line pa_lock[PA_LOCK_COUNT];
struct mtx_padalign __exclusive_cache_line vm_domainset_lock;
@@ -176,7 +174,7 @@
static void vm_page_alloc_check(vm_page_t m);
static void vm_page_clear_dirty_mask(vm_page_t m, vm_page_bits_t pagebits);
-static void vm_page_enqueue(uint8_t queue, vm_page_t m);
+static void vm_page_enqueue_lazy(vm_page_t m, uint8_t queue);
static void vm_page_init(void *dummy);
static int vm_page_insert_after(vm_page_t m, vm_object_t object,
vm_pindex_t pindex, vm_page_t mpred);
@@ -1813,9 +1811,8 @@
*/
KASSERT(m != NULL, ("missing page"));
-#if VM_NRESERVLEVEL > 0
found:
-#endif
+ vm_page_dequeue(m);
vm_page_alloc_check(m);
/*
@@ -2012,8 +2009,10 @@
#if VM_NRESERVLEVEL > 0
found:
#endif
- for (m = m_ret; m < &m_ret[npages]; m++)
+ for (m = m_ret; m < &m_ret[npages]; m++) {
+ vm_page_dequeue(m);
vm_page_alloc_check(m);
+ }
/*
* Initialize the pages. Only the PG_ZERO flag is inherited.
@@ -2157,6 +2156,7 @@
goto again;
return (NULL);
}
+ vm_page_dequeue(m);
vm_page_alloc_check(m);
/*
@@ -2349,7 +2349,8 @@
vm_reserv_size(level)) - pa);
#endif
} else if (object->memattr == VM_MEMATTR_DEFAULT &&
- m->queue != PQ_NONE && !vm_page_busied(m)) {
+ m->queue != PQ_NONE &&
+ (m->aflags & PGA_DEQUEUE) == 0 && !vm_page_busied(m)) {
/*
* The page is allocated but eligible for
* relocation. Extend the current run by one
@@ -2500,7 +2501,9 @@
error = EINVAL;
else if (object->memattr != VM_MEMATTR_DEFAULT)
error = EINVAL;
- else if (m->queue != PQ_NONE && !vm_page_busied(m)) {
+ else if (m->queue != PQ_NONE &&
+ (m->aflags & PGA_DEQUEUE) == 0 &&
+ !vm_page_busied(m)) {
KASSERT(pmap_page_get_memattr(m) ==
VM_MEMATTR_DEFAULT,
("page %p has an unexpected memattr", m));
@@ -2550,7 +2553,7 @@
goto unlock;
}
KASSERT(m_new->wire_count == 0,
- ("page %p is wired", m_new));
+ ("page %p is wired", m));
/*
* Replace "m" with the new page. For
@@ -2560,9 +2563,10 @@
*/
if (object->ref_count != 0)
pmap_remove_all(m);
- m_new->aflags = m->aflags;
+ m_new->aflags = m->aflags &
+ ~PGA_QUEUE_STATE_MASK;
KASSERT(m_new->oflags == VPO_UNMANAGED,
- ("page %p is managed", m_new));
+ ("page %p is managed", m));
m_new->oflags = m->oflags & VPO_NOSYNC;
pmap_copy_page(m, m_new);
m_new->valid = m->valid;
@@ -2572,7 +2576,7 @@
vm_page_remque(m);
vm_page_replace_checked(m_new, object,
m->pindex, m);
- if (vm_page_free_prep(m, false))
+ if (vm_page_free_prep(m))
SLIST_INSERT_HEAD(&free, m,
plinks.s.ss);
@@ -2586,7 +2590,7 @@
m->flags &= ~PG_ZERO;
vm_page_remque(m);
vm_page_remove(m);
- if (vm_page_free_prep(m, false))
+ if (vm_page_free_prep(m))
SLIST_INSERT_HEAD(&free, m,
plinks.s.ss);
KASSERT(m->dirty == 0,
@@ -3029,113 +3033,288 @@
return (&vm_pagequeue_domain(m)->vmd_pagequeues[m->queue]);
}
+static struct mtx *
+vm_page_pagequeue_lockptr(vm_page_t m)
+{
+
+ if (m->queue == PQ_NONE)
+ return (NULL);
+ return (&vm_page_pagequeue(m)->pq_mutex);
+}
+
+static void
+vm_pqbatch_process(struct vm_pagequeue *pq, struct vm_batchqueue *bq,
+ uint8_t queue)
+{
+ vm_page_t m;
+ int delta;
+ uint8_t aflags;
+
+ vm_pagequeue_assert_locked(pq);
+
+ delta = 0;
+ VM_BATCHQ_FOREACH(bq, m) {
+ if (__predict_false(m->queue != queue))
+ continue;
+
+ aflags = m->aflags;
+ if ((aflags & PGA_DEQUEUE) != 0) {
+ if (__predict_true((aflags & PGA_ENQUEUED) != 0)) {
+ TAILQ_REMOVE(&pq->pq_pl, m, plinks.q);
+ delta--;
+ }
+
+ /*
+ * Synchronize with the page daemon, which may be
+ * simultaneously scanning this page with only the page
+ * lock held. We must be careful to avoid leaving the
+ * page in a state where it appears to belong to a page
+ * queue.
+ */
+ m->queue = PQ_NONE;
+ atomic_thread_fence_rel();
+ vm_page_aflag_clear(m, PGA_QUEUE_STATE_MASK);
+ } else if ((aflags & PGA_ENQUEUED) == 0) {
+ TAILQ_INSERT_TAIL(&pq->pq_pl, m, plinks.q);
+ delta++;
+ vm_page_aflag_set(m, PGA_ENQUEUED);
+ if (__predict_false((aflags & PGA_REQUEUE) != 0))
+ vm_page_aflag_clear(m, PGA_REQUEUE);
+ } else if ((aflags & PGA_REQUEUE) != 0) {
+ TAILQ_REMOVE(&pq->pq_pl, m, plinks.q);
+ TAILQ_INSERT_TAIL(&pq->pq_pl, m, plinks.q);
+ vm_page_aflag_clear(m, PGA_REQUEUE);
+ }
+ }
+ vm_batchqueue_init(bq);
+ vm_pagequeue_cnt_add(pq, delta);
+}
+
/*
- * vm_page_dequeue:
+ * vm_page_dequeue_lazy: [ internal use only ]
*
- * Remove the given page from its current page queue.
+ * Request removal of the given page from its current page
+ * queue. Physical removal from the queue may be deferred
+ * arbitrarily, and may be cancelled by later queue operations
+ * on that page.
*
* The page must be locked.
*/
void
-vm_page_dequeue(vm_page_t m)
+vm_page_dequeue_lazy(vm_page_t m)
{
+ struct vm_batchqueue *bq;
struct vm_pagequeue *pq;
+ int domain, queue;
vm_page_assert_locked(m);
- KASSERT(m->queue < PQ_COUNT, ("vm_page_dequeue: page %p is not queued",
- m));
- pq = vm_page_pagequeue(m);
- vm_pagequeue_lock(pq);
- m->queue = PQ_NONE;
- TAILQ_REMOVE(&pq->pq_pl, m, plinks.q);
- vm_pagequeue_cnt_dec(pq);
+
+ queue = m->queue;
+ if (queue == PQ_NONE)
+ return;
+ domain = vm_phys_domain(m);
+ pq = &VM_DOMAIN(domain)->vmd_pagequeues[queue];
+
+ vm_page_aflag_set(m, PGA_DEQUEUE);
+
+ critical_enter();
+ bq = DPCPU_PTR(pqbatch[domain][queue]);
+ if (vm_batchqueue_insert(bq, m)) {
+ critical_exit();
+ return;
+ }
+ if (!vm_pagequeue_trylock(pq)) {
+ critical_exit();
+ vm_pagequeue_lock(pq);
+ critical_enter();
+ bq = DPCPU_PTR(pqbatch[domain][queue]);
+ }
+ vm_pqbatch_process(pq, bq, queue);
+
+ /*
+ * The page may have been dequeued by another thread before we
+ * acquired the page queue lock. However, since we hold the
+ * page lock, the page's queue field cannot change a second
+ * time and we can safely clear PGA_DEQUEUE.
+ */
+ KASSERT(m->queue == queue || m->queue == PQ_NONE,
+ ("%s: page %p migrated between queues", __func__, m));
+ if (m->queue == queue) {
+ (void)vm_batchqueue_insert(bq, m);
+ vm_pqbatch_process(pq, bq, queue);
+ } else
+ vm_page_aflag_clear(m, PGA_DEQUEUE);
vm_pagequeue_unlock(pq);
+ critical_exit();
}
/*
* vm_page_dequeue_locked:
*
- * Remove the given page from its current page queue.
+ * Remove the page from its page queue, which must be locked.
+ * If the page lock is not held, there is no guarantee that the
+ * page will not be enqueued by another thread before this function
+ * returns. In this case, it is up to the caller to ensure that
+ * no other threads hold a reference to the page.
*
- * The page and page queue must be locked.
+ * The page queue lock must be held. If the page is not already
+ * logically dequeued, the page lock must be held as well.
*/
void
vm_page_dequeue_locked(vm_page_t m)
{
struct vm_pagequeue *pq;
- vm_page_lock_assert(m, MA_OWNED);
- pq = vm_page_pagequeue(m);
- vm_pagequeue_assert_locked(pq);
+ KASSERT(m->queue != PQ_NONE,
+ ("%s: page %p queue field is PQ_NONE", __func__, m));
+ vm_pagequeue_assert_locked(vm_page_pagequeue(m));
+ KASSERT((m->aflags & PGA_DEQUEUE) != 0 ||
+ mtx_owned(vm_page_lockptr(m)),
+ ("%s: queued unlocked page %p", __func__, m));
+
+ if ((m->aflags & PGA_ENQUEUED) != 0) {
+ pq = vm_page_pagequeue(m);
+ TAILQ_REMOVE(&pq->pq_pl, m, plinks.q);
+ vm_pagequeue_cnt_dec(pq);
+ }
+
+ /*
+ * Synchronize with the page daemon, which may be simultaneously
+ * scanning this page with only the page lock held. We must be careful
+ * to avoid leaving the page in a state where it appears to belong to a
+ * page queue.
+ */
m->queue = PQ_NONE;
- TAILQ_REMOVE(&pq->pq_pl, m, plinks.q);
- vm_pagequeue_cnt_dec(pq);
+ atomic_thread_fence_rel();
+ vm_page_aflag_clear(m, PGA_QUEUE_STATE_MASK);
+}
+
+/*
+ * vm_page_dequeue:
+ *
+ * Remove the page from whichever page queue it's in, if any.
+ * If the page lock is not held, there is no guarantee that the
+ * page will not be enqueued by another thread before this function
+ * returns. In this case, it is up to the caller to ensure that
+ * no other threads hold a reference to the page.
+ */
+void
+vm_page_dequeue(vm_page_t m)
+{
+ struct mtx *lock, *lock1;
+
+ lock = vm_page_pagequeue_lockptr(m);
+ for (;;) {
+ if (lock == NULL)
+ return;
+ mtx_lock(lock);
+ if ((lock1 = vm_page_pagequeue_lockptr(m)) == lock)
+ break;
+ mtx_unlock(lock);
+ lock = lock1;
+ }
+ KASSERT(lock == vm_page_pagequeue_lockptr(m),
+ ("%s: page %p migrated directly between queues", __func__, m));
+ vm_page_dequeue_locked(m);
+ mtx_unlock(lock);
}
/*
- * vm_page_enqueue:
+ * vm_page_enqueue_lazy:
*
- * Add the given page to the specified page queue.
+ * Schedule the given page for insertion into the specified page queue.
+ * Physical insertion of the page may be deferred indefinitely.
*
* The page must be locked.
*/
static void
-vm_page_enqueue(uint8_t queue, vm_page_t m)
+vm_page_enqueue_lazy(vm_page_t m, uint8_t queue)
{
+ struct vm_batchqueue *bq;
struct vm_pagequeue *pq;
+ int domain;
- vm_page_lock_assert(m, MA_OWNED);
- KASSERT(queue < PQ_COUNT,
- ("vm_page_enqueue: invalid queue %u request for page %p",
- queue, m));
+ vm_page_assert_locked(m);
+ KASSERT(m->queue == PQ_NONE && (m->aflags & PGA_QUEUE_STATE_MASK) == 0,
+ ("%s: page %p is already enqueued", __func__, m));
+
+ domain = vm_phys_domain(m);
pq = &vm_pagequeue_domain(m)->vmd_pagequeues[queue];
- vm_pagequeue_lock(pq);
+
+ /*
+ * The queue field might be changed back to PQ_NONE by a concurrent
+ * call to vm_page_dequeue(). In that case the batch queue entry will
+ * be a no-op.
+ */
m->queue = queue;
- TAILQ_INSERT_TAIL(&pq->pq_pl, m, plinks.q);
- vm_pagequeue_cnt_inc(pq);
+
+ critical_enter();
+ bq = DPCPU_PTR(pqbatch[domain][queue]);
+ if (__predict_true(vm_batchqueue_insert(bq, m))) {
+ critical_exit();
+ return;
+ }
+ if (!vm_pagequeue_trylock(pq)) {
+ critical_exit();
+ vm_pagequeue_lock(pq);
+ critical_enter();
+ bq = DPCPU_PTR(pqbatch[domain][queue]);
+ }
+ vm_pqbatch_process(pq, bq, queue);
+ (void)vm_batchqueue_insert(bq, m);
+ vm_pqbatch_process(pq, bq, queue);
vm_pagequeue_unlock(pq);
+ critical_exit();
}
/*
* vm_page_requeue:
*
- * Move the given page to the tail of its current page queue.
+ * Schedule a requeue of the given page.
*
* The page must be locked.
*/
void
vm_page_requeue(vm_page_t m)
{
+ struct vm_batchqueue *bq;
struct vm_pagequeue *pq;
+ int domain, queue;
vm_page_lock_assert(m, MA_OWNED);
KASSERT(m->queue != PQ_NONE,
- ("vm_page_requeue: page %p is not queued", m));
+ ("%s: page %p is not enqueued", __func__, m));
+
+ domain = vm_phys_domain(m);
+ queue = m->queue;
pq = vm_page_pagequeue(m);
- vm_pagequeue_lock(pq);
- TAILQ_REMOVE(&pq->pq_pl, m, plinks.q);
- TAILQ_INSERT_TAIL(&pq->pq_pl, m, plinks.q);
- vm_pagequeue_unlock(pq);
-}
-/*
- * vm_page_requeue_locked:
- *
- * Move the given page to the tail of its current page queue.
- *
- * The page queue must be locked.
- */
-void
-vm_page_requeue_locked(vm_page_t m)
-{
- struct vm_pagequeue *pq;
+ if (queue == PQ_NONE)
+ return;
- KASSERT(m->queue != PQ_NONE,
- ("vm_page_requeue_locked: page %p is not queued", m));
- pq = vm_page_pagequeue(m);
- vm_pagequeue_assert_locked(pq);
- TAILQ_REMOVE(&pq->pq_pl, m, plinks.q);
- TAILQ_INSERT_TAIL(&pq->pq_pl, m, plinks.q);
+ vm_page_aflag_set(m, PGA_REQUEUE);
+ critical_enter();
+ bq = DPCPU_PTR(pqbatch[domain][queue]);
+ if (__predict_true(vm_batchqueue_insert(bq, m))) {
+ critical_exit();
+ return;
+ }
+ if (!vm_pagequeue_trylock(pq)) {
+ critical_exit();
+ vm_pagequeue_lock(pq);
+ critical_enter();
+ bq = DPCPU_PTR(pqbatch[domain][queue]);
+ }
+ vm_pqbatch_process(pq, bq, queue);
+ KASSERT(m->queue == queue || m->queue == PQ_NONE,
+ ("%s: page %p migrated between queues", __func__, m));
+ if (m->queue == queue) {
+ (void)vm_batchqueue_insert(bq, m);
+ vm_pqbatch_process(pq, bq, queue);
+ } else
+ vm_page_aflag_clear(m, PGA_REQUEUE);
+ vm_pagequeue_unlock(pq);
+ critical_exit();
}
/*
@@ -3153,18 +3332,18 @@
int queue;
vm_page_lock_assert(m, MA_OWNED);
- if ((queue = m->queue) != PQ_ACTIVE) {
- if (m->wire_count == 0 && (m->oflags & VPO_UNMANAGED) == 0) {
- if (m->act_count < ACT_INIT)
- m->act_count = ACT_INIT;
- if (queue != PQ_NONE)
- vm_page_dequeue(m);
- vm_page_enqueue(PQ_ACTIVE, m);
- }
- } else {
- if (m->act_count < ACT_INIT)
+
+ if ((queue = m->queue) == PQ_ACTIVE || m->wire_count > 0 ||
+ (m->oflags & VPO_UNMANAGED) != 0) {
+ if (queue == PQ_ACTIVE && m->act_count < ACT_INIT)
m->act_count = ACT_INIT;
+ return;
}
+
+ vm_page_remque(m);
+ if (m->act_count < ACT_INIT)
+ m->act_count = ACT_INIT;
+ vm_page_enqueue_lazy(m, PQ_ACTIVE);
}
/*
@@ -3175,11 +3354,10 @@
* the page to the free list only if this function returns true.
*
* The object must be locked. The page must be locked if it is
- * managed. For a queued managed page, the pagequeue_locked
- * argument specifies whether the page queue is already locked.
+ * managed.
*/
bool
-vm_page_free_prep(vm_page_t m, bool pagequeue_locked)
+vm_page_free_prep(vm_page_t m)
{
#if defined(DIAGNOSTIC) && defined(PHYS_TO_DMAP)
@@ -3195,14 +3373,14 @@
if ((m->oflags & VPO_UNMANAGED) == 0) {
vm_page_lock_assert(m, MA_OWNED);
KASSERT(!pmap_page_is_mapped(m),
- ("vm_page_free_toq: freeing mapped page %p", m));
+ ("vm_page_free_prep: freeing mapped page %p", m));
} else
KASSERT(m->queue == PQ_NONE,
- ("vm_page_free_toq: unmanaged page %p is queued", m));
+ ("vm_page_free_prep: unmanaged page %p is queued", m));
VM_CNT_INC(v_tfree);
if (vm_page_sbusied(m))
- panic("vm_page_free: freeing busy page %p", m);
+ panic("vm_page_free_prep: freeing busy page %p", m);
vm_page_remove(m);
@@ -3218,21 +3396,23 @@
return (false);
}
- if (m->queue != PQ_NONE) {
- if (pagequeue_locked)
- vm_page_dequeue_locked(m);
- else
- vm_page_dequeue(m);
- }
+ /*
+ * Pages need not be dequeued before they are returned to the physical
+ * memory allocator, but they must at least be marked for a deferred
+ * dequeue.
+ */
+ if ((m->oflags & VPO_UNMANAGED) == 0)
+ vm_page_dequeue_lazy(m);
+
m->valid = 0;
vm_page_undirty(m);
if (m->wire_count != 0)
- panic("vm_page_free: freeing wired page %p", m);
+ panic("vm_page_free_prep: freeing wired page %p", m);
if (m->hold_count != 0) {
m->flags &= ~PG_ZERO;
KASSERT((m->flags & PG_UNHOLDFREE) == 0,
- ("vm_page_free: freeing PG_UNHOLDFREE page %p", m));
+ ("vm_page_free_prep: freeing PG_UNHOLDFREE page %p", m));
m->flags |= PG_UNHOLDFREE;
return (false);
}
@@ -3251,36 +3431,6 @@
return (true);
}
-void
-vm_page_free_phys_pglist(struct pglist *tq)
-{
- struct vm_domain *vmd;
- vm_page_t m;
- int cnt;
-
- if (TAILQ_EMPTY(tq))
- return;
- vmd = NULL;
- cnt = 0;
- TAILQ_FOREACH(m, tq, listq) {
- if (vmd != vm_pagequeue_domain(m)) {
- if (vmd != NULL) {
- vm_domain_free_unlock(vmd);
- vm_domain_freecnt_inc(vmd, cnt);
- cnt = 0;
- }
- vmd = vm_pagequeue_domain(m);
- vm_domain_free_lock(vmd);
- }
- vm_phys_free_pages(m, 0);
- cnt++;
- }
- if (vmd != NULL) {
- vm_domain_free_unlock(vmd);
- vm_domain_freecnt_inc(vmd, cnt);
- }
-}
-
/*
* vm_page_free_toq:
*
@@ -3293,15 +3443,35 @@
void
vm_page_free_toq(vm_page_t m)
{
+ struct vm_batchqueue *cpubq, bq;
struct vm_domain *vmd;
+ int domain, freed;
+
+ if (!vm_page_free_prep(m))
+ return;
+
+ domain = vm_phys_domain(m);
+ vmd = VM_DOMAIN(domain);
- if (!vm_page_free_prep(m, false))
+ critical_enter();
+ cpubq = DPCPU_PTR(freeqbatch[domain]);
+ if (vm_batchqueue_insert(cpubq, m)) {
+ critical_exit();
return;
- vmd = vm_pagequeue_domain(m);
+ }
+ memcpy(&bq, cpubq, sizeof(bq));
+ vm_batchqueue_init(cpubq);
+ critical_exit();
+
vm_domain_free_lock(vmd);
vm_phys_free_pages(m, 0);
+ freed = 1;
+ VM_BATCHQ_FOREACH(&bq, m) {
+ vm_phys_free_pages(m, 0);
+ freed++;
+ }
vm_domain_free_unlock(vmd);
- vm_domain_freecnt_inc(vmd, 1);
+ vm_domain_freecnt_inc(vmd, freed);
}
/*
@@ -3318,23 +3488,18 @@
vm_page_free_pages_toq(struct spglist *free, bool update_wire_count)
{
vm_page_t m;
- struct pglist pgl;
int count;
if (SLIST_EMPTY(free))
return;
count = 0;
- TAILQ_INIT(&pgl);
while ((m = SLIST_FIRST(free)) != NULL) {
count++;
SLIST_REMOVE_HEAD(free, plinks.s.ss);
- if (vm_page_free_prep(m, false))
- TAILQ_INSERT_TAIL(&pgl, m, listq);
+ vm_page_free_toq(m);
}
- vm_page_free_phys_pglist(&pgl);
-
if (update_wire_count)
vm_wire_sub(count);
}
@@ -3393,22 +3558,25 @@
KASSERT(queue < PQ_COUNT || queue == PQ_NONE,
("vm_page_unwire: invalid queue %u request for page %p",
queue, m));
+ if ((m->oflags & VPO_UNMANAGED) == 0)
+ vm_page_assert_locked(m);
unwired = vm_page_unwire_noq(m);
- if (unwired && (m->oflags & VPO_UNMANAGED) == 0 && m->object != NULL) {
- if (m->queue == queue) {
+ if (!unwired || (m->oflags & VPO_UNMANAGED) != 0 || m->object == NULL)
+ return (unwired);
+
+ if (m->queue == queue) {
+ if (queue == PQ_ACTIVE)
+ vm_page_reference(m);
+ else if (queue != PQ_NONE)
+ vm_page_requeue(m);
+ } else {
+ vm_page_dequeue(m);
+ if (queue != PQ_NONE) {
+ vm_page_enqueue_lazy(m, queue);
if (queue == PQ_ACTIVE)
- vm_page_reference(m);
- else if (queue != PQ_NONE)
- vm_page_requeue(m);
- } else {
- vm_page_remque(m);
- if (queue != PQ_NONE) {
- vm_page_enqueue(queue, m);
- if (queue == PQ_ACTIVE)
- /* Initialize act_count. */
- vm_page_activate(m);
- }
+ /* Initialize act_count. */
+ vm_page_activate(m);
}
}
return (unwired);
@@ -3456,35 +3624,33 @@
* The page must be locked.
*/
static inline void
-_vm_page_deactivate(vm_page_t m, boolean_t noreuse)
+_vm_page_deactivate(vm_page_t m, bool noreuse)
{
struct vm_pagequeue *pq;
- int queue;
vm_page_assert_locked(m);
- if (m->wire_count == 0 && (m->oflags & VPO_UNMANAGED) == 0) {
+ if (m->wire_count > 0 || (m->oflags & VPO_UNMANAGED) != 0)
+ return;
+
+ if (noreuse) {
+ /* This is slower than it could be. */
+ vm_page_remque(m);
pq = &vm_pagequeue_domain(m)->vmd_pagequeues[PQ_INACTIVE];
- /* Avoid multiple acquisitions of the inactive queue lock. */
- queue = m->queue;
- if (queue == PQ_INACTIVE) {
- vm_pagequeue_lock(pq);
- vm_page_dequeue_locked(m);
- } else {
- if (queue != PQ_NONE)
- vm_page_dequeue(m);
- vm_pagequeue_lock(pq);
- }
+ vm_pagequeue_lock(pq);
m->queue = PQ_INACTIVE;
- if (noreuse)
- TAILQ_INSERT_BEFORE(
- &vm_pagequeue_domain(m)->vmd_inacthead, m,
- plinks.q);
- else
- TAILQ_INSERT_TAIL(&pq->pq_pl, m, plinks.q);
+ TAILQ_INSERT_BEFORE(&vm_pagequeue_domain(m)->vmd_inacthead, m,
+ plinks.q);
vm_pagequeue_cnt_inc(pq);
+ vm_page_aflag_set(m, PGA_ENQUEUED);
+ if ((m->aflags & PGA_REQUEUE) != 0)
+ vm_page_aflag_clear(m, PGA_REQUEUE);
vm_pagequeue_unlock(pq);
- }
+ } else if (!vm_page_inactive(m)) {
+ vm_page_remque(m);
+ vm_page_enqueue_lazy(m, PQ_INACTIVE);
+ } else
+ vm_page_requeue(m);
}
/*
@@ -3497,7 +3663,7 @@
vm_page_deactivate(vm_page_t m)
{
- _vm_page_deactivate(m, FALSE);
+ _vm_page_deactivate(m, false);
}
/*
@@ -3510,7 +3676,7 @@
vm_page_deactivate_noreuse(vm_page_t m)
{
- _vm_page_deactivate(m, TRUE);
+ _vm_page_deactivate(m, true);
}
/*
@@ -3523,13 +3689,14 @@
{
vm_page_assert_locked(m);
- if (m->wire_count == 0 && (m->oflags & VPO_UNMANAGED) == 0) {
- if (m->queue == PQ_LAUNDRY)
- vm_page_requeue(m);
- else {
- vm_page_remque(m);
- vm_page_enqueue(PQ_LAUNDRY, m);
- }
+ if (m->wire_count > 0 || (m->oflags & VPO_UNMANAGED) != 0)
+ return;
+
+ if (m->queue == PQ_LAUNDRY)
+ vm_page_requeue(m);
+ else {
+ vm_page_remque(m);
+ vm_page_enqueue_lazy(m, PQ_LAUNDRY);
}
}
@@ -3545,9 +3712,9 @@
vm_page_assert_locked(m);
KASSERT(m->wire_count == 0 && (m->oflags & VPO_UNMANAGED) == 0,
("page %p already unswappable", m));
- if (m->queue != PQ_NONE)
- vm_page_dequeue(m);
- vm_page_enqueue(PQ_UNSWAPPABLE, m);
+
+ vm_page_remque(m);
+ vm_page_enqueue_lazy(m, PQ_UNSWAPPABLE);
}
/*
Index: sys/vm/vm_pageout.c
===================================================================
--- sys/vm/vm_pageout.c
+++ sys/vm/vm_pageout.c
@@ -201,11 +201,9 @@
CTLFLAG_RW, &vm_page_max_wired, 0, "System-wide limit to wired page count");
static u_int isqrt(u_int num);
-static boolean_t vm_pageout_fallback_object_lock(vm_page_t, vm_page_t *);
static int vm_pageout_launder(struct vm_domain *vmd, int launder,
bool in_shortfall);
static void vm_pageout_laundry_worker(void *arg);
-static boolean_t vm_pageout_page_lock(vm_page_t, vm_page_t *);
/*
* Initialize a dummy page for marking the caller's place in the specified
@@ -222,99 +220,68 @@
marker->busy_lock = VPB_SINGLE_EXCLUSIVER;
marker->queue = queue;
marker->hold_count = 1;
+ if (queue != PQ_NONE)
+ marker->aflags = PGA_ENQUEUED;
}
-/*
- * vm_pageout_fallback_object_lock:
- *
- * Lock vm object currently associated with `m'. VM_OBJECT_TRYWLOCK is
- * known to have failed and page queue must be either PQ_ACTIVE or
- * PQ_INACTIVE. To avoid lock order violation, unlock the page queue
- * while locking the vm object. Use marker page to detect page queue
- * changes and maintain notion of next page on page queue. Return
- * TRUE if no changes were detected, FALSE otherwise. vm object is
- * locked on return.
- *
- * This function depends on both the lock portion of struct vm_object
- * and normal struct vm_page being type stable.
- */
-static boolean_t
-vm_pageout_fallback_object_lock(vm_page_t m, vm_page_t *next)
+static inline bool
+vm_pageout_page_queued(vm_page_t m, int queue)
{
- struct vm_page marker;
- struct vm_pagequeue *pq;
- boolean_t unchanged;
- u_short queue;
- vm_object_t object;
- queue = m->queue;
- vm_pageout_init_marker(&marker, queue);
- pq = vm_page_pagequeue(m);
- object = m->object;
-
- TAILQ_INSERT_AFTER(&pq->pq_pl, m, &marker, plinks.q);
- vm_pagequeue_unlock(pq);
- vm_page_unlock(m);
- VM_OBJECT_WLOCK(object);
- vm_page_lock(m);
- vm_pagequeue_lock(pq);
+ vm_page_assert_locked(m);
- /*
- * The page's object might have changed, and/or the page might
- * have moved from its original position in the queue. If the
- * page's object has changed, then the caller should abandon
- * processing the page because the wrong object lock was
- * acquired. Use the marker's plinks.q, not the page's, to
- * determine if the page has been moved. The state of the
- * page's plinks.q can be indeterminate; whereas, the marker's
- * plinks.q must be valid.
- */
- *next = TAILQ_NEXT(&marker, plinks.q);
- unchanged = m->object == object &&
- m == TAILQ_PREV(&marker, pglist, plinks.q);
- KASSERT(!unchanged || m->queue == queue,
- ("page %p queue %d %d", m, queue, m->queue));
- TAILQ_REMOVE(&pq->pq_pl, &marker, plinks.q);
- return (unchanged);
+ if ((m->aflags & PGA_DEQUEUE) != 0)
+ return (false);
+ atomic_thread_fence_acq();
+ return (m->queue == queue);
}
/*
- * Lock the page while holding the page queue lock. Use marker page
- * to detect page queue changes and maintain notion of next page on
- * page queue. Return TRUE if no changes were detected, FALSE
- * otherwise. The page is locked on return. The page queue lock might
- * be dropped and reacquired.
+ * Add a small number of queued pages to a batch queue for later processing
+ * without the corresponding queue lock held. The caller must have enqueued a
+ * marker page at the desired start point for the scan.
*
- * This function depends on normal struct vm_page being type stable.
+ * When processing the batch queue, vm_pageout_page_queued() must be used to
+ * determine whether the page was logically dequeued by another thread. Once
+ * this check is performed, the page lock guarantees that the page will not be
+ * disassociated from the queue.
*/
-static boolean_t
-vm_pageout_page_lock(vm_page_t m, vm_page_t *next)
+static inline void
+vm_pageout_collect_batch(struct vm_pagequeue *pq, struct vm_batchqueue *bq,
+ vm_page_t marker, int maxcollect, const bool dequeue)
{
- struct vm_page marker;
- struct vm_pagequeue *pq;
- boolean_t unchanged;
- u_short queue;
+ vm_page_t m;
- vm_page_lock_assert(m, MA_NOTOWNED);
- if (vm_page_trylock(m))
- return (TRUE);
+ vm_pagequeue_assert_locked(pq);
- queue = m->queue;
- vm_pageout_init_marker(&marker, queue);
- pq = vm_page_pagequeue(m);
+ vm_batchqueue_init(bq);
+ for (m = TAILQ_NEXT(marker, plinks.q); m != NULL && maxcollect > 0;
+ m = TAILQ_NEXT(m, plinks.q), maxcollect--) {
+ VM_CNT_INC(v_pdpages);
+ if (__predict_false((m->flags & PG_MARKER) != 0))
+ continue;
- TAILQ_INSERT_AFTER(&pq->pq_pl, m, &marker, plinks.q);
- vm_pagequeue_unlock(pq);
- vm_page_lock(m);
- vm_pagequeue_lock(pq);
+ KASSERT((m->aflags & PGA_ENQUEUED) != 0,
+ ("page %p not enqueued", m));
+ KASSERT((m->flags & PG_FICTITIOUS) == 0,
+ ("Fictitious page %p cannot be in page queue", m));
+ KASSERT((m->oflags & VPO_UNMANAGED) == 0,
+ ("Unmanaged page %p cannot be in page queue", m));
- /* Page queue might have changed. */
- *next = TAILQ_NEXT(&marker, plinks.q);
- unchanged = m == TAILQ_PREV(&marker, pglist, plinks.q);
- KASSERT(!unchanged || m->queue == queue,
- ("page %p queue %d %d", m, queue, m->queue));
- TAILQ_REMOVE(&pq->pq_pl, &marker, plinks.q);
- return (unchanged);
+ if (!vm_batchqueue_insert(bq, m))
+ break;
+ if (dequeue) {
+ TAILQ_REMOVE(&pq->pq_pl, m, plinks.q);
+ vm_page_aflag_clear(m, PGA_ENQUEUED);
+ }
+ }
+ TAILQ_REMOVE(&pq->pq_pl, marker, plinks.q);
+ if (__predict_true(m != NULL))
+ TAILQ_INSERT_BEFORE(m, marker, plinks.q);
+ else
+ TAILQ_INSERT_TAIL(&pq->pq_pl, marker, plinks.q);
+ if (dequeue)
+ vm_pagequeue_cnt_add(pq, -bq->bq_cnt);
}
/*
@@ -370,12 +337,12 @@
break;
}
vm_page_test_dirty(p);
- if (p->dirty == 0) {
+ if (p->dirty == 0 || !vm_page_in_laundry(p)) {
ib = 0;
break;
}
vm_page_lock(p);
- if (!vm_page_in_laundry(p) || vm_page_held(p)) {
+ if (vm_page_held(p)) {
vm_page_unlock(p);
ib = 0;
break;
@@ -398,10 +365,10 @@
if ((p = vm_page_next(ps)) == NULL || vm_page_busied(p))
break;
vm_page_test_dirty(p);
- if (p->dirty == 0)
+ if (p->dirty == 0 || !vm_page_in_laundry(p))
break;
vm_page_lock(p);
- if (!vm_page_in_laundry(p) || vm_page_held(p)) {
+ if (vm_page_held(p)) {
vm_page_unlock(p);
break;
}
@@ -692,12 +659,14 @@
static int
vm_pageout_launder(struct vm_domain *vmd, int launder, bool in_shortfall)
{
+ struct vm_batchqueue bq;
struct vm_pagequeue *pq;
+ struct mtx *mtx;
vm_object_t object;
- vm_page_t m, next;
- int act_delta, error, maxscan, numpagedout, starting_target;
+ vm_page_t m;
+ int act_delta, error, maxscan, numpagedout, queue, starting_target;
int vnodes_skipped;
- bool pageout_ok, queue_locked;
+ bool obj_locked, pageout_ok;
starting_target = launder;
vnodes_skipped = 0;
@@ -716,186 +685,210 @@
* swap devices are configured.
*/
if (atomic_load_acq_int(&swapdev_enabled))
- pq = &vmd->vmd_pagequeues[PQ_UNSWAPPABLE];
+ queue = PQ_UNSWAPPABLE;
else
- pq = &vmd->vmd_pagequeues[PQ_LAUNDRY];
+ queue = PQ_LAUNDRY;
+ pq = &vmd->vmd_pagequeues[queue];
scan:
vm_pagequeue_lock(pq);
- maxscan = pq->pq_cnt;
- queue_locked = true;
- for (m = TAILQ_FIRST(&pq->pq_pl);
- m != NULL && maxscan-- > 0 && launder > 0;
- m = next) {
- vm_pagequeue_assert_locked(pq);
- KASSERT(queue_locked, ("unlocked laundry queue"));
- KASSERT(vm_page_in_laundry(m),
- ("page %p has an inconsistent queue", m));
- next = TAILQ_NEXT(m, plinks.q);
- if ((m->flags & PG_MARKER) != 0)
- continue;
- KASSERT((m->flags & PG_FICTITIOUS) == 0,
- ("PG_FICTITIOUS page %p cannot be in laundry queue", m));
- KASSERT((m->oflags & VPO_UNMANAGED) == 0,
- ("VPO_UNMANAGED page %p cannot be in laundry queue", m));
- if (!vm_pageout_page_lock(m, &next) || m->hold_count != 0) {
- vm_page_unlock(m);
- continue;
- }
- if (m->wire_count != 0) {
- vm_page_dequeue_locked(m);
- vm_page_unlock(m);
- continue;
- }
- object = m->object;
- if ((!VM_OBJECT_TRYWLOCK(object) &&
- (!vm_pageout_fallback_object_lock(m, &next) ||
- vm_page_held(m))) || vm_page_busied(m)) {
- VM_OBJECT_WUNLOCK(object);
- if (m->wire_count != 0 && vm_page_pagequeue(m) == pq)
- vm_page_dequeue_locked(m);
- vm_page_unlock(m);
- continue;
- }
-
- /*
- * Unlock the laundry queue, invalidating the 'next' pointer.
- * Use a marker to remember our place in the laundry queue.
- */
- TAILQ_INSERT_AFTER(&pq->pq_pl, m, &vmd->vmd_laundry_marker,
- plinks.q);
+ TAILQ_INSERT_HEAD(&pq->pq_pl, &vmd->vmd_laundry_marker, plinks.q);
+ for (maxscan = pq->pq_cnt; maxscan > 0 && launder > 0 &&
+ TAILQ_NEXT(&vmd->vmd_laundry_marker, plinks.q) != NULL;
+ maxscan -= bq.bq_cnt) {
+ vm_pageout_collect_batch(pq, &bq, &vmd->vmd_laundry_marker,
+ min(maxscan, launder), false);
vm_pagequeue_unlock(pq);
- queue_locked = false;
- /*
- * Invalid pages can be easily freed. They cannot be
- * mapped; vm_page_free() asserts this.
- */
- if (m->valid == 0)
- goto free_page;
+ mtx = NULL;
+ obj_locked = false;
+ object = NULL;
+ VM_BATCHQ_FOREACH(&bq, m) {
+ vm_page_change_lock(m, &mtx);
- /*
- * If the page has been referenced and the object is not dead,
- * reactivate or requeue the page depending on whether the
- * object is mapped.
- */
- if ((m->aflags & PGA_REFERENCED) != 0) {
- vm_page_aflag_clear(m, PGA_REFERENCED);
- act_delta = 1;
- } else
- act_delta = 0;
- if (object->ref_count != 0)
- act_delta += pmap_ts_referenced(m);
- else {
- KASSERT(!pmap_page_is_mapped(m),
- ("page %p is mapped", m));
- }
- if (act_delta != 0) {
- if (object->ref_count != 0) {
- VM_CNT_INC(v_reactivated);
- vm_page_activate(m);
+recheck:
+ /*
+ * The page may have been disassociated from the queue
+ * while locks were dropped.
+ */
+ if (!vm_pageout_page_queued(m, queue))
+ continue;
- /*
- * Increase the activation count if the page
- * was referenced while in the laundry queue.
- * This makes it less likely that the page will
- * be returned prematurely to the inactive
- * queue.
- */
- m->act_count += act_delta + ACT_ADVANCE;
+ /*
+ * A requeue was requested, so this page gets a second
+ * chance.
+ */
+ if ((m->aflags & PGA_REQUEUE) != 0) {
+ vm_page_requeue(m);
+ continue;
+ }
- /*
- * If this was a background laundering, count
- * activated pages towards our target. The
- * purpose of background laundering is to ensure
- * that pages are eventually cycled through the
- * laundry queue, and an activation is a valid
- * way out.
- */
- if (!in_shortfall)
- launder--;
- goto drop_page;
- } else if ((object->flags & OBJ_DEAD) == 0)
- goto requeue_page;
- }
+ /*
+ * Held pages are essentially stuck in the queue.
+ *
+ * Wired pages may not be freed. Complete their removal
+ * from the queue now to avoid needless revisits during
+ * future scans.
+ */
+ if (m->hold_count != 0)
+ continue;
+ if (m->wire_count != 0) {
+ vm_page_dequeue_lazy(m);
+ continue;
+ }
- /*
- * If the page appears to be clean at the machine-independent
- * layer, then remove all of its mappings from the pmap in
- * anticipation of freeing it. If, however, any of the page's
- * mappings allow write access, then the page may still be
- * modified until the last of those mappings are removed.
- */
- if (object->ref_count != 0) {
- vm_page_test_dirty(m);
- if (m->dirty == 0)
- pmap_remove_all(m);
- }
+ if (object != m->object) {
+ if (obj_locked) {
+ VM_OBJECT_WUNLOCK(object);
+ obj_locked = false;
+ }
+ object = m->object;
+ }
+ if (!obj_locked) {
+ if (!VM_OBJECT_TRYWLOCK(object)) {
+ mtx_unlock(mtx);
+ VM_OBJECT_WLOCK(object);
+ obj_locked = true;
+ mtx_lock(mtx);
+ goto recheck;
+ } else
+ obj_locked = true;
+ }
- /*
- * Clean pages are freed, and dirty pages are paged out unless
- * they belong to a dead object. Requeueing dirty pages from
- * dead objects is pointless, as they are being paged out and
- * freed by the thread that destroyed the object.
- */
- if (m->dirty == 0) {
-free_page:
- vm_page_free(m);
- VM_CNT_INC(v_dfree);
- } else if ((object->flags & OBJ_DEAD) == 0) {
- if (object->type != OBJT_SWAP &&
- object->type != OBJT_DEFAULT)
- pageout_ok = true;
- else if (disable_swap_pageouts)
- pageout_ok = false;
- else
- pageout_ok = true;
- if (!pageout_ok) {
-requeue_page:
- vm_pagequeue_lock(pq);
- queue_locked = true;
- vm_page_requeue_locked(m);
- goto drop_page;
+ if (vm_page_busied(m))
+ continue;
+
+ /*
+ * Invalid pages can be easily freed. They cannot be
+ * mapped; vm_page_free() asserts this.
+ */
+ if (m->valid == 0)
+ goto free_page;
+
+ /*
+ * If the page has been referenced and the object is not
+ * dead, reactivate or requeue the page depending on
+ * whether the object is mapped.
+ */
+ if ((m->aflags & PGA_REFERENCED) != 0) {
+ vm_page_aflag_clear(m, PGA_REFERENCED);
+ act_delta = 1;
+ } else
+ act_delta = 0;
+ if (object->ref_count != 0)
+ act_delta += pmap_ts_referenced(m);
+ else {
+ KASSERT(!pmap_page_is_mapped(m),
+ ("page %p is mapped", m));
+ }
+ if (act_delta != 0) {
+ if (object->ref_count != 0) {
+ VM_CNT_INC(v_reactivated);
+ vm_page_activate(m);
+
+ /*
+ * Increase the activation count if the
+ * page was referenced while in the
+ * laundry queue. This makes it less
+ * likely that the page will be returned
+ * prematurely to the inactive queue.
+ */
+ m->act_count += act_delta + ACT_ADVANCE;
+
+ /*
+ * If this was a background laundering,
+ * count activated pages towards our
+ * target. The purpose of background
+ * laundering is to ensure that pages
+ * are eventually cycled through the
+ * laundry queue, and an activation is a
+ * valid way out.
+ */
+ if (!in_shortfall)
+ launder--;
+ continue;
+ } else if ((object->flags & OBJ_DEAD) == 0) {
+ vm_page_requeue(m);
+ continue;
+ }
}
/*
- * Form a cluster with adjacent, dirty pages from the
- * same object, and page out that entire cluster.
- *
- * The adjacent, dirty pages must also be in the
- * laundry. However, their mappings are not checked
- * for new references. Consequently, a recently
- * referenced page may be paged out. However, that
- * page will not be prematurely reclaimed. After page
- * out, the page will be placed in the inactive queue,
- * where any new references will be detected and the
- * page reactivated.
+ * If the page appears to be clean at the
+ * machine-independent layer, then remove all of its
+ * mappings from the pmap in anticipation of freeing it.
+ * If, however, any of the page's mappings allow write
+ * access, then the page may still be modified until the
+ * last of those mappings are removed.
*/
- error = vm_pageout_clean(m, &numpagedout);
- if (error == 0) {
- launder -= numpagedout;
- maxscan -= numpagedout - 1;
- } else if (error == EDEADLK) {
- pageout_lock_miss++;
- vnodes_skipped++;
+ if (object->ref_count != 0) {
+ vm_page_test_dirty(m);
+ if (m->dirty == 0)
+ pmap_remove_all(m);
+ }
+
+ /*
+ * Clean pages are freed, and dirty pages are paged out
+ * unless they belong to a dead object. Requeueing
+ * dirty pages from dead objects is pointless, as they
+ * are being paged out and freed by the thread that
+ * destroyed the object.
+ */
+ if (m->dirty == 0) {
+free_page:
+ vm_page_free(m);
+ VM_CNT_INC(v_dfree);
+ } else if ((object->flags & OBJ_DEAD) == 0) {
+ if (object->type != OBJT_SWAP &&
+ object->type != OBJT_DEFAULT)
+ pageout_ok = true;
+ else if (disable_swap_pageouts)
+ pageout_ok = false;
+ else
+ pageout_ok = true;
+ if (!pageout_ok) {
+ vm_page_requeue(m);
+ continue;
+ }
+
+ /*
+ * Form a cluster with adjacent, dirty pages from the
+ * same object, and page out that entire cluster.
+ *
+ * The adjacent, dirty pages must also be in the
+ * laundry. However, their mappings are not checked
+ * for new references. Consequently, a recently
+ * referenced page may be paged out. However, that
+ * page will not be prematurely reclaimed. After page
+ * out, the page will be placed in the inactive queue,
+ * where any new references will be detected and the
+ * page reactivated.
+ */
+ error = vm_pageout_clean(m, &numpagedout);
+ if (error == 0) {
+ launder -= numpagedout;
+ maxscan -= numpagedout - 1;
+ } else if (error == EDEADLK) {
+ pageout_lock_miss++;
+ vnodes_skipped++;
+ }
+ mtx = NULL;
+ obj_locked = false;
}
- goto relock_queue;
- }
-drop_page:
- vm_page_unlock(m);
- VM_OBJECT_WUNLOCK(object);
-relock_queue:
- if (!queue_locked) {
- vm_pagequeue_lock(pq);
- queue_locked = true;
}
- next = TAILQ_NEXT(&vmd->vmd_laundry_marker, plinks.q);
- TAILQ_REMOVE(&pq->pq_pl, &vmd->vmd_laundry_marker, plinks.q);
+ if (mtx != NULL)
+ mtx_unlock(mtx);
+ if (obj_locked)
+ VM_OBJECT_WUNLOCK(object);
+
+ vm_pagequeue_lock(pq);
}
+ TAILQ_REMOVE(&pq->pq_pl, &vmd->vmd_laundry_marker, plinks.q);
vm_pagequeue_unlock(pq);
if (launder > 0 && pq == &vmd->vmd_pagequeues[PQ_UNSWAPPABLE]) {
- pq = &vmd->vmd_pagequeues[PQ_LAUNDRY];
+ queue = PQ_LAUNDRY;
+ pq = &vmd->vmd_pagequeues[queue];
goto scan;
}
@@ -1091,6 +1084,65 @@
}
}
+/*
+ * Free the page "m" and then try to free the other pages of "object" that
+ * lie in the same I/O-sized block.
+ *
+ * The block covers pcount = MAX(object->iosize / PAGE_SIZE, 1) page indices
+ * and begins at m's pindex rounded down to a multiple of pcount. A
+ * neighboring page is freed only if it is in PQ_INACTIVE, is neither held
+ * nor busied, and is either invalid or both unreferenced and clean. Dirty
+ * neighbors are passed to vm_page_launder() unless the object is marked
+ * OBJ_DEAD, in which case they are left alone.
+ *
+ * "mtxp" is handed to vm_page_change_lock() for every neighbor visited;
+ * presumably this moves the caller's held page lock to that page, so the
+ * caller must not assume *mtxp still refers to m's lock on return
+ * (NOTE(review): confirm against vm_page_change_lock()).
+ *
+ * Returns the number of pages freed, which is at least one because "m" is
+ * always freed. The same number is added to the v_dfree counter.
+ */
+static int
+vm_pageout_free_pages(vm_object_t object, vm_page_t m, struct mtx **mtxp)
+{
+ vm_page_t p, pp;
+ vm_pindex_t start;
+ int pcount, count;
+
+ pcount = MAX(object->iosize / PAGE_SIZE, 1);
+ /* Account for m itself; it is freed on both paths below. */
+ count = 1;
+ if (pcount == 1) {
+ vm_page_free(m);
+ goto out;
+ }
+
+ /* Find the first page in the block. */
+ start = m->pindex - (m->pindex % pcount);
+ /* Step backwards from m; the loop body is intentionally empty. */
+ for (p = m; p->pindex > start && (pp = vm_page_prev(p)) != NULL;
+ p = pp);
+
+ /* Free the original page so we don't validate it twice. */
+ /* If m is where the walk stopped, resume from its successor instead. */
+ if (p == m)
+ p = vm_page_next(m);
+ vm_page_free(m);
+ /* Iterate through the block range and free compatible pages. */
+ /* XXX Fix cache miss on last page. */
+ for (m = p; m != NULL && m->pindex < start + pcount; m = p) {
+ /* Fetch the successor first; m may be freed before the next pass. */
+ p = TAILQ_NEXT(m, listq);
+ vm_page_change_lock(m, mtxp);
+ if (vm_page_held(m) || vm_page_busied(m) ||
+ m->queue != PQ_INACTIVE)
+ continue;
+ /* Invalid pages skip the reference and dirty checks entirely. */
+ if (m->valid == 0)
+ goto free_page;
+ if ((m->aflags & PGA_REFERENCED) != 0)
+ continue;
+ if (object->ref_count != 0) {
+ /* Record the reference in the page flags and skip this page. */
+ if (pmap_ts_referenced(m)) {
+ vm_page_aflag_set(m, PGA_REFERENCED);
+ continue;
+ }
+ vm_page_test_dirty(m);
+ if (m->dirty == 0)
+ pmap_remove_all(m);
+ }
+ /* Dirty pages are never freed here. */
+ if (m->dirty) {
+ if ((object->flags & OBJ_DEAD) == 0)
+ vm_page_launder(m);
+ continue;
+ }
+free_page:
+ vm_page_free(m);
+ count++;
+ }
+out:
+ VM_CNT_ADD(v_dfree, count);
+
+ return (count);
+}
+
/*
* vm_pageout_scan does the dirty work for the pageout daemon.
*
@@ -1103,13 +1155,15 @@
static bool
vm_pageout_scan(struct vm_domain *vmd, int pass, int shortage)
{
- vm_page_t m, next;
+ struct vm_batchqueue bq, rq;
+ struct mtx *mtx;
+ vm_page_t m;
struct vm_pagequeue *pq;
vm_object_t object;
long min_scan;
- int act_delta, addl_page_shortage, deficit, inactq_shortage, maxscan;
- int page_shortage, scan_tick, scanned, starting_page_shortage;
- boolean_t queue_locked;
+ int act_delta, addl_page_shortage, deficit, inactq_shortage;
+ int maxscan, page_shortage, scan_tick, scanned, starting_page_shortage;
+ bool obj_locked;
/*
* If we need to reclaim memory ask kernel caches to return
@@ -1157,186 +1211,179 @@
* decisions for the inactive queue, only for the active queue.)
*/
pq = &vmd->vmd_pagequeues[PQ_INACTIVE];
- maxscan = pq->pq_cnt;
vm_pagequeue_lock(pq);
- queue_locked = TRUE;
- for (m = TAILQ_FIRST(&pq->pq_pl);
- m != NULL && maxscan-- > 0 && page_shortage > 0;
- m = next) {
- vm_pagequeue_assert_locked(pq);
- KASSERT(queue_locked, ("unlocked inactive queue"));
- KASSERT(vm_page_inactive(m), ("Inactive queue %p", m));
-
- VM_CNT_INC(v_pdpages);
- next = TAILQ_NEXT(m, plinks.q);
+ TAILQ_INSERT_HEAD(&pq->pq_pl, &vmd->vmd_marker, plinks.q);
+ for (maxscan = pq->pq_cnt; maxscan > 0 && page_shortage > 0 &&
+ TAILQ_NEXT(&vmd->vmd_marker, plinks.q) != NULL;
+ maxscan -= bq.bq_cnt) {
+ vm_pageout_collect_batch(pq, &bq, &vmd->vmd_marker,
+ min(maxscan, page_shortage), true);
+ vm_pagequeue_unlock(pq);
- /*
- * skip marker pages
- */
- if (m->flags & PG_MARKER)
- continue;
+ mtx = NULL;
+ obj_locked = false;
+ object = NULL;
+ vm_batchqueue_init(&rq);
+ VM_BATCHQ_FOREACH(&bq, m) {
+ vm_page_change_lock(m, &mtx);
- KASSERT((m->flags & PG_FICTITIOUS) == 0,
- ("Fictitious page %p cannot be in inactive queue", m));
- KASSERT((m->oflags & VPO_UNMANAGED) == 0,
- ("Unmanaged page %p cannot be in inactive queue", m));
+recheck:
+ /*
+ * The page may have been disassociated from the queue
+ * while locks were dropped.
+ */
+ if (!vm_pageout_page_queued(m, PQ_INACTIVE)) {
+ addl_page_shortage++;
+ continue;
+ }
- /*
- * The page or object lock acquisitions fail if the
- * page was removed from the queue or moved to a
- * different position within the queue. In either
- * case, addl_page_shortage should not be incremented.
- */
- if (!vm_pageout_page_lock(m, &next))
- goto unlock_page;
- else if (m->wire_count != 0) {
/*
- * Wired pages may not be freed, and unwiring a queued
- * page will cause it to be requeued. Thus, remove them
- * from the queue now to avoid unnecessary revisits.
+ * A requeue was requested, so this page gets a second
+ * chance.
*/
- vm_page_dequeue_locked(m);
- addl_page_shortage++;
- goto unlock_page;
- } else if (m->hold_count != 0) {
+ if ((m->aflags & PGA_REQUEUE) != 0)
+ goto reinsert;
+
/*
- * Held pages are essentially stuck in the
- * queue. So, they ought to be discounted
- * from the inactive count. See the
- * calculation of inactq_shortage before the
+ * Held pages are essentially stuck in the queue. So,
+ * they ought to be discounted from the inactive count.
+ * See the calculation of inactq_shortage before the
* loop over the active queue below.
+ *
+ * Wired pages may not be freed. Complete their removal
+ * from the queue now to avoid needless revisits during
+ * future scans.
*/
- addl_page_shortage++;
- goto unlock_page;
- }
- object = m->object;
- if (!VM_OBJECT_TRYWLOCK(object)) {
- if (!vm_pageout_fallback_object_lock(m, &next))
- goto unlock_object;
- else if (m->wire_count != 0) {
- vm_page_dequeue_locked(m);
+ if (m->hold_count != 0) {
addl_page_shortage++;
- goto unlock_object;
- } else if (m->hold_count != 0) {
+ goto reinsert;
+ }
+ if (m->wire_count != 0) {
addl_page_shortage++;
- goto unlock_object;
+ vm_page_dequeue_lazy(m);
+ continue;
}
- }
- if (vm_page_busied(m)) {
- /*
- * Don't mess with busy pages. Leave them at
- * the front of the queue. Most likely, they
- * are being paged out and will leave the
- * queue shortly after the scan finishes. So,
- * they ought to be discounted from the
- * inactive count.
- */
- addl_page_shortage++;
-unlock_object:
- VM_OBJECT_WUNLOCK(object);
-unlock_page:
- vm_page_unlock(m);
- continue;
- }
- KASSERT(!vm_page_held(m), ("Held page %p", m));
- /*
- * Dequeue the inactive page and unlock the inactive page
- * queue, invalidating the 'next' pointer. Dequeueing the
- * page here avoids a later reacquisition (and release) of
- * the inactive page queue lock when vm_page_activate(),
- * vm_page_free(), or vm_page_launder() is called. Use a
- * marker to remember our place in the inactive queue.
- */
- TAILQ_INSERT_AFTER(&pq->pq_pl, m, &vmd->vmd_marker, plinks.q);
- vm_page_dequeue_locked(m);
- vm_pagequeue_unlock(pq);
- queue_locked = FALSE;
+ if (object != m->object) {
+ if (obj_locked) {
+ VM_OBJECT_WUNLOCK(object);
+ obj_locked = false;
+ }
+ object = m->object;
+ }
+ if (!obj_locked) {
+ if (!VM_OBJECT_TRYWLOCK(object)) {
+ mtx_unlock(mtx);
+ VM_OBJECT_WLOCK(object);
+ obj_locked = true;
+ mtx_lock(mtx);
+ goto recheck;
+ } else
+ obj_locked = true;
+ }
- /*
- * Invalid pages can be easily freed. They cannot be
- * mapped, vm_page_free() asserts this.
- */
- if (m->valid == 0)
- goto free_page;
+ if (vm_page_busied(m)) {
+ addl_page_shortage++;
+ goto reinsert;
+ }
- /*
- * If the page has been referenced and the object is not dead,
- * reactivate or requeue the page depending on whether the
- * object is mapped.
- */
- if ((m->aflags & PGA_REFERENCED) != 0) {
- vm_page_aflag_clear(m, PGA_REFERENCED);
- act_delta = 1;
- } else
- act_delta = 0;
- if (object->ref_count != 0) {
- act_delta += pmap_ts_referenced(m);
- } else {
- KASSERT(!pmap_page_is_mapped(m),
- ("vm_pageout_scan: page %p is mapped", m));
- }
- if (act_delta != 0) {
+ /*
+ * Invalid pages can be easily freed. They cannot be
+ * mapped, vm_page_free() asserts this.
+ */
+ if (m->valid == 0)
+ goto free_page;
+
+ /*
+ * If the page has been referenced and the object is not dead,
+ * reactivate or requeue the page depending on whether the
+ * object is mapped.
+ */
+ if ((m->aflags & PGA_REFERENCED) != 0) {
+ vm_page_aflag_clear(m, PGA_REFERENCED);
+ act_delta = 1;
+ } else
+ act_delta = 0;
if (object->ref_count != 0) {
- VM_CNT_INC(v_reactivated);
- vm_page_activate(m);
+ act_delta += pmap_ts_referenced(m);
+ } else {
+ KASSERT(!pmap_page_is_mapped(m),
+ ("vm_pageout_scan: page %p is mapped", m));
+ }
+ if (act_delta != 0) {
+ if (object->ref_count != 0) {
+ VM_CNT_INC(v_reactivated);
+ vm_page_activate(m);
+
+ /*
+ * Increase the activation count if the
+ * page was referenced while in the
+ * inactive queue. This makes it less
+ * likely that the page will be returned
+ * prematurely to the inactive queue.
+ */
+ m->act_count += act_delta + ACT_ADVANCE;
+ continue;
+ } else if ((object->flags & OBJ_DEAD) == 0) {
+ vm_page_aflag_set(m, PGA_REQUEUE);
+ goto reinsert;
+ }
+ }
- /*
- * Increase the activation count if the page
- * was referenced while in the inactive queue.
- * This makes it less likely that the page will
- * be returned prematurely to the inactive
- * queue.
- */
- m->act_count += act_delta + ACT_ADVANCE;
- goto drop_page;
- } else if ((object->flags & OBJ_DEAD) == 0) {
- vm_pagequeue_lock(pq);
- queue_locked = TRUE;
- m->queue = PQ_INACTIVE;
- TAILQ_INSERT_TAIL(&pq->pq_pl, m, plinks.q);
- vm_pagequeue_cnt_inc(pq);
- goto drop_page;
+ /*
+ * If the page appears to be clean at the
+ * machine-independent layer, then remove all of its
+ * mappings from the pmap in anticipation of freeing it.
+ * If, however, any of the page's mappings allow write
+ * access, then the page may still be modified until the
+ * last of those mappings are removed.
+ */
+ if (object->ref_count != 0) {
+ vm_page_test_dirty(m);
+ if (m->dirty == 0)
+ pmap_remove_all(m);
}
- }
- /*
- * If the page appears to be clean at the machine-independent
- * layer, then remove all of its mappings from the pmap in
- * anticipation of freeing it. If, however, any of the page's
- * mappings allow write access, then the page may still be
- * modified until the last of those mappings are removed.
- */
- if (object->ref_count != 0) {
- vm_page_test_dirty(m);
- if (m->dirty == 0)
- pmap_remove_all(m);
+ /*
+ * Clean pages can be freed, but dirty pages must be
+ * sent back to the laundry, unless they belong to a
+ * dead object. Requeueing dirty pages from dead
+ * objects is pointless, as they are being paged out
+ * and freed by the thread that destroyed the object.
+ */
+ if (m->dirty == 0) {
+free_page:
+ page_shortage -= vm_pageout_free_pages(object,
+ m, &mtx);
+ } else if ((object->flags & OBJ_DEAD) == 0)
+ vm_page_launder(m);
+ continue;
+reinsert:
+ if (!vm_batchqueue_insert(&rq, m))
+ panic("failed to requeue page %p", m);
}
+ if (mtx != NULL)
+ mtx_unlock(mtx);
+ if (obj_locked)
+ VM_OBJECT_WUNLOCK(object);
- /*
- * Clean pages can be freed, but dirty pages must be sent back
- * to the laundry, unless they belong to a dead object.
- * Requeueing dirty pages from dead objects is pointless, as
- * they are being paged out and freed by the thread that
- * destroyed the object.
- */
- if (m->dirty == 0) {
-free_page:
- vm_page_free(m);
- VM_CNT_INC(v_dfree);
- --page_shortage;
- } else if ((object->flags & OBJ_DEAD) == 0)
- vm_page_launder(m);
-drop_page:
- vm_page_unlock(m);
- VM_OBJECT_WUNLOCK(object);
- if (!queue_locked) {
- vm_pagequeue_lock(pq);
- queue_locked = TRUE;
+ vm_pagequeue_lock(pq);
+ VM_BATCHQ_FOREACH(&rq, m) {
+ if (vm_page_inactive(m) &&
+ (m->aflags & PGA_ENQUEUED) == 0) {
+ vm_page_aflag_set(m, PGA_ENQUEUED);
+ if ((m->aflags & PGA_REQUEUE) != 0) {
+ TAILQ_INSERT_TAIL(&pq->pq_pl, m,
+ plinks.q);
+ vm_page_aflag_clear(m, PGA_REQUEUE);
+ } else
+ TAILQ_INSERT_BEFORE(&vmd->vmd_marker, m,
+ plinks.q);
+ vm_pagequeue_cnt_inc(pq);
+ }
}
- next = TAILQ_NEXT(&vmd->vmd_marker, plinks.q);
- TAILQ_REMOVE(&pq->pq_pl, &vmd->vmd_marker, plinks.q);
}
+ TAILQ_REMOVE(&pq->pq_pl, &vmd->vmd_marker, plinks.q);
vm_pagequeue_unlock(pq);
/*
@@ -1401,7 +1448,6 @@
pq = &vmd->vmd_pagequeues[PQ_ACTIVE];
vm_pagequeue_lock(pq);
- maxscan = pq->pq_cnt;
/*
* If we're just idle polling attempt to visit every
@@ -1422,116 +1468,133 @@
* the per-page activity counter and use it to identify deactivation
* candidates. Held pages may be deactivated.
*/
- for (m = TAILQ_FIRST(&pq->pq_pl), scanned = 0; m != NULL && (scanned <
- min_scan || (inactq_shortage > 0 && scanned < maxscan)); m = next,
- scanned++) {
- KASSERT(m->queue == PQ_ACTIVE,
- ("vm_pageout_scan: page %p isn't active", m));
- next = TAILQ_NEXT(m, plinks.q);
- if ((m->flags & PG_MARKER) != 0)
- continue;
- KASSERT((m->flags & PG_FICTITIOUS) == 0,
- ("Fictitious page %p cannot be in active queue", m));
- KASSERT((m->oflags & VPO_UNMANAGED) == 0,
- ("Unmanaged page %p cannot be in active queue", m));
- if (!vm_pageout_page_lock(m, &next)) {
- vm_page_unlock(m);
- continue;
- }
+ TAILQ_INSERT_HEAD(&pq->pq_pl, &vmd->vmd_marker, plinks.q);
+ for (maxscan = pq->pq_cnt, scanned = 0;
+ TAILQ_NEXT(&vmd->vmd_marker, plinks.q) != NULL &&
+ (scanned < min_scan || (inactq_shortage > 0 && scanned < maxscan));
+ scanned += bq.bq_cnt) {
+ vm_pageout_collect_batch(pq, &bq, &vmd->vmd_marker,
+ (inactq_shortage > 0 ? maxscan : min_scan) - scanned,
+ false);
+ vm_pagequeue_unlock(pq);
- /*
- * The count for page daemon pages is updated after checking
- * the page for eligibility.
- */
- VM_CNT_INC(v_pdpages);
+ mtx = NULL;
+ vm_batchqueue_init(&rq);
+ VM_BATCHQ_FOREACH(&bq, m) {
+ vm_page_change_lock(m, &mtx);
- /*
- * Wired pages are dequeued lazily.
- */
- if (m->wire_count != 0) {
- vm_page_dequeue_locked(m);
- vm_page_unlock(m);
- continue;
- }
+ /*
+ * The page may have been disassociated from the queue
+ * while locks were dropped.
+ */
+ if (!vm_pageout_page_queued(m, PQ_ACTIVE))
+ continue;
- /*
- * Check to see "how much" the page has been used.
- */
- if ((m->aflags & PGA_REFERENCED) != 0) {
- vm_page_aflag_clear(m, PGA_REFERENCED);
- act_delta = 1;
- } else
- act_delta = 0;
+ /*
+ * Wired pages are dequeued lazily.
+ */
+ if (m->wire_count != 0) {
+ vm_page_dequeue_lazy(m);
+ continue;
+ }
- /*
- * Perform an unsynchronized object ref count check. While
- * the page lock ensures that the page is not reallocated to
- * another object, in particular, one with unmanaged mappings
- * that cannot support pmap_ts_referenced(), two races are,
- * nonetheless, possible:
- * 1) The count was transitioning to zero, but we saw a non-
- * zero value. pmap_ts_referenced() will return zero
- * because the page is not mapped.
- * 2) The count was transitioning to one, but we saw zero.
- * This race delays the detection of a new reference. At
- * worst, we will deactivate and reactivate the page.
- */
- if (m->object->ref_count != 0)
- act_delta += pmap_ts_referenced(m);
+ /*
+ * Check to see "how much" the page has been used.
+ */
+ if ((m->aflags & PGA_REFERENCED) != 0) {
+ vm_page_aflag_clear(m, PGA_REFERENCED);
+ act_delta = 1;
+ } else
+ act_delta = 0;
- /*
- * Advance or decay the act_count based on recent usage.
- */
- if (act_delta != 0) {
- m->act_count += ACT_ADVANCE + act_delta;
- if (m->act_count > ACT_MAX)
- m->act_count = ACT_MAX;
- } else
- m->act_count -= min(m->act_count, ACT_DECLINE);
+ /*
+ * Perform an unsynchronized object ref count check.
+ * While the page lock ensures that the page is not
+ * reallocated to another object, in particular, one
+ * with unmanaged mappings that cannot support
+ * pmap_ts_referenced(), two races are, nonetheless,
+ * possible:
+ * 1) The count was transitioning to zero, but we saw a
+ * non-zero value. pmap_ts_referenced() will return
+ * zero because the page is not mapped.
+ * 2) The count was transitioning to one, but we saw
+ * zero. This race delays the detection of a new
+ * reference. At worst, we will deactivate and
+ * reactivate the page.
+ */
+ if (m->object->ref_count != 0)
+ act_delta += pmap_ts_referenced(m);
- /*
- * Move this page to the tail of the active, inactive or laundry
- * queue depending on usage.
- */
- if (m->act_count == 0) {
- /* Dequeue to avoid later lock recursion. */
- vm_page_dequeue_locked(m);
+ /*
+ * Advance or decay the act_count based on recent usage.
+ */
+ if (act_delta != 0) {
+ m->act_count += ACT_ADVANCE + act_delta;
+ if (m->act_count > ACT_MAX)
+ m->act_count = ACT_MAX;
+ } else
+ m->act_count -= min(m->act_count, ACT_DECLINE);
/*
- * When not short for inactive pages, let dirty pages go
- * through the inactive queue before moving to the
- * laundry queues. This gives them some extra time to
- * be reactivated, potentially avoiding an expensive
- * pageout. During a page shortage, the inactive queue
- * is necessarily small, so we may move dirty pages
- * directly to the laundry queue.
+ * Move this page to the tail of the active, inactive or
+ * laundry queue depending on usage.
*/
- if (inactq_shortage <= 0)
- vm_page_deactivate(m);
- else {
+ if (m->act_count == 0) {
/*
- * Calling vm_page_test_dirty() here would
- * require acquisition of the object's write
- * lock. However, during a page shortage,
- * directing dirty pages into the laundry
- * queue is only an optimization and not a
- * requirement. Therefore, we simply rely on
- * the opportunistic updates to the page's
- * dirty field by the pmap.
+ * When not short for inactive pages, let dirty
+ * pages go through the inactive queue before
+ * moving to the laundry queues. This gives
+ * them some extra time to be reactivated,
+ * potentially avoiding an expensive pageout.
+ * During a page shortage, the inactive queue is
+ * necessarily small, so we may move dirty pages
+ * directly to the laundry queue.
*/
- if (m->dirty == 0) {
+ if (inactq_shortage <= 0)
vm_page_deactivate(m);
- inactq_shortage -=
- act_scan_laundry_weight;
- } else {
- vm_page_launder(m);
- inactq_shortage--;
+ else {
+ /*
+ * Calling vm_page_test_dirty() here
+ * would require acquisition of the
+ * object's write lock. However, during
+ * a page shortage, directing dirty
+ * pages into the laundry queue is only
+ * an optimization and not a
+ * requirement. Therefore, we simply
+ * rely on the opportunistic updates to
+ * the page's dirty field by the pmap.
+ */
+ if (m->dirty == 0) {
+ vm_page_deactivate(m);
+ inactq_shortage -=
+ act_scan_laundry_weight;
+ } else {
+ vm_page_launder(m);
+ inactq_shortage--;
+ }
}
+ } else if (!vm_batchqueue_insert(&rq, m))
+ panic("failed to requeue page %p", m);
+ }
+ if (mtx != NULL)
+ mtx_unlock(mtx);
+
+ vm_pagequeue_lock(pq);
+ /*
+ * XXXMJ this step could be avoided if we used a CLOCK scan for
+ * the active queue. This would involve modifying
+ * vm_page_enqueue() to insert after a marker page rather than
+ * at the tail of the queue.
+ */
+ VM_BATCHQ_FOREACH(&rq, m) {
+ if (vm_page_active(m) &&
+ (m->aflags & PGA_ENQUEUED) != 0) {
+ TAILQ_REMOVE(&pq->pq_pl, m, plinks.q);
+ TAILQ_INSERT_TAIL(&pq->pq_pl, m, plinks.q);
}
- } else
- vm_page_requeue_locked(m);
- vm_page_unlock(m);
+ }
}
+ TAILQ_REMOVE(&pq->pq_pl, &vmd->vmd_marker, plinks.q);
vm_pagequeue_unlock(pq);
if (pass > 0)
vm_swapout_run_idle();
Index: sys/vm/vm_pagequeue.h
===================================================================
--- sys/vm/vm_pagequeue.h
+++ sys/vm/vm_pagequeue.h
@@ -73,7 +73,23 @@
const char * const pq_name;
} __aligned(CACHE_LINE_SIZE);
+/*
+ * Default number of page slots in a struct vm_batchqueue.  A definition of
+ * VM_BATCHQUEUE_SIZE seen before this point (for example, one supplied by a
+ * machine-dependent header) takes precedence over this value.
+ */
+#ifndef VM_BATCHQUEUE_SIZE
+#define VM_BATCHQUEUE_SIZE 7
+#endif
+
+/*
+ * A fixed-capacity batch of pages.  bq_pa[] holds the page pointers and
+ * bq_cnt is the number of slots currently in use; slots at index bq_cnt and
+ * above hold no valid page.
+ */
+struct vm_batchqueue {
+ vm_page_t bq_pa[VM_BATCHQUEUE_SIZE];
+ int bq_cnt;
+} __aligned(CACHE_LINE_SIZE);
+
+/*
+ * Visit each page stored in a batch queue, in insertion order, assigning it
+ * to "page" for the loop body.  The bound is tested before the slot is
+ * loaded so that no slot at or beyond bq_cnt is ever read: such slots hold
+ * no valid page, and when the queue is full the slot lies past the end of
+ * bq_pa[].  After the loop "page" keeps the last page visited, or its prior
+ * value if the queue was empty.
+ */
+#define VM_BATCHQ_FOREACH(batchqp, page) \
+	for (vm_page_t *__mp = &(batchqp)->bq_pa[0]; \
+	    __mp != &(batchqp)->bq_pa[(batchqp)->bq_cnt] && \
+	    ((page) = *__mp, true); \
+	    __mp++)
+
+#include <vm/uma.h>
#include <sys/pidctrl.h>
+
struct sysctl_oid;
/*
@@ -144,6 +160,7 @@
#define vm_pagequeue_assert_locked(pq) mtx_assert(&(pq)->pq_mutex, MA_OWNED)
#define vm_pagequeue_lock(pq) mtx_lock(&(pq)->pq_mutex)
#define vm_pagequeue_lockptr(pq) (&(pq)->pq_mutex)
+#define vm_pagequeue_trylock(pq) mtx_trylock(&(pq)->pq_mutex)
#define vm_pagequeue_unlock(pq) mtx_unlock(&(pq)->pq_mutex)
#define vm_domain_free_assert_locked(n) \
@@ -154,6 +171,8 @@
mtx_lock(vm_domain_free_lockptr((d)))
#define vm_domain_free_lockptr(d) \
(&(d)->vmd_free_mtx)
+#define vm_domain_free_trylock(d) \
+ mtx_trylock(vm_domain_free_lockptr((d)))
#define vm_domain_free_unlock(d) \
mtx_unlock(vm_domain_free_lockptr((d)))
@@ -172,14 +191,30 @@
vm_pagequeue_cnt_add(struct vm_pagequeue *pq, int addend)
{
-#ifdef notyet
vm_pagequeue_assert_locked(pq);
-#endif
pq->pq_cnt += addend;
}
#define vm_pagequeue_cnt_inc(pq) vm_pagequeue_cnt_add((pq), 1)
#define vm_pagequeue_cnt_dec(pq) vm_pagequeue_cnt_add((pq), -1)
+/*
+ * Reset a batch queue to the empty state by zeroing its count.  The slots
+ * of bq_pa[] are not cleared.
+ */
+static inline void
+vm_batchqueue_init(struct vm_batchqueue *bq)
+{
+
+ bq->bq_cnt = 0;
+}
+
+/*
+ * Append page "m" to the batch queue.  Returns true if the page was stored,
+ * or false, leaving the queue untouched, if every slot is already in use.
+ */
+static inline bool
+vm_batchqueue_insert(struct vm_batchqueue *bq, vm_page_t m)
+{
+
+	/* Full queue: refuse the page and let the caller decide what to do. */
+	if (bq->bq_cnt >= nitems(bq->bq_pa))
+		return (false);
+
+	bq->bq_pa[bq->bq_cnt++] = m;
+	return (true);
+}
+
void vm_domain_set(struct vm_domain *vmd);
void vm_domain_clear(struct vm_domain *vmd);
int vm_domain_allocate(struct vm_domain *vmd, int req, int npages);
Index: sys/vm/vm_phys.c
===================================================================
--- sys/vm/vm_phys.c
+++ sys/vm/vm_phys.c
@@ -354,9 +354,9 @@
m->order = order;
if (tail)
- TAILQ_INSERT_TAIL(&fl[order].pl, m, plinks.q);
+ TAILQ_INSERT_TAIL(&fl[order].pl, m, listq);
else
- TAILQ_INSERT_HEAD(&fl[order].pl, m, plinks.q);
+ TAILQ_INSERT_HEAD(&fl[order].pl, m, listq);
fl[order].lcnt++;
}
@@ -364,7 +364,7 @@
vm_freelist_rem(struct vm_freelist *fl, vm_page_t m, int order)
{
- TAILQ_REMOVE(&fl[order].pl, m, plinks.q);
+ TAILQ_REMOVE(&fl[order].pl, m, listq);
fl[order].lcnt--;
m->order = VM_NFREEORDER;
}
@@ -1196,7 +1196,7 @@
oind++) {
for (pind = 0; pind < VM_NFREEPOOL; pind++) {
fl = (*seg->free_queues)[pind];
- TAILQ_FOREACH(m_ret, &fl[oind].pl, plinks.q) {
+ TAILQ_FOREACH(m_ret, &fl[oind].pl, listq) {
/*
* Is the size of this allocation request
* larger than the largest block size?
File Metadata
Details
Attached
Mime Type
text/plain
Expires
Fri, Apr 10, 11:35 PM (21 h, 38 m)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
31253847
Default Alt Text
D14893.id40876.diff (70 KB)
Attached To
Mode
D14893: VM page queue batching
Attached
Detach File
Event Timeline
Log In to Comment