D14893: VM page queue batching
D14893.id41728.diff (63 KB)
Index: sys/amd64/include/vmparam.h
===================================================================
--- sys/amd64/include/vmparam.h
+++ sys/amd64/include/vmparam.h
@@ -227,4 +227,10 @@
#define ZERO_REGION_SIZE (2 * 1024 * 1024) /* 2MB */
+/*
+ * Use a fairly large batch size since we expect amd64 systems to have lots of
+ * memory.
+ */
+#define VM_BATCHQUEUE_SIZE 31
+
#endif /* _MACHINE_VMPARAM_H_ */
Index: sys/kern/subr_witness.c
===================================================================
--- sys/kern/subr_witness.c
+++ sys/kern/subr_witness.c
@@ -603,7 +603,6 @@
* CDEV
*/
{ "vm map (system)", &lock_class_mtx_sleep },
- { "vm pagequeue", &lock_class_mtx_sleep },
{ "vnode interlock", &lock_class_mtx_sleep },
{ "cdev", &lock_class_mtx_sleep },
{ NULL, NULL },
@@ -613,11 +612,11 @@
{ "vm map (user)", &lock_class_sx },
{ "vm object", &lock_class_rw },
{ "vm page", &lock_class_mtx_sleep },
- { "vm pagequeue", &lock_class_mtx_sleep },
{ "pmap pv global", &lock_class_rw },
{ "pmap", &lock_class_mtx_sleep },
{ "pmap pv list", &lock_class_rw },
{ "vm page free queue", &lock_class_mtx_sleep },
+ { "vm pagequeue", &lock_class_mtx_sleep },
{ NULL, NULL },
/*
* kqueue/VFS interaction
Index: sys/vm/vm_object.c
===================================================================
--- sys/vm/vm_object.c
+++ sys/vm/vm_object.c
@@ -720,14 +720,11 @@
vm_object_terminate_pages(vm_object_t object)
{
vm_page_t p, p_next;
- struct mtx *mtx, *mtx1;
- struct vm_pagequeue *pq, *pq1;
- int dequeued;
+ struct mtx *mtx;
VM_OBJECT_ASSERT_WLOCKED(object);
mtx = NULL;
- pq = NULL;
/*
* Free any remaining pageable pages. This also removes them from the
@@ -737,60 +734,21 @@
*/
TAILQ_FOREACH_SAFE(p, &object->memq, listq, p_next) {
vm_page_assert_unbusied(p);
- if ((object->flags & OBJ_UNMANAGED) == 0) {
+ if ((object->flags & OBJ_UNMANAGED) == 0)
/*
* vm_page_free_prep() only needs the page
* lock for managed pages.
*/
- mtx1 = vm_page_lockptr(p);
- if (mtx1 != mtx) {
- if (mtx != NULL)
- mtx_unlock(mtx);
- if (pq != NULL) {
- vm_pagequeue_cnt_add(pq, dequeued);
- vm_pagequeue_unlock(pq);
- pq = NULL;
- }
- mtx = mtx1;
- mtx_lock(mtx);
- }
- }
+ vm_page_change_lock(p, &mtx);
p->object = NULL;
if (p->wire_count != 0)
- goto unlist;
- VM_CNT_INC(v_pfree);
- p->flags &= ~PG_ZERO;
- if (p->queue != PQ_NONE) {
- KASSERT(p->queue < PQ_COUNT, ("vm_object_terminate: "
- "page %p is not queued", p));
- pq1 = vm_page_pagequeue(p);
- if (pq != pq1) {
- if (pq != NULL) {
- vm_pagequeue_cnt_add(pq, dequeued);
- vm_pagequeue_unlock(pq);
- }
- pq = pq1;
- vm_pagequeue_lock(pq);
- dequeued = 0;
- }
- p->queue = PQ_NONE;
- TAILQ_REMOVE(&pq->pq_pl, p, plinks.q);
- dequeued--;
- }
- if (vm_page_free_prep(p, true))
continue;
-unlist:
- TAILQ_REMOVE(&object->memq, p, listq);
- }
- if (pq != NULL) {
- vm_pagequeue_cnt_add(pq, dequeued);
- vm_pagequeue_unlock(pq);
+ VM_CNT_INC(v_pfree);
+ vm_page_free(p);
}
if (mtx != NULL)
mtx_unlock(mtx);
- vm_page_free_phys_pglist(&object->memq);
-
/*
* If the object contained any pages, then reset it to an empty state.
* None of the object's fields, including "resident_page_count", were
@@ -1973,7 +1931,6 @@
{
vm_page_t p, next;
struct mtx *mtx;
- struct pglist pgl;
VM_OBJECT_ASSERT_WLOCKED(object);
KASSERT((object->flags & OBJ_UNMANAGED) == 0 ||
@@ -1982,7 +1939,6 @@
if (object->resident_page_count == 0)
return;
vm_object_pip_add(object, 1);
- TAILQ_INIT(&pgl);
again:
p = vm_page_find_least(object, start);
mtx = NULL;
@@ -2036,13 +1992,10 @@
}
if ((options & OBJPR_NOTMAPPED) == 0 && object->ref_count != 0)
pmap_remove_all(p);
- p->flags &= ~PG_ZERO;
- if (vm_page_free_prep(p, false))
- TAILQ_INSERT_TAIL(&pgl, p, listq);
+ vm_page_free(p);
}
if (mtx != NULL)
mtx_unlock(mtx);
- vm_page_free_phys_pglist(&pgl);
vm_object_pip_wakeup(object);
}
Index: sys/vm/vm_page.h
===================================================================
--- sys/vm/vm_page.h
+++ sys/vm/vm_page.h
@@ -93,8 +93,11 @@
*
* In general, operations on this structure's mutable fields are
* synchronized using either one of or a combination of the lock on the
- * object that the page belongs to (O), the pool lock for the page (P),
- * or the lock for either the free or paging queue (Q). If a field is
+ * object that the page belongs to (O), the page lock (P),
+ * the per-domain lock for the free queues (F), or the page's queue
+ * lock (Q). The physical address of a page is used to select its page
+ * lock from a pool. The queue lock for a page depends on the value of
+ * its queue field and described in detail below. If a field is
* annotated below with two of these locks, then holding either lock is
* sufficient for read access, but both locks are required for write
* access. An annotation of (C) indicates that the field is immutable.
@@ -143,6 +146,29 @@
* causing the thread to block. vm_page_sleep_if_busy() can be used to
* sleep until the page's busy state changes, after which the caller
* must re-lookup the page and re-evaluate its state.
+ *
+ * The queue field is the index of the page queue containing the
+ * page, or PQ_NONE if the page is not enqueued. The queue lock of a
+ * page is the page queue lock corresponding to the page queue index,
+ * or the page lock (P) for the page if it is not enqueued. To modify
+ * the queue field, the queue lock for the old value of the field must
+ * be held. It is invalid for a page's queue field to transition
+ * between two distinct page queue indices. That is, when updating
+ * the queue field, either the new value or the old value must be
+ * PQ_NONE.
+ *
+ * To avoid contention on page queue locks, page queue operations
+ * (enqueue, dequeue, requeue) are batched using per-CPU queues.
+ * A deferred operation is requested by inserting an entry into a
+ * batch queue; the entry is simply a pointer to the page, and the
+ * request type is encoded in the page's aflags field using the values
+ * in PGA_QUEUE_STATE_MASK. The type-stability of struct vm_pages is
+ * crucial to this scheme since the processing of entries in a given
+ * batch queue may be deferred indefinitely. In particular, a page
+ * may be freed before its pending batch queue entries have been
+ * processed. The page lock (P) must be held to schedule a batched
+ * queue operation, and the page queue lock must be held in order to
+ * process batch queue entries for the page queue.
*/
#if PAGE_SIZE == 4096
@@ -174,7 +200,7 @@
TAILQ_ENTRY(vm_page) listq; /* pages in same object (O) */
vm_object_t object; /* which object am I in (O,P) */
vm_pindex_t pindex; /* offset into object (O,P) */
- vm_paddr_t phys_addr; /* physical address of page */
+ vm_paddr_t phys_addr; /* physical address of page (C) */
struct md_page md; /* machine dependent stuff */
u_int wire_count; /* wired down maps refs (P) */
volatile u_int busy_lock; /* busy owners lock */
@@ -182,11 +208,11 @@
uint16_t flags; /* page PG_* flags (P) */
uint8_t aflags; /* access is atomic */
uint8_t oflags; /* page VPO_* flags (O) */
- uint8_t queue; /* page queue index (P,Q) */
+ uint8_t queue; /* page queue index (Q) */
int8_t psind; /* pagesizes[] index (O) */
int8_t segind; /* vm_phys segment index (C) */
- uint8_t order; /* index of the buddy queue */
- uint8_t pool; /* vm_phys freepool index (Q) */
+ uint8_t order; /* index of the buddy queue (F) */
+ uint8_t pool; /* vm_phys freepool index (F) */
u_char act_count; /* page usage count (P) */
/* NOTE that these must support one bit per DEV_BSIZE in a page */
/* so, on normal X86 kernels, they must be at least 8 bits wide */
@@ -314,10 +340,38 @@
*
* PGA_EXECUTABLE may be set by pmap routines, and indicates that a page has
* at least one executable mapping. It is not consumed by the MI VM layer.
+ *
+ * PGA_ENQUEUED is set and cleared when a page is inserted into or removed
+ * from a page queue, respectively. It determines whether the plinks.q field
+ * of the page is valid. To set or clear this flag, the queue lock for the
+ * page must be held: the page queue lock corresponding to the page's "queue"
+ * field if its value is not PQ_NONE, and the page lock otherwise.
+ *
+ * PGA_DEQUEUE is set when the page is scheduled to be dequeued from a page
+ * queue, and cleared when the dequeue request is processed. A page may
+ * have PGA_DEQUEUE set and PGA_ENQUEUED cleared, for instance if a dequeue
+ * is requested after the page is scheduled to be enqueued but before it is
+ * actually inserted into the page queue. The page lock must be held to set
+ * this flag, and the queue lock for the page must be held to clear it.
+ *
+ * PGA_REQUEUE is set when the page is scheduled to be enqueued or requeued
+ * in its page queue. The page lock must be held to set this flag, and the
+ * queue lock for the page must be held to clear it.
+ *
+ * PGA_REQUEUE_HEAD is a special flag for enqueuing pages near the head of
+ * the inactive queue, thus bypassing LRU. The page lock must be held to
+ * set this flag, and the queue lock for the page must be held to clear it.
*/
#define PGA_WRITEABLE 0x01 /* page may be mapped writeable */
#define PGA_REFERENCED 0x02 /* page has been referenced */
#define PGA_EXECUTABLE 0x04 /* page may be mapped executable */
+#define PGA_ENQUEUED 0x08 /* page is enqueued in a page queue */
+#define PGA_DEQUEUE 0x10 /* page is due to be dequeued */
+#define PGA_REQUEUE 0x20 /* page is due to be requeued */
+#define PGA_REQUEUE_HEAD 0x40 /* page requeue should bypass LRU */
+
+#define PGA_QUEUE_STATE_MASK (PGA_ENQUEUED | PGA_DEQUEUE | PGA_REQUEUE | \
+ PGA_REQUEUE_HEAD)
/*
* Page flags. If changed at any other time than page allocation or
@@ -484,13 +538,13 @@
void vm_page_deactivate(vm_page_t);
void vm_page_deactivate_noreuse(vm_page_t);
void vm_page_dequeue(vm_page_t m);
+void vm_page_dequeue_deferred(vm_page_t m);
void vm_page_dequeue_locked(vm_page_t m);
+void vm_page_drain_pqbatch(void);
vm_page_t vm_page_find_least(vm_object_t, vm_pindex_t);
-void vm_page_free_phys_pglist(struct pglist *tq);
-bool vm_page_free_prep(vm_page_t m, bool pagequeue_locked);
+bool vm_page_free_prep(vm_page_t m);
vm_page_t vm_page_getfake(vm_paddr_t paddr, vm_memattr_t memattr);
void vm_page_initfake(vm_page_t m, vm_paddr_t paddr, vm_memattr_t memattr);
-void vm_page_init_marker(vm_page_t m, int queue);
int vm_page_insert (vm_page_t, vm_object_t, vm_pindex_t);
void vm_page_launder(vm_page_t m);
vm_page_t vm_page_lookup (vm_object_t, vm_pindex_t);
@@ -752,6 +806,24 @@
return (m->queue == PQ_LAUNDRY || m->queue == PQ_UNSWAPPABLE);
}
+/*
+ * vm_page_enqueued:
+ *
+ * Return true if the page is logically enqueued and no deferred
+ * dequeue is pending.
+ */
+static inline bool
+vm_page_enqueued(vm_page_t m)
+{
+
+ vm_page_assert_locked(m);
+
+ if ((m->aflags & PGA_DEQUEUE) != 0)
+ return (false);
+ atomic_thread_fence_acq();
+ return (m->queue != PQ_NONE);
+}
+
/*
* vm_page_held:
*
Index: sys/vm/vm_page.c
===================================================================
--- sys/vm/vm_page.c
+++ sys/vm/vm_page.c
@@ -102,6 +102,7 @@
#include <sys/proc.h>
#include <sys/rwlock.h>
#include <sys/sbuf.h>
+#include <sys/sched.h>
#include <sys/smp.h>
#include <sys/sysctl.h>
#include <sys/vmmeter.h>
@@ -131,13 +132,10 @@
extern void uma_startup(void *, int);
extern int vmem_startup_count(void);
-/*
- * Associated with page of user-allocatable memory is a
- * page structure.
- */
-
struct vm_domain vm_dom[MAXMEMDOM];
+static DPCPU_DEFINE(struct vm_batchqueue, pqbatch[MAXMEMDOM][PQ_COUNT]);
+
struct mtx_padalign __exclusive_cache_line pa_lock[PA_LOCK_COUNT];
struct mtx_padalign __exclusive_cache_line vm_domainset_lock;
@@ -176,7 +174,8 @@
static void vm_page_alloc_check(vm_page_t m);
static void vm_page_clear_dirty_mask(vm_page_t m, vm_page_bits_t pagebits);
-static void vm_page_enqueue(uint8_t queue, vm_page_t m);
+static void vm_page_dequeue_complete(vm_page_t m);
+static void vm_page_enqueue(vm_page_t m, uint8_t queue);
static void vm_page_init(void *dummy);
static int vm_page_insert_after(vm_page_t m, vm_object_t object,
vm_pindex_t pindex, vm_page_t mpred);
@@ -443,12 +442,13 @@
* Nonetheless, it write busies and initializes the hold count to one as
* safety precautions.
*/
-void
-vm_page_init_marker(vm_page_t marker, int queue)
+static void
+vm_page_init_marker(vm_page_t marker, int queue, uint8_t aflags)
{
bzero(marker, sizeof(*marker));
marker->flags = PG_MARKER;
+ marker->aflags = aflags;
marker->busy_lock = VPB_SINGLE_EXCLUSIVER;
marker->queue = queue;
marker->hold_count = 1;
@@ -481,14 +481,32 @@
TAILQ_INIT(&pq->pq_pl);
mtx_init(&pq->pq_mutex, pq->pq_name, "vm pagequeue",
MTX_DEF | MTX_DUPOK);
- vm_page_init_marker(&vmd->vmd_markers[i], i);
+ vm_page_init_marker(&vmd->vmd_markers[i], i, 0);
}
mtx_init(&vmd->vmd_free_mtx, "vm page free queue", NULL, MTX_DEF);
mtx_init(&vmd->vmd_pageout_mtx, "vm pageout lock", NULL, MTX_DEF);
- vm_page_init_marker(&vmd->vmd_inacthead, PQ_INACTIVE);
+ snprintf(vmd->vmd_name, sizeof(vmd->vmd_name), "%d", domain);
+
+ /*
+ * inacthead is used to provide FIFO ordering for LRU-bypassing
+ * insertions.
+ */
+ vm_page_init_marker(&vmd->vmd_inacthead, PQ_INACTIVE, PGA_ENQUEUED);
TAILQ_INSERT_HEAD(&vmd->vmd_pagequeues[PQ_INACTIVE].pq_pl,
&vmd->vmd_inacthead, plinks.q);
- snprintf(vmd->vmd_name, sizeof(vmd->vmd_name), "%d", domain);
+
+ /*
+ * The clock pages are used to implement active queue scanning without
+ * requeues. Scans start at clock[0], which is advanced after the scan
+ * ends. When the two clock hands meet, they are reset and scanning
+ * resumes from the head of the queue.
+ */
+ vm_page_init_marker(&vmd->vmd_clock[0], PQ_ACTIVE, PGA_ENQUEUED);
+ vm_page_init_marker(&vmd->vmd_clock[1], PQ_ACTIVE, PGA_ENQUEUED);
+ TAILQ_INSERT_HEAD(&vmd->vmd_pagequeues[PQ_ACTIVE].pq_pl,
+ &vmd->vmd_clock[0], plinks.q);
+ TAILQ_INSERT_TAIL(&vmd->vmd_pagequeues[PQ_ACTIVE].pq_pl,
+ &vmd->vmd_clock[1], plinks.q);
}
/*
@@ -1847,6 +1865,7 @@
KASSERT(m != NULL, ("missing page"));
found:
+ vm_page_dequeue(m);
vm_page_alloc_check(m);
/*
@@ -2043,8 +2062,10 @@
#if VM_NRESERVLEVEL > 0
found:
#endif
- for (m = m_ret; m < &m_ret[npages]; m++)
+ for (m = m_ret; m < &m_ret[npages]; m++) {
+ vm_page_dequeue(m);
vm_page_alloc_check(m);
+ }
/*
* Initialize the pages. Only the PG_ZERO flag is inherited.
@@ -2188,6 +2209,7 @@
goto again;
return (NULL);
}
+ vm_page_dequeue(m);
vm_page_alloc_check(m);
/*
@@ -2381,7 +2403,7 @@
vm_reserv_size(level)) - pa);
#endif
} else if (object->memattr == VM_MEMATTR_DEFAULT &&
- m->queue != PQ_NONE && !vm_page_busied(m)) {
+ vm_page_enqueued(m) && !vm_page_busied(m)) {
/*
* The page is allocated but eligible for
* relocation. Extend the current run by one
@@ -2532,7 +2554,7 @@
error = EINVAL;
else if (object->memattr != VM_MEMATTR_DEFAULT)
error = EINVAL;
- else if (m->queue != PQ_NONE && !vm_page_busied(m)) {
+ else if (vm_page_enqueued(m) && !vm_page_busied(m)) {
KASSERT(pmap_page_get_memattr(m) ==
VM_MEMATTR_DEFAULT,
("page %p has an unexpected memattr", m));
@@ -2592,7 +2614,8 @@
*/
if (object->ref_count != 0)
pmap_remove_all(m);
- m_new->aflags = m->aflags;
+ m_new->aflags = m->aflags &
+ ~PGA_QUEUE_STATE_MASK;
KASSERT(m_new->oflags == VPO_UNMANAGED,
("page %p is managed", m_new));
m_new->oflags = m->oflags & VPO_NOSYNC;
@@ -2604,7 +2627,7 @@
vm_page_remque(m);
vm_page_replace_checked(m_new, object,
m->pindex, m);
- if (vm_page_free_prep(m, false))
+ if (vm_page_free_prep(m))
SLIST_INSERT_HEAD(&free, m,
plinks.s.ss);
@@ -2618,7 +2641,7 @@
m->flags &= ~PG_ZERO;
vm_page_remque(m);
vm_page_remove(m);
- if (vm_page_free_prep(m, false))
+ if (vm_page_free_prep(m))
SLIST_INSERT_HEAD(&free, m,
plinks.s.ss);
KASSERT(m->dirty == 0,
@@ -3061,113 +3084,297 @@
return (&vm_pagequeue_domain(m)->vmd_pagequeues[m->queue]);
}
+static struct mtx *
+vm_page_pagequeue_lockptr(vm_page_t m)
+{
+
+ if (m->queue == PQ_NONE)
+ return (NULL);
+ return (&vm_page_pagequeue(m)->pq_mutex);
+}
+
+static inline void
+vm_pqbatch_process_page(struct vm_pagequeue *pq, vm_page_t m)
+{
+ struct vm_domain *vmd;
+ uint8_t aflags;
+
+ vm_pagequeue_assert_locked(pq);
+ KASSERT(pq == vm_page_pagequeue(m),
+ ("page %p doesn't belong to %p", m, pq));
+
+ aflags = m->aflags;
+ if ((aflags & PGA_DEQUEUE) != 0) {
+ if (__predict_true((aflags & PGA_ENQUEUED) != 0)) {
+ TAILQ_REMOVE(&pq->pq_pl, m, plinks.q);
+ vm_pagequeue_cnt_dec(pq);
+ }
+ vm_page_dequeue_complete(m);
+ } else if ((aflags & (PGA_REQUEUE | PGA_REQUEUE_HEAD)) != 0) {
+ if ((aflags & PGA_ENQUEUED) != 0)
+ TAILQ_REMOVE(&pq->pq_pl, m, plinks.q);
+ else {
+ vm_pagequeue_cnt_inc(pq);
+ vm_page_aflag_set(m, PGA_ENQUEUED);
+ }
+ if ((aflags & PGA_REQUEUE_HEAD) != 0) {
+ KASSERT(m->queue == PQ_INACTIVE,
+ ("head enqueue not supported for page %p", m));
+ vmd = vm_pagequeue_domain(m);
+ TAILQ_INSERT_BEFORE(&vmd->vmd_inacthead, m, plinks.q);
+ } else
+ TAILQ_INSERT_TAIL(&pq->pq_pl, m, plinks.q);
+
+ /*
+ * PGA_REQUEUE and PGA_REQUEUE_HEAD must be cleared after
+ * setting PGA_ENQUEUED in order to synchronize with the
+ * page daemon.
+ */
+ vm_page_aflag_clear(m, PGA_REQUEUE | PGA_REQUEUE_HEAD);
+ }
+}
+
+static void
+vm_pqbatch_process(struct vm_pagequeue *pq, struct vm_batchqueue *bq,
+ uint8_t queue)
+{
+ vm_page_t m;
+ int i;
+
+ for (i = 0; i < bq->bq_cnt; i++) {
+ m = bq->bq_pa[i];
+ if (__predict_false(m->queue != queue))
+ continue;
+ vm_pqbatch_process_page(pq, m);
+ }
+ vm_batchqueue_init(bq);
+}
+
+static void
+vm_pqbatch_submit_page(vm_page_t m, uint8_t queue)
+{
+ struct vm_batchqueue *bq;
+ struct vm_pagequeue *pq;
+ int domain;
+
+ vm_page_assert_locked(m);
+ KASSERT(queue < PQ_COUNT, ("invalid queue %d", queue));
+
+ domain = vm_phys_domain(m);
+ pq = &vm_pagequeue_domain(m)->vmd_pagequeues[queue];
+
+ critical_enter();
+ bq = DPCPU_PTR(pqbatch[domain][queue]);
+ if (vm_batchqueue_insert(bq, m)) {
+ critical_exit();
+ return;
+ }
+ if (!vm_pagequeue_trylock(pq)) {
+ critical_exit();
+ vm_pagequeue_lock(pq);
+ critical_enter();
+ bq = DPCPU_PTR(pqbatch[domain][queue]);
+ }
+ vm_pqbatch_process(pq, bq, queue);
+
+ /*
+ * The page may have been logically dequeued before we acquired the
+ * page queue lock. In this case, the page lock prevents the page
+ * from being logically enqueued elsewhere.
+ */
+ if (__predict_true(m->queue == queue))
+ vm_pqbatch_process_page(pq, m);
+ else {
+ KASSERT(m->queue == PQ_NONE,
+ ("invalid queue transition for page %p", m));
+ KASSERT((m->aflags & PGA_ENQUEUED) == 0,
+ ("page %p is enqueued with invalid queue index", m));
+ vm_page_aflag_clear(m, PGA_QUEUE_STATE_MASK);
+ }
+ vm_pagequeue_unlock(pq);
+ critical_exit();
+}
+
/*
- * vm_page_dequeue:
+ * vm_page_drain_pqbatch: [ internal use only ]
+ *
+ * Force all per-CPU page queue batch queues to be drained. This is
+ * intended for use in severe memory shortages, to ensure that pages
+ * do not remain stuck in the batch queues.
+ */
+void
+vm_page_drain_pqbatch(void)
+{
+ struct thread *td;
+ struct vm_domain *vmd;
+ struct vm_pagequeue *pq;
+ int cpu, domain, queue;
+
+ td = curthread;
+ CPU_FOREACH(cpu) {
+ thread_lock(td);
+ sched_bind(td, cpu);
+ thread_unlock(td);
+
+ for (domain = 0; domain < vm_ndomains; domain++) {
+ vmd = VM_DOMAIN(domain);
+ for (queue = 0; queue < PQ_COUNT; queue++) {
+ pq = &vmd->vmd_pagequeues[queue];
+ vm_pagequeue_lock(pq);
+ critical_enter();
+ vm_pqbatch_process(pq,
+ DPCPU_PTR(pqbatch[domain][queue]), queue);
+ critical_exit();
+ vm_pagequeue_unlock(pq);
+ }
+ }
+ }
+ thread_lock(td);
+ sched_unbind(td);
+ thread_unlock(td);
+}
+
+/*
+ * Complete the logical removal of a page from a page queue. We must be
+ * careful to synchronize with the page daemon, which may be concurrently
+ * examining the page with only the page lock held. The page must not be
+ * in a state where it appears to be logically enqueued.
+ */
+static void
+vm_page_dequeue_complete(vm_page_t m)
+{
+
+ m->queue = PQ_NONE;
+ atomic_thread_fence_rel();
+ vm_page_aflag_clear(m, PGA_QUEUE_STATE_MASK);
+}
+
+/*
+ * vm_page_dequeue_deferred: [ internal use only ]
*
- * Remove the given page from its current page queue.
+ * Request removal of the given page from its current page
+ * queue. Physical removal from the queue may be deferred
+ * indefinitely.
*
* The page must be locked.
*/
void
-vm_page_dequeue(vm_page_t m)
+vm_page_dequeue_deferred(vm_page_t m)
{
- struct vm_pagequeue *pq;
+ int queue;
vm_page_assert_locked(m);
- KASSERT(m->queue < PQ_COUNT, ("vm_page_dequeue: page %p is not queued",
- m));
- pq = vm_page_pagequeue(m);
- vm_pagequeue_lock(pq);
- m->queue = PQ_NONE;
- TAILQ_REMOVE(&pq->pq_pl, m, plinks.q);
- vm_pagequeue_cnt_dec(pq);
- vm_pagequeue_unlock(pq);
+
+ queue = m->queue;
+ if (queue == PQ_NONE) {
+ KASSERT((m->aflags & PGA_QUEUE_STATE_MASK) == 0,
+ ("page %p has queue state", m));
+ return;
+ }
+ if ((m->aflags & PGA_DEQUEUE) == 0)
+ vm_page_aflag_set(m, PGA_DEQUEUE);
+ vm_pqbatch_submit_page(m, queue);
}
/*
* vm_page_dequeue_locked:
*
- * Remove the given page from its current page queue.
+ * Remove the page from its page queue, which must be locked.
+ * If the page lock is not held, there is no guarantee that the
+ * page will not be enqueued by another thread before this function
+ * returns. In this case, it is up to the caller to ensure that
+ * no other threads hold a reference to the page.
*
- * The page and page queue must be locked.
+ * The page queue lock must be held. If the page is not already
+ * logically dequeued, the page lock must be held as well.
*/
void
vm_page_dequeue_locked(vm_page_t m)
{
struct vm_pagequeue *pq;
- vm_page_lock_assert(m, MA_OWNED);
pq = vm_page_pagequeue(m);
+
+ KASSERT(m->queue != PQ_NONE,
+ ("%s: page %p queue field is PQ_NONE", __func__, m));
vm_pagequeue_assert_locked(pq);
- m->queue = PQ_NONE;
- TAILQ_REMOVE(&pq->pq_pl, m, plinks.q);
- vm_pagequeue_cnt_dec(pq);
+ KASSERT((m->aflags & PGA_DEQUEUE) != 0 ||
+ mtx_owned(vm_page_lockptr(m)),
+ ("%s: queued unlocked page %p", __func__, m));
+
+ if ((m->aflags & PGA_ENQUEUED) != 0) {
+ TAILQ_REMOVE(&pq->pq_pl, m, plinks.q);
+ vm_pagequeue_cnt_dec(pq);
+ }
+ vm_page_dequeue_complete(m);
}
/*
- * vm_page_enqueue:
- *
- * Add the given page to the specified page queue.
+ * vm_page_dequeue:
*
- * The page must be locked.
+ * Remove the page from whichever page queue it's in, if any.
+ * If the page lock is not held, there is no guarantee that the
+ * page will not be enqueued by another thread before this function
+ * returns. In this case, it is up to the caller to ensure that
+ * no other threads hold a reference to the page.
*/
-static void
-vm_page_enqueue(uint8_t queue, vm_page_t m)
+void
+vm_page_dequeue(vm_page_t m)
{
- struct vm_pagequeue *pq;
+ struct mtx *lock, *lock1;
- vm_page_lock_assert(m, MA_OWNED);
- KASSERT(queue < PQ_COUNT,
- ("vm_page_enqueue: invalid queue %u request for page %p",
- queue, m));
- pq = &vm_pagequeue_domain(m)->vmd_pagequeues[queue];
- vm_pagequeue_lock(pq);
- m->queue = queue;
- TAILQ_INSERT_TAIL(&pq->pq_pl, m, plinks.q);
- vm_pagequeue_cnt_inc(pq);
- vm_pagequeue_unlock(pq);
+ lock = vm_page_pagequeue_lockptr(m);
+ for (;;) {
+ if (lock == NULL)
+ return;
+ mtx_lock(lock);
+ if ((lock1 = vm_page_pagequeue_lockptr(m)) == lock)
+ break;
+ mtx_unlock(lock);
+ lock = lock1;
+ }
+ KASSERT(lock == vm_page_pagequeue_lockptr(m),
+ ("%s: page %p migrated directly between queues", __func__, m));
+ vm_page_dequeue_locked(m);
+ mtx_unlock(lock);
}
/*
- * vm_page_requeue:
- *
- * Move the given page to the tail of its current page queue.
- *
- * The page must be locked.
+ * Schedule the given page for insertion into the specified page queue.
+ * Physical insertion of the page may be deferred indefinitely.
*/
-void
-vm_page_requeue(vm_page_t m)
+static void
+vm_page_enqueue(vm_page_t m, uint8_t queue)
{
- struct vm_pagequeue *pq;
- vm_page_lock_assert(m, MA_OWNED);
- KASSERT(m->queue != PQ_NONE,
- ("vm_page_requeue: page %p is not queued", m));
- pq = vm_page_pagequeue(m);
- vm_pagequeue_lock(pq);
- TAILQ_REMOVE(&pq->pq_pl, m, plinks.q);
- TAILQ_INSERT_TAIL(&pq->pq_pl, m, plinks.q);
- vm_pagequeue_unlock(pq);
+ vm_page_assert_locked(m);
+ KASSERT(m->queue == PQ_NONE && (m->aflags & PGA_QUEUE_STATE_MASK) == 0,
+ ("%s: page %p is already enqueued", __func__, m));
+
+ m->queue = queue;
+ if ((m->aflags & PGA_REQUEUE) == 0)
+ vm_page_aflag_set(m, PGA_REQUEUE);
+ vm_pqbatch_submit_page(m, queue);
}
/*
- * vm_page_requeue_locked:
+ * vm_page_requeue: [ internal use only ]
*
- * Move the given page to the tail of its current page queue.
+ * Schedule a requeue of the given page.
*
- * The page queue must be locked.
+ * The page must be locked.
*/
void
-vm_page_requeue_locked(vm_page_t m)
+vm_page_requeue(vm_page_t m)
{
- struct vm_pagequeue *pq;
+ vm_page_assert_locked(m);
KASSERT(m->queue != PQ_NONE,
- ("vm_page_requeue_locked: page %p is not queued", m));
- pq = vm_page_pagequeue(m);
- vm_pagequeue_assert_locked(pq);
- TAILQ_REMOVE(&pq->pq_pl, m, plinks.q);
- TAILQ_INSERT_TAIL(&pq->pq_pl, m, plinks.q);
+ ("%s: page %p is not logically enqueued", __func__, m));
+
+ if ((m->aflags & PGA_REQUEUE) == 0)
+ vm_page_aflag_set(m, PGA_REQUEUE);
+ vm_pqbatch_submit_page(m, m->queue);
}
/*
@@ -3185,18 +3392,18 @@
int queue;
vm_page_lock_assert(m, MA_OWNED);
- if ((queue = m->queue) != PQ_ACTIVE) {
- if (m->wire_count == 0 && (m->oflags & VPO_UNMANAGED) == 0) {
- if (m->act_count < ACT_INIT)
- m->act_count = ACT_INIT;
- if (queue != PQ_NONE)
- vm_page_dequeue(m);
- vm_page_enqueue(PQ_ACTIVE, m);
- }
- } else {
- if (m->act_count < ACT_INIT)
+
+ if ((queue = m->queue) == PQ_ACTIVE || m->wire_count > 0 ||
+ (m->oflags & VPO_UNMANAGED) != 0) {
+ if (queue == PQ_ACTIVE && m->act_count < ACT_INIT)
m->act_count = ACT_INIT;
+ return;
}
+
+ vm_page_remque(m);
+ if (m->act_count < ACT_INIT)
+ m->act_count = ACT_INIT;
+ vm_page_enqueue(m, PQ_ACTIVE);
}
/*
@@ -3207,11 +3414,10 @@
* the page to the free list only if this function returns true.
*
* The object must be locked. The page must be locked if it is
- * managed. For a queued managed page, the pagequeue_locked
- * argument specifies whether the page queue is already locked.
+ * managed.
*/
bool
-vm_page_free_prep(vm_page_t m, bool pagequeue_locked)
+vm_page_free_prep(vm_page_t m)
{
#if defined(DIAGNOSTIC) && defined(PHYS_TO_DMAP)
@@ -3227,14 +3433,14 @@
if ((m->oflags & VPO_UNMANAGED) == 0) {
vm_page_lock_assert(m, MA_OWNED);
KASSERT(!pmap_page_is_mapped(m),
- ("vm_page_free_toq: freeing mapped page %p", m));
+ ("vm_page_free_prep: freeing mapped page %p", m));
} else
KASSERT(m->queue == PQ_NONE,
- ("vm_page_free_toq: unmanaged page %p is queued", m));
+ ("vm_page_free_prep: unmanaged page %p is queued", m));
VM_CNT_INC(v_tfree);
if (vm_page_sbusied(m))
- panic("vm_page_free: freeing busy page %p", m);
+ panic("vm_page_free_prep: freeing busy page %p", m);
vm_page_remove(m);
@@ -3250,21 +3456,23 @@
return (false);
}
- if (m->queue != PQ_NONE) {
- if (pagequeue_locked)
- vm_page_dequeue_locked(m);
- else
- vm_page_dequeue(m);
- }
+ /*
+ * Pages need not be dequeued before they are returned to the physical
+ * memory allocator, but they must at least be marked for a deferred
+ * dequeue.
+ */
+ if ((m->oflags & VPO_UNMANAGED) == 0)
+ vm_page_dequeue_deferred(m);
+
m->valid = 0;
vm_page_undirty(m);
if (m->wire_count != 0)
- panic("vm_page_free: freeing wired page %p", m);
+ panic("vm_page_free_prep: freeing wired page %p", m);
if (m->hold_count != 0) {
m->flags &= ~PG_ZERO;
KASSERT((m->flags & PG_UNHOLDFREE) == 0,
- ("vm_page_free: freeing PG_UNHOLDFREE page %p", m));
+ ("vm_page_free_prep: freeing PG_UNHOLDFREE page %p", m));
m->flags |= PG_UNHOLDFREE;
return (false);
}
@@ -3283,36 +3491,6 @@
return (true);
}
-void
-vm_page_free_phys_pglist(struct pglist *tq)
-{
- struct vm_domain *vmd;
- vm_page_t m;
- int cnt;
-
- if (TAILQ_EMPTY(tq))
- return;
- vmd = NULL;
- cnt = 0;
- TAILQ_FOREACH(m, tq, listq) {
- if (vmd != vm_pagequeue_domain(m)) {
- if (vmd != NULL) {
- vm_domain_free_unlock(vmd);
- vm_domain_freecnt_inc(vmd, cnt);
- cnt = 0;
- }
- vmd = vm_pagequeue_domain(m);
- vm_domain_free_lock(vmd);
- }
- vm_phys_free_pages(m, 0);
- cnt++;
- }
- if (vmd != NULL) {
- vm_domain_free_unlock(vmd);
- vm_domain_freecnt_inc(vmd, cnt);
- }
-}
-
/*
* vm_page_free_toq:
*
@@ -3327,7 +3505,7 @@
{
struct vm_domain *vmd;
- if (!vm_page_free_prep(m, false))
+ if (!vm_page_free_prep(m))
return;
vmd = vm_pagequeue_domain(m);
@@ -3425,22 +3603,25 @@
KASSERT(queue < PQ_COUNT || queue == PQ_NONE,
("vm_page_unwire: invalid queue %u request for page %p",
queue, m));
+ if ((m->oflags & VPO_UNMANAGED) == 0)
+ vm_page_assert_locked(m);
unwired = vm_page_unwire_noq(m);
- if (unwired && (m->oflags & VPO_UNMANAGED) == 0 && m->object != NULL) {
- if (m->queue == queue) {
+ if (!unwired || (m->oflags & VPO_UNMANAGED) != 0 || m->object == NULL)
+ return (unwired);
+
+ if (m->queue == queue) {
+ if (queue == PQ_ACTIVE)
+ vm_page_reference(m);
+ else if (queue != PQ_NONE)
+ vm_page_requeue(m);
+ } else {
+ vm_page_dequeue(m);
+ if (queue != PQ_NONE) {
+ vm_page_enqueue(m, queue);
if (queue == PQ_ACTIVE)
- vm_page_reference(m);
- else if (queue != PQ_NONE)
- vm_page_requeue(m);
- } else {
- vm_page_remque(m);
- if (queue != PQ_NONE) {
- vm_page_enqueue(queue, m);
- if (queue == PQ_ACTIVE)
- /* Initialize act_count. */
- vm_page_activate(m);
- }
+ /* Initialize act_count. */
+ vm_page_activate(m);
}
}
return (unwired);
@@ -3476,65 +3657,32 @@
}
/*
- * Move the specified page to the inactive queue, or requeue the page if it is
- * already in the inactive queue.
- *
- * Normally, "noreuse" is FALSE, resulting in LRU ordering of the inactive
- * queue. However, setting "noreuse" to TRUE will accelerate the specified
- * page's reclamation, but it will not unmap the page from any address space.
- * This is implemented by inserting the page near the head of the inactive
- * queue, using a marker page to guide FIFO insertion ordering.
+ * Move the specified page to the tail of the inactive queue, or requeue
+ * the page if it is already in the inactive queue.
*
* The page must be locked.
*/
-static inline void
-_vm_page_deactivate(vm_page_t m, boolean_t noreuse)
+void
+vm_page_deactivate(vm_page_t m)
{
- struct vm_pagequeue *pq;
- int queue;
vm_page_assert_locked(m);
- if (m->wire_count == 0 && (m->oflags & VPO_UNMANAGED) == 0) {
- pq = &vm_pagequeue_domain(m)->vmd_pagequeues[PQ_INACTIVE];
- /* Avoid multiple acquisitions of the inactive queue lock. */
- queue = m->queue;
- if (queue == PQ_INACTIVE) {
- vm_pagequeue_lock(pq);
- vm_page_dequeue_locked(m);
- } else {
- if (queue != PQ_NONE)
- vm_page_dequeue(m);
- vm_pagequeue_lock(pq);
- }
- m->queue = PQ_INACTIVE;
- if (noreuse)
- TAILQ_INSERT_BEFORE(
- &vm_pagequeue_domain(m)->vmd_inacthead, m,
- plinks.q);
- else
- TAILQ_INSERT_TAIL(&pq->pq_pl, m, plinks.q);
- vm_pagequeue_cnt_inc(pq);
- vm_pagequeue_unlock(pq);
- }
-}
-
-/*
- * Move the specified page to the inactive queue, or requeue the page if it is
- * already in the inactive queue.
- *
- * The page must be locked.
- */
-void
-vm_page_deactivate(vm_page_t m)
-{
+ if (m->wire_count > 0 || (m->oflags & VPO_UNMANAGED) != 0)
+ return;
- _vm_page_deactivate(m, FALSE);
+ if (!vm_page_inactive(m)) {
+ vm_page_remque(m);
+ vm_page_enqueue(m, PQ_INACTIVE);
+ } else
+ vm_page_requeue(m);
}
/*
- * Move the specified page to the inactive queue with the expectation
- * that it is unlikely to be reused.
+ * Move the specified page close to the head of the inactive queue,
+ * bypassing LRU. A marker page is used to maintain FIFO ordering.
+ * As with regular enqueues, we use a per-CPU batch queue to reduce
+ * contention on the page queue lock.
*
* The page must be locked.
*/
@@ -3542,7 +3690,17 @@
vm_page_deactivate_noreuse(vm_page_t m)
{
- _vm_page_deactivate(m, TRUE);
+ vm_page_assert_locked(m);
+
+ if (m->wire_count > 0 || (m->oflags & VPO_UNMANAGED) != 0)
+ return;
+
+ if (!vm_page_inactive(m))
+ vm_page_remque(m);
+ m->queue = PQ_INACTIVE;
+ if ((m->aflags & PGA_REQUEUE_HEAD) == 0)
+ vm_page_aflag_set(m, PGA_REQUEUE_HEAD);
+ vm_pqbatch_submit_page(m, PQ_INACTIVE);
}
/*
@@ -3555,13 +3713,14 @@
{
vm_page_assert_locked(m);
- if (m->wire_count == 0 && (m->oflags & VPO_UNMANAGED) == 0) {
- if (m->queue == PQ_LAUNDRY)
- vm_page_requeue(m);
- else {
- vm_page_remque(m);
- vm_page_enqueue(PQ_LAUNDRY, m);
- }
+ if (m->wire_count > 0 || (m->oflags & VPO_UNMANAGED) != 0)
+ return;
+
+ if (m->queue == PQ_LAUNDRY)
+ vm_page_requeue(m);
+ else {
+ vm_page_remque(m);
+ vm_page_enqueue(m, PQ_LAUNDRY);
}
}
@@ -3577,9 +3736,9 @@
vm_page_assert_locked(m);
KASSERT(m->wire_count == 0 && (m->oflags & VPO_UNMANAGED) == 0,
("page %p already unswappable", m));
- if (m->queue != PQ_NONE)
- vm_page_dequeue(m);
- vm_page_enqueue(PQ_UNSWAPPABLE, m);
+
+ vm_page_remque(m);
+ vm_page_enqueue(m, PQ_UNSWAPPABLE);
}
/*
Index: sys/vm/vm_pageout.c
===================================================================
--- sys/vm/vm_pageout.c
+++ sys/vm/vm_pageout.c
@@ -201,103 +201,134 @@
CTLFLAG_RW, &vm_page_max_wired, 0, "System-wide limit to wired page count");
static u_int isqrt(u_int num);
-static boolean_t vm_pageout_fallback_object_lock(vm_page_t, vm_page_t *);
static int vm_pageout_launder(struct vm_domain *vmd, int launder,
bool in_shortfall);
static void vm_pageout_laundry_worker(void *arg);
-static boolean_t vm_pageout_page_lock(vm_page_t, vm_page_t *);
+
+struct scan_state {
+ struct vm_batchqueue bq;
+ struct vm_pagequeue *pq;
+ vm_page_t marker;
+ int maxscan;
+ int scanned;
+};
+
+static void
+vm_pageout_init_scan(struct scan_state *ss, struct vm_pagequeue *pq,
+ vm_page_t marker, vm_page_t after, int maxscan)
+{
+
+ vm_pagequeue_assert_locked(pq);
+ KASSERT((marker->aflags & PGA_ENQUEUED) == 0,
+ ("marker %p already enqueued", marker));
+
+ if (after == NULL)
+ TAILQ_INSERT_HEAD(&pq->pq_pl, marker, plinks.q);
+ else
+ TAILQ_INSERT_AFTER(&pq->pq_pl, after, marker, plinks.q);
+ vm_page_aflag_set(marker, PGA_ENQUEUED);
+
+ vm_batchqueue_init(&ss->bq);
+ ss->pq = pq;
+ ss->marker = marker;
+ ss->maxscan = maxscan;
+ ss->scanned = 0;
+ vm_pagequeue_unlock(pq);
+}
+
+static void
+vm_pageout_end_scan(struct scan_state *ss)
+{
+ struct vm_pagequeue *pq;
+
+ pq = ss->pq;
+ vm_pagequeue_assert_locked(pq);
+ KASSERT((ss->marker->aflags & PGA_ENQUEUED) != 0,
+ ("marker %p not enqueued", ss->marker));
+
+ TAILQ_REMOVE(&pq->pq_pl, ss->marker, plinks.q);
+ vm_page_aflag_clear(ss->marker, PGA_ENQUEUED);
+ VM_CNT_ADD(v_pdpages, ss->scanned);
+}
/*
- * vm_pageout_fallback_object_lock:
- *
- * Lock vm object currently associated with `m'. VM_OBJECT_TRYWLOCK is
- * known to have failed and page queue must be either PQ_ACTIVE or
- * PQ_INACTIVE. To avoid lock order violation, unlock the page queue
- * while locking the vm object. Use marker page to detect page queue
- * changes and maintain notion of next page on page queue. Return
- * TRUE if no changes were detected, FALSE otherwise. vm object is
- * locked on return.
- *
- * This function depends on both the lock portion of struct vm_object
- * and normal struct vm_page being type stable.
+ * Ensure that the page has not been dequeued after a pageout batch was
+ * collected. See vm_page_dequeue_complete().
*/
-static boolean_t
-vm_pageout_fallback_object_lock(vm_page_t m, vm_page_t *next)
+static inline bool
+vm_pageout_page_queued(vm_page_t m, int queue)
{
- struct vm_page marker;
- struct vm_pagequeue *pq;
- boolean_t unchanged;
- vm_object_t object;
- int queue;
- queue = m->queue;
- vm_page_init_marker(&marker, queue);
- pq = vm_page_pagequeue(m);
- object = m->object;
-
- TAILQ_INSERT_AFTER(&pq->pq_pl, m, &marker, plinks.q);
- vm_pagequeue_unlock(pq);
- vm_page_unlock(m);
- VM_OBJECT_WLOCK(object);
- vm_page_lock(m);
- vm_pagequeue_lock(pq);
+ vm_page_assert_locked(m);
- /*
- * The page's object might have changed, and/or the page might
- * have moved from its original position in the queue. If the
- * page's object has changed, then the caller should abandon
- * processing the page because the wrong object lock was
- * acquired. Use the marker's plinks.q, not the page's, to
- * determine if the page has been moved. The state of the
- * page's plinks.q can be indeterminate; whereas, the marker's
- * plinks.q must be valid.
- */
- *next = TAILQ_NEXT(&marker, plinks.q);
- unchanged = m->object == object &&
- m == TAILQ_PREV(&marker, pglist, plinks.q);
- KASSERT(!unchanged || m->queue == queue,
- ("page %p queue %d %d", m, queue, m->queue));
- TAILQ_REMOVE(&pq->pq_pl, &marker, plinks.q);
- return (unchanged);
+ if ((m->aflags & PGA_DEQUEUE) != 0)
+ return (false);
+ atomic_thread_fence_acq();
+ return (m->queue == queue);
}
/*
- * Lock the page while holding the page queue lock. Use marker page
- * to detect page queue changes and maintain notion of next page on
- * page queue. Return TRUE if no changes were detected, FALSE
- * otherwise. The page is locked on return. The page queue lock might
- * be dropped and reacquired.
+ * Add a small number of queued pages to a batch queue for later processing
+ * without the corresponding queue lock held. The caller must have enqueued a
+ * marker page at the desired start point for the scan. Pages will be
+ * physically dequeued if the caller so requests. Otherwise, the returned
+ * batch may contain marker pages, and it is up to the caller to handle them.
*
- * This function depends on normal struct vm_page being type stable.
+ * When processing the batch queue, vm_pageout_page_queued() must be used to
+ * determine whether the page was logically dequeued by another thread. Once
+ * this check is performed, the page lock guarantees that the page will not be
+ * disassociated from the queue.
*/
-static boolean_t
-vm_pageout_page_lock(vm_page_t m, vm_page_t *next)
+static __always_inline void
+vm_pageout_collect_batch(struct scan_state *ss, const bool dequeue)
{
- struct vm_page marker;
struct vm_pagequeue *pq;
- boolean_t unchanged;
- int queue;
+ vm_page_t m, marker;
- vm_page_lock_assert(m, MA_NOTOWNED);
- if (vm_page_trylock(m))
- return (TRUE);
+ marker = ss->marker;
+ pq = ss->pq;
- queue = m->queue;
- vm_page_init_marker(&marker, queue);
- pq = vm_page_pagequeue(m);
+ KASSERT((marker->aflags & PGA_ENQUEUED) != 0,
+ ("marker %p not enqueued", ss->marker));
- TAILQ_INSERT_AFTER(&pq->pq_pl, m, &marker, plinks.q);
- vm_pagequeue_unlock(pq);
- vm_page_lock(m);
vm_pagequeue_lock(pq);
+ for (m = TAILQ_NEXT(marker, plinks.q); m != NULL &&
+ ss->scanned < ss->maxscan && ss->bq.bq_cnt < VM_BATCHQUEUE_SIZE;
+ m = TAILQ_NEXT(m, plinks.q), ss->scanned++) {
+ if ((m->flags & PG_MARKER) == 0) {
+ KASSERT((m->aflags & PGA_ENQUEUED) != 0,
+ ("page %p not enqueued", m));
+ KASSERT((m->flags & PG_FICTITIOUS) == 0,
+ ("Fictitious page %p cannot be in page queue", m));
+ KASSERT((m->oflags & VPO_UNMANAGED) == 0,
+ ("Unmanaged page %p cannot be in page queue", m));
+ } else if (dequeue)
+ continue;
+
+ (void)vm_batchqueue_insert(&ss->bq, m);
+ if (dequeue) {
+ TAILQ_REMOVE(&pq->pq_pl, m, plinks.q);
+ vm_page_aflag_clear(m, PGA_ENQUEUED);
+ }
+ }
+ TAILQ_REMOVE(&pq->pq_pl, marker, plinks.q);
+ if (__predict_true(m != NULL))
+ TAILQ_INSERT_BEFORE(m, marker, plinks.q);
+ else
+ TAILQ_INSERT_TAIL(&pq->pq_pl, marker, plinks.q);
+ if (dequeue)
+ vm_pagequeue_cnt_add(pq, -ss->bq.bq_cnt);
+ vm_pagequeue_unlock(pq);
+}
+
+/* Return the next page to be scanned, or NULL if the scan is complete. */
+static __always_inline vm_page_t
+vm_pageout_next(struct scan_state *ss, const bool dequeue)
+{
- /* Page queue might have changed. */
- *next = TAILQ_NEXT(&marker, plinks.q);
- unchanged = m == TAILQ_PREV(&marker, pglist, plinks.q);
- KASSERT(!unchanged || m->queue == queue,
- ("page %p queue %d %d", m, queue, m->queue));
- TAILQ_REMOVE(&pq->pq_pl, &marker, plinks.q);
- return (unchanged);
+ if (ss->bq.bq_cnt == 0)
+ vm_pageout_collect_batch(ss, dequeue);
+ return (vm_batchqueue_pop(&ss->bq));
}
/*
@@ -353,12 +384,12 @@
break;
}
vm_page_test_dirty(p);
- if (p->dirty == 0) {
+ if (p->dirty == 0 || !vm_page_in_laundry(p)) {
ib = 0;
break;
}
vm_page_lock(p);
- if (!vm_page_in_laundry(p) || vm_page_held(p)) {
+ if (vm_page_held(p)) {
vm_page_unlock(p);
ib = 0;
break;
@@ -381,10 +412,10 @@
if ((p = vm_page_next(ps)) == NULL || vm_page_busied(p))
break;
vm_page_test_dirty(p);
- if (p->dirty == 0)
+ if (p->dirty == 0 || !vm_page_in_laundry(p))
break;
vm_page_lock(p);
- if (!vm_page_in_laundry(p) || vm_page_held(p)) {
+ if (vm_page_held(p)) {
vm_page_unlock(p);
break;
}
@@ -675,13 +706,18 @@
static int
vm_pageout_launder(struct vm_domain *vmd, int launder, bool in_shortfall)
{
+ struct scan_state ss;
struct vm_pagequeue *pq;
+ struct mtx *mtx;
vm_object_t object;
- vm_page_t m, marker, next;
- int act_delta, error, maxscan, numpagedout, queue, starting_target;
+ vm_page_t m, marker;
+ int act_delta, error, numpagedout, queue, starting_target;
int vnodes_skipped;
- bool pageout_ok, queue_locked;
+ bool obj_locked, pageout_ok;
+ mtx = NULL;
+ obj_locked = false;
+ object = NULL;
starting_target = launder;
vnodes_skipped = 0;
@@ -691,10 +727,6 @@
* we've reached the end of the queue. A single iteration of this loop
* may cause more than one page to be laundered because of clustering.
*
- * maxscan ensures that we don't re-examine requeued pages. Any
- * additional pages written as part of a cluster are subtracted from
- * maxscan since they must be taken from the laundry queue.
- *
* As an optimization, we avoid laundering from PQ_UNSWAPPABLE when no
* swap devices are configured.
*/
@@ -704,53 +736,68 @@
queue = PQ_LAUNDRY;
scan:
- pq = &vmd->vmd_pagequeues[queue];
marker = &vmd->vmd_markers[queue];
-
+ pq = &vmd->vmd_pagequeues[queue];
vm_pagequeue_lock(pq);
- maxscan = pq->pq_cnt;
- queue_locked = true;
- for (m = TAILQ_FIRST(&pq->pq_pl);
- m != NULL && maxscan-- > 0 && launder > 0;
- m = next) {
- vm_pagequeue_assert_locked(pq);
- KASSERT(queue_locked, ("unlocked laundry queue"));
- KASSERT(vm_page_in_laundry(m),
- ("page %p has an inconsistent queue", m));
- next = TAILQ_NEXT(m, plinks.q);
- if ((m->flags & PG_MARKER) != 0)
+ vm_pageout_init_scan(&ss, pq, marker, NULL, pq->pq_cnt);
+ while (launder > 0 && (m = vm_pageout_next(&ss, false)) != NULL) {
+ if (__predict_false((m->flags & PG_MARKER) != 0))
continue;
- KASSERT((m->flags & PG_FICTITIOUS) == 0,
- ("PG_FICTITIOUS page %p cannot be in laundry queue", m));
- KASSERT((m->oflags & VPO_UNMANAGED) == 0,
- ("VPO_UNMANAGED page %p cannot be in laundry queue", m));
- if (!vm_pageout_page_lock(m, &next) || m->hold_count != 0) {
- vm_page_unlock(m);
+
+ vm_page_change_lock(m, &mtx);
+
+recheck:
+ /*
+ * The page may have been disassociated from the queue
+ * while locks were dropped.
+ */
+ if (!vm_pageout_page_queued(m, queue))
+ continue;
+
+ /*
+ * A requeue was requested, so this page gets a second
+ * chance.
+ */
+ if ((m->aflags & PGA_REQUEUE) != 0) {
+ vm_page_requeue(m);
continue;
}
+
+ /*
+ * Held pages are essentially stuck in the queue.
+ *
+ * Wired pages may not be freed. Complete their removal
+ * from the queue now to avoid needless revisits during
+ * future scans.
+ */
+ if (m->hold_count != 0)
+ continue;
if (m->wire_count != 0) {
- vm_page_dequeue_locked(m);
- vm_page_unlock(m);
+ vm_page_dequeue_deferred(m);
continue;
}
- object = m->object;
- if ((!VM_OBJECT_TRYWLOCK(object) &&
- (!vm_pageout_fallback_object_lock(m, &next) ||
- vm_page_held(m))) || vm_page_busied(m)) {
- VM_OBJECT_WUNLOCK(object);
- if (m->wire_count != 0 && vm_page_pagequeue(m) == pq)
- vm_page_dequeue_locked(m);
- vm_page_unlock(m);
- continue;
+
+ if (object != m->object) {
+ if (obj_locked) {
+ VM_OBJECT_WUNLOCK(object);
+ obj_locked = false;
+ }
+ object = m->object;
+ }
+ if (!obj_locked) {
+ if (!VM_OBJECT_TRYWLOCK(object)) {
+ mtx_unlock(mtx);
+ /* Depends on type-stability. */
+ VM_OBJECT_WLOCK(object);
+ obj_locked = true;
+ mtx_lock(mtx);
+ goto recheck;
+ } else
+ obj_locked = true;
}
- /*
- * Unlock the laundry queue, invalidating the 'next' pointer.
- * Use a marker to remember our place in the laundry queue.
- */
- TAILQ_INSERT_AFTER(&pq->pq_pl, m, marker, plinks.q);
- vm_pagequeue_unlock(pq);
- queue_locked = false;
+ if (vm_page_busied(m))
+ continue;
/*
* Invalid pages can be easily freed. They cannot be
@@ -799,9 +846,11 @@
*/
if (!in_shortfall)
launder--;
- goto drop_page;
- } else if ((object->flags & OBJ_DEAD) == 0)
- goto requeue_page;
+ continue;
+ } else if ((object->flags & OBJ_DEAD) == 0) {
+ vm_page_requeue(m);
+ continue;
+ }
}
/*
@@ -836,11 +885,8 @@
else
pageout_ok = true;
if (!pageout_ok) {
-requeue_page:
- vm_pagequeue_lock(pq);
- queue_locked = true;
- vm_page_requeue_locked(m);
- goto drop_page;
+ vm_page_requeue(m);
+ continue;
}
/*
@@ -859,24 +905,25 @@
error = vm_pageout_clean(m, &numpagedout);
if (error == 0) {
launder -= numpagedout;
- maxscan -= numpagedout - 1;
+ ss.scanned += numpagedout;
} else if (error == EDEADLK) {
pageout_lock_miss++;
vnodes_skipped++;
}
- goto relock_queue;
+ mtx = NULL;
+ obj_locked = false;
}
-drop_page:
- vm_page_unlock(m);
+ }
+ if (mtx != NULL) {
+ mtx_unlock(mtx);
+ mtx = NULL;
+ }
+ if (obj_locked) {
VM_OBJECT_WUNLOCK(object);
-relock_queue:
- if (!queue_locked) {
- vm_pagequeue_lock(pq);
- queue_locked = true;
- }
- next = TAILQ_NEXT(marker, plinks.q);
- TAILQ_REMOVE(&pq->pq_pl, marker, plinks.q);
+ obj_locked = false;
}
+ vm_pagequeue_lock(pq);
+ vm_pageout_end_scan(&ss);
vm_pagequeue_unlock(pq);
if (launder > 0 && queue == PQ_UNSWAPPABLE) {
@@ -1077,6 +1124,56 @@
}
}
+static int
+vm_pageout_reinsert_inactive_page(struct scan_state *ss, vm_page_t m)
+{
+ struct vm_domain *vmd;
+
+ if (!vm_page_inactive(m) || (m->aflags & PGA_ENQUEUED) != 0)
+ return (0);
+ vm_page_aflag_set(m, PGA_ENQUEUED);
+ if ((m->aflags & PGA_REQUEUE_HEAD) != 0) {
+ vmd = vm_pagequeue_domain(m);
+ TAILQ_INSERT_BEFORE(&vmd->vmd_inacthead, m, plinks.q);
+ vm_page_aflag_clear(m, PGA_REQUEUE | PGA_REQUEUE_HEAD);
+ } else if ((m->aflags & PGA_REQUEUE) != 0) {
+ TAILQ_INSERT_TAIL(&ss->pq->pq_pl, m, plinks.q);
+ vm_page_aflag_clear(m, PGA_REQUEUE | PGA_REQUEUE_HEAD);
+ } else
+ TAILQ_INSERT_BEFORE(ss->marker, m, plinks.q);
+ return (1);
+}
+
+/*
+ * Re-add stuck pages to the inactive queue. We will examine them again
+ * during the next scan. If the queue state of a page has changed since
+ * it was physically removed from the page queue in
+ * vm_pageout_collect_batch(), don't do anything with that page.
+ */
+static void
+vm_pageout_reinsert_inactive(struct scan_state *ss, struct vm_batchqueue *bq,
+ vm_page_t m)
+{
+ struct vm_pagequeue *pq;
+ int delta;
+
+ delta = 0;
+ pq = ss->pq;
+
+ if (m != NULL) {
+ if (vm_batchqueue_insert(bq, m))
+ return;
+ vm_pagequeue_lock(pq);
+ delta += vm_pageout_reinsert_inactive_page(ss, m);
+ } else
+ vm_pagequeue_lock(pq);
+ while ((m = vm_batchqueue_pop(bq)) != NULL)
+ delta += vm_pageout_reinsert_inactive_page(ss, m);
+ vm_pagequeue_cnt_add(pq, delta);
+ vm_pagequeue_unlock(pq);
+ vm_batchqueue_init(bq);
+}
+
/*
* vm_pageout_scan does the dirty work for the pageout daemon.
*
@@ -1089,13 +1186,16 @@
static bool
vm_pageout_scan(struct vm_domain *vmd, int pass, int shortage)
{
- vm_page_t m, marker, next;
+ struct scan_state ss;
+ struct vm_batchqueue rq;
+ struct mtx *mtx;
+ vm_page_t m, marker;
struct vm_pagequeue *pq;
vm_object_t object;
long min_scan;
- int act_delta, addl_page_shortage, deficit, inactq_shortage, maxscan;
- int page_shortage, scan_tick, scanned, starting_page_shortage;
- boolean_t queue_locked;
+ int act_delta, addl_page_shortage, deficit, inactq_shortage, max_scan;
+ int page_shortage, scan_tick, starting_page_shortage;
+ bool obj_locked;
/*
* If we need to reclaim memory ask kernel caches to return
@@ -1136,79 +1236,85 @@
page_shortage = deficit = 0;
starting_page_shortage = page_shortage;
+ mtx = NULL;
+ obj_locked = false;
+ object = NULL;
+ vm_batchqueue_init(&rq);
+
/*
* Start scanning the inactive queue for pages that we can free. The
* scan will stop when we reach the target or we have scanned the
* entire queue. (Note that m->act_count is not used to make
* decisions for the inactive queue, only for the active queue.)
*/
- pq = &vmd->vmd_pagequeues[PQ_INACTIVE];
marker = &vmd->vmd_markers[PQ_INACTIVE];
- maxscan = pq->pq_cnt;
+ pq = &vmd->vmd_pagequeues[PQ_INACTIVE];
vm_pagequeue_lock(pq);
- queue_locked = TRUE;
- for (m = TAILQ_FIRST(&pq->pq_pl);
- m != NULL && maxscan-- > 0 && page_shortage > 0;
- m = next) {
- vm_pagequeue_assert_locked(pq);
- KASSERT(queue_locked, ("unlocked inactive queue"));
- KASSERT(vm_page_inactive(m), ("Inactive queue %p", m));
+ vm_pageout_init_scan(&ss, pq, marker, NULL, pq->pq_cnt);
+ while (page_shortage > 0 && (m = vm_pageout_next(&ss, true)) != NULL) {
+ KASSERT((m->flags & PG_MARKER) == 0,
+ ("marker page %p was dequeued", m));
- VM_CNT_INC(v_pdpages);
- next = TAILQ_NEXT(m, plinks.q);
+ vm_page_change_lock(m, &mtx);
+recheck:
/*
- * skip marker pages
+ * The page may have been disassociated from the queue
+ * while locks were dropped.
*/
- if (m->flags & PG_MARKER)
+ if (!vm_pageout_page_queued(m, PQ_INACTIVE)) {
+ addl_page_shortage++;
continue;
+ }
- KASSERT((m->flags & PG_FICTITIOUS) == 0,
- ("Fictitious page %p cannot be in inactive queue", m));
- KASSERT((m->oflags & VPO_UNMANAGED) == 0,
- ("Unmanaged page %p cannot be in inactive queue", m));
+ /*
+ * The page was re-enqueued after the page queue lock was
+ * dropped, or a requeue was requested. This page gets a second
+ * chance.
+ */
+ if ((m->aflags & (PGA_ENQUEUED | PGA_REQUEUE |
+ PGA_REQUEUE_HEAD)) != 0)
+ goto reinsert;
/*
- * The page or object lock acquisitions fail if the
- * page was removed from the queue or moved to a
- * different position within the queue. In either
- * case, addl_page_shortage should not be incremented.
+ * Held pages are essentially stuck in the queue. So,
+ * they ought to be discounted from the inactive count.
+ * See the calculation of inactq_shortage before the
+ * loop over the active queue below.
+ *
+ * Wired pages may not be freed. Complete their removal
+ * from the queue now to avoid needless revisits during
+ * future scans.
*/
- if (!vm_pageout_page_lock(m, &next))
- goto unlock_page;
- else if (m->wire_count != 0) {
- /*
- * Wired pages may not be freed, and unwiring a queued
- * page will cause it to be requeued. Thus, remove them
- * from the queue now to avoid unnecessary revisits.
- */
- vm_page_dequeue_locked(m);
+ if (m->hold_count != 0) {
addl_page_shortage++;
- goto unlock_page;
- } else if (m->hold_count != 0) {
- /*
- * Held pages are essentially stuck in the
- * queue. So, they ought to be discounted
- * from the inactive count. See the
- * calculation of inactq_shortage before the
- * loop over the active queue below.
- */
+ goto reinsert;
+ }
+ if (m->wire_count != 0) {
addl_page_shortage++;
- goto unlock_page;
+ vm_page_dequeue_deferred(m);
+ continue;
}
- object = m->object;
- if (!VM_OBJECT_TRYWLOCK(object)) {
- if (!vm_pageout_fallback_object_lock(m, &next))
- goto unlock_object;
- else if (m->wire_count != 0) {
- vm_page_dequeue_locked(m);
- addl_page_shortage++;
- goto unlock_object;
- } else if (m->hold_count != 0) {
- addl_page_shortage++;
- goto unlock_object;
+
+ if (object != m->object) {
+ if (obj_locked) {
+ VM_OBJECT_WUNLOCK(object);
+ obj_locked = false;
}
+ object = m->object;
}
+ if (!obj_locked) {
+ if (!VM_OBJECT_TRYWLOCK(object)) {
+ mtx_unlock(mtx);
+ /* Depends on type-stability. */
+ VM_OBJECT_WLOCK(object);
+ obj_locked = true;
+ mtx_lock(mtx);
+ goto recheck;
+ } else
+ obj_locked = true;
+ }
+
if (vm_page_busied(m)) {
/*
* Don't mess with busy pages. Leave them at
@@ -1219,26 +1325,8 @@
* inactive count.
*/
addl_page_shortage++;
-unlock_object:
- VM_OBJECT_WUNLOCK(object);
-unlock_page:
- vm_page_unlock(m);
- continue;
+ goto reinsert;
}
- KASSERT(!vm_page_held(m), ("Held page %p", m));
-
- /*
- * Dequeue the inactive page and unlock the inactive page
- * queue, invalidating the 'next' pointer. Dequeueing the
- * page here avoids a later reacquisition (and release) of
- * the inactive page queue lock when vm_page_activate(),
- * vm_page_free(), or vm_page_launder() is called. Use a
- * marker to remember our place in the inactive queue.
- */
- TAILQ_INSERT_AFTER(&pq->pq_pl, m, marker, plinks.q);
- vm_page_dequeue_locked(m);
- vm_pagequeue_unlock(pq);
- queue_locked = FALSE;
/*
* Invalid pages can be easily freed. They cannot be
@@ -1276,14 +1364,10 @@
* queue.
*/
m->act_count += act_delta + ACT_ADVANCE;
- goto drop_page;
+ continue;
} else if ((object->flags & OBJ_DEAD) == 0) {
- vm_pagequeue_lock(pq);
- queue_locked = TRUE;
- m->queue = PQ_INACTIVE;
- TAILQ_INSERT_TAIL(&pq->pq_pl, m, plinks.q);
- vm_pagequeue_cnt_inc(pq);
- goto drop_page;
+ vm_page_aflag_set(m, PGA_REQUEUE);
+ goto reinsert;
}
}
@@ -1309,23 +1393,39 @@
*/
if (m->dirty == 0) {
free_page:
+ /*
+ * Because we dequeued the page and have already
+ * checked for concurrent dequeue and enqueue
+ * requests, we can safely disassociate the page
+ * from the inactive queue.
+ */
+ KASSERT((m->aflags & PGA_QUEUE_STATE_MASK) == 0,
+ ("page %p has queue state", m));
+ m->queue = PQ_NONE;
vm_page_free(m);
- VM_CNT_INC(v_dfree);
- --page_shortage;
+ page_shortage--;
} else if ((object->flags & OBJ_DEAD) == 0)
vm_page_launder(m);
-drop_page:
- vm_page_unlock(m);
+ continue;
+reinsert:
+ vm_pageout_reinsert_inactive(&ss, &rq, m);
+ }
+ if (mtx != NULL) {
+ mtx_unlock(mtx);
+ mtx = NULL;
+ }
+ if (obj_locked) {
VM_OBJECT_WUNLOCK(object);
- if (!queue_locked) {
- vm_pagequeue_lock(pq);
- queue_locked = TRUE;
- }
- next = TAILQ_NEXT(marker, plinks.q);
- TAILQ_REMOVE(&pq->pq_pl, marker, plinks.q);
+ obj_locked = false;
}
+ vm_pageout_reinsert_inactive(&ss, &rq, NULL);
+ vm_pageout_reinsert_inactive(&ss, &ss.bq, NULL);
+ vm_pagequeue_lock(pq);
+ vm_pageout_end_scan(&ss);
vm_pagequeue_unlock(pq);
+ VM_CNT_ADD(v_dfree, starting_page_shortage - page_shortage);
+
/*
* Wake up the laundry thread so that it can perform any needed
* laundering. If we didn't meet our target, we're in shortfall and
@@ -1386,9 +1486,9 @@
vm_paging_target(vmd) + deficit + addl_page_shortage;
inactq_shortage *= act_scan_laundry_weight;
+ marker = &vmd->vmd_markers[PQ_ACTIVE];
pq = &vmd->vmd_pagequeues[PQ_ACTIVE];
vm_pagequeue_lock(pq);
- maxscan = pq->pq_cnt;
/*
* If we're just idle polling attempt to visit every
@@ -1401,43 +1501,55 @@
min_scan /= hz * vm_pageout_update_period;
} else
min_scan = 0;
- if (min_scan > 0 || (inactq_shortage > 0 && maxscan > 0))
+ if (min_scan > 0 || (inactq_shortage > 0 && pq->pq_cnt > 0))
vmd->vmd_last_active_scan = scan_tick;
/*
* Scan the active queue for pages that can be deactivated. Update
* the per-page activity counter and use it to identify deactivation
* candidates. Held pages may be deactivated.
+ *
+ * To avoid requeuing each page that remains in the active queue, we
+ * implement the CLOCK algorithm. To maintain consistency in the
+ * generic page queue code, pages are inserted at the tail of the
+ * active queue. We thus use two hands, represented by marker pages:
+ * scans begin at the first hand, which precedes the second hand in
+ * the queue. When the two hands meet, they are moved back to the
+ * head and tail of the queue, respectively, and scanning resumes.
*/
- for (m = TAILQ_FIRST(&pq->pq_pl), scanned = 0; m != NULL && (scanned <
- min_scan || (inactq_shortage > 0 && scanned < maxscan)); m = next,
- scanned++) {
- KASSERT(m->queue == PQ_ACTIVE,
- ("vm_pageout_scan: page %p isn't active", m));
- next = TAILQ_NEXT(m, plinks.q);
- if ((m->flags & PG_MARKER) != 0)
- continue;
- KASSERT((m->flags & PG_FICTITIOUS) == 0,
- ("Fictitious page %p cannot be in active queue", m));
- KASSERT((m->oflags & VPO_UNMANAGED) == 0,
- ("Unmanaged page %p cannot be in active queue", m));
- if (!vm_pageout_page_lock(m, &next)) {
- vm_page_unlock(m);
- continue;
+ max_scan = inactq_shortage > 0 ? pq->pq_cnt : min_scan;
+act_scan:
+ vm_pageout_init_scan(&ss, pq, marker, &vmd->vmd_clock[0], max_scan);
+ while ((m = vm_pageout_next(&ss, false)) != NULL) {
+ if (__predict_false(m == &vmd->vmd_clock[1])) {
+ vm_pagequeue_lock(pq);
+ TAILQ_REMOVE(&pq->pq_pl, &vmd->vmd_clock[0], plinks.q);
+ TAILQ_REMOVE(&pq->pq_pl, &vmd->vmd_clock[1], plinks.q);
+ TAILQ_INSERT_HEAD(&pq->pq_pl, &vmd->vmd_clock[0],
+ plinks.q);
+ TAILQ_INSERT_TAIL(&pq->pq_pl, &vmd->vmd_clock[1],
+ plinks.q);
+ max_scan -= ss.scanned;
+ vm_pageout_end_scan(&ss);
+ goto act_scan;
}
+ if (__predict_false((m->flags & PG_MARKER) != 0))
+ continue;
+
+ vm_page_change_lock(m, &mtx);
/*
- * The count for page daemon pages is updated after checking
- * the page for eligibility.
+ * The page may have been disassociated from the queue
+ * while locks were dropped.
*/
- VM_CNT_INC(v_pdpages);
+ if (!vm_pageout_page_queued(m, PQ_ACTIVE))
+ continue;
/*
* Wired pages are dequeued lazily.
*/
if (m->wire_count != 0) {
- vm_page_dequeue_locked(m);
- vm_page_unlock(m);
+ vm_page_dequeue_deferred(m);
continue;
}
@@ -1476,14 +1588,7 @@
} else
m->act_count -= min(m->act_count, ACT_DECLINE);
- /*
- * Move this page to the tail of the active, inactive or laundry
- * queue depending on usage.
- */
if (m->act_count == 0) {
- /* Dequeue to avoid later lock recursion. */
- vm_page_dequeue_locked(m);
-
/*
* When not short for inactive pages, let dirty pages go
* through the inactive queue before moving to the
@@ -1515,11 +1620,18 @@
inactq_shortage--;
}
}
- } else
- vm_page_requeue_locked(m);
- vm_page_unlock(m);
+ }
+ }
+ if (mtx != NULL) {
+ mtx_unlock(mtx);
+ mtx = NULL;
}
+ vm_pagequeue_lock(pq);
+ TAILQ_REMOVE(&pq->pq_pl, &vmd->vmd_clock[0], plinks.q);
+ TAILQ_INSERT_AFTER(&pq->pq_pl, marker, &vmd->vmd_clock[0], plinks.q);
+ vm_pageout_end_scan(&ss);
vm_pagequeue_unlock(pq);
+
if (pass > 0)
vm_swapout_run_idle();
return (page_shortage <= 0);
Index: sys/vm/vm_pagequeue.h
===================================================================
--- sys/vm/vm_pagequeue.h
+++ sys/vm/vm_pagequeue.h
@@ -73,8 +73,17 @@
const char * const pq_name;
} __aligned(CACHE_LINE_SIZE);
-#include <sys/pidctrl.h>
+#ifndef VM_BATCHQUEUE_SIZE
+#define VM_BATCHQUEUE_SIZE 7
+#endif
+
+struct vm_batchqueue {
+ vm_page_t bq_pa[VM_BATCHQUEUE_SIZE];
+ int bq_cnt;
+} __aligned(CACHE_LINE_SIZE);
+
#include <vm/uma.h>
+#include <sys/pidctrl.h>
struct sysctl_oid;
/*
@@ -82,12 +91,12 @@
* and accounting.
*
* Lock Key:
- * f vmd_free_mtx
- * p vmd_pageout_mtx
- * d vm_domainset_lock
- * a atomic
- * c const after boot
- * q page queue lock
+ * f vmd_free_mtx
+ * p vmd_pageout_mtx
+ * d vm_domainset_lock
+ * a atomic
+ * c const after boot
+ * q page queue lock
*/
struct vm_domain {
struct vm_pagequeue vmd_pagequeues[PQ_COUNT];
@@ -107,8 +116,9 @@
boolean_t vmd_oom;
int vmd_oom_seq;
int vmd_last_active_scan;
- struct vm_page vmd_markers[PQ_COUNT]; /* markers for queue scans */
+ struct vm_page vmd_markers[PQ_COUNT]; /* (q) markers for queue scans */
struct vm_page vmd_inacthead; /* marker for LRU-defeating insertions */
+ struct vm_page vmd_clock[2]; /* markers for active queue scan */
int vmd_pageout_wanted; /* (a, p) pageout daemon wait channel */
int vmd_pageout_pages_needed; /* (d) page daemon waiting for pages? */
@@ -144,6 +154,7 @@
#define vm_pagequeue_assert_locked(pq) mtx_assert(&(pq)->pq_mutex, MA_OWNED)
#define vm_pagequeue_lock(pq) mtx_lock(&(pq)->pq_mutex)
#define vm_pagequeue_lockptr(pq) (&(pq)->pq_mutex)
+#define vm_pagequeue_trylock(pq) mtx_trylock(&(pq)->pq_mutex)
#define vm_pagequeue_unlock(pq) mtx_unlock(&(pq)->pq_mutex)
#define vm_domain_free_assert_locked(n) \
@@ -154,6 +165,8 @@
mtx_lock(vm_domain_free_lockptr((d)))
#define vm_domain_free_lockptr(d) \
(&(d)->vmd_free_mtx)
+#define vm_domain_free_trylock(d) \
+ mtx_trylock(vm_domain_free_lockptr((d)))
#define vm_domain_free_unlock(d) \
mtx_unlock(vm_domain_free_lockptr((d)))
@@ -172,14 +185,39 @@
vm_pagequeue_cnt_add(struct vm_pagequeue *pq, int addend)
{
-#ifdef notyet
vm_pagequeue_assert_locked(pq);
-#endif
pq->pq_cnt += addend;
}
#define vm_pagequeue_cnt_inc(pq) vm_pagequeue_cnt_add((pq), 1)
#define vm_pagequeue_cnt_dec(pq) vm_pagequeue_cnt_add((pq), -1)
+static inline void
+vm_batchqueue_init(struct vm_batchqueue *bq)
+{
+
+ bq->bq_cnt = 0;
+}
+
+static inline bool
+vm_batchqueue_insert(struct vm_batchqueue *bq, vm_page_t m)
+{
+
+ if (bq->bq_cnt < nitems(bq->bq_pa)) {
+ bq->bq_pa[bq->bq_cnt++] = m;
+ return (true);
+ }
+ return (false);
+}
+
+static inline vm_page_t
+vm_batchqueue_pop(struct vm_batchqueue *bq)
+{
+
+ if (bq->bq_cnt == 0)
+ return (NULL);
+ return (bq->bq_pa[--bq->bq_cnt]);
+}
+
void vm_domain_set(struct vm_domain *vmd);
void vm_domain_clear(struct vm_domain *vmd);
int vm_domain_allocate(struct vm_domain *vmd, int req, int npages);
Index: sys/vm/vm_phys.c
===================================================================
--- sys/vm/vm_phys.c
+++ sys/vm/vm_phys.c
@@ -354,9 +354,9 @@
m->order = order;
if (tail)
- TAILQ_INSERT_TAIL(&fl[order].pl, m, plinks.q);
+ TAILQ_INSERT_TAIL(&fl[order].pl, m, listq);
else
- TAILQ_INSERT_HEAD(&fl[order].pl, m, plinks.q);
+ TAILQ_INSERT_HEAD(&fl[order].pl, m, listq);
fl[order].lcnt++;
}
@@ -364,7 +364,7 @@
vm_freelist_rem(struct vm_freelist *fl, vm_page_t m, int order)
{
- TAILQ_REMOVE(&fl[order].pl, m, plinks.q);
+ TAILQ_REMOVE(&fl[order].pl, m, listq);
fl[order].lcnt--;
m->order = VM_NFREEORDER;
}
@@ -1196,7 +1196,7 @@
oind++) {
for (pind = 0; pind < VM_NFREEPOOL; pind++) {
fl = (*seg->free_queues)[pind];
- TAILQ_FOREACH(m_ret, &fl[oind].pl, plinks.q) {
+ TAILQ_FOREACH(m_ret, &fl[oind].pl, listq) {
/*
* Is the size of this allocation request
* larger than the largest block size?
Index: sys/vm/vm_swapout.c
===================================================================
--- sys/vm/vm_swapout.c
+++ sys/vm/vm_swapout.c
@@ -399,8 +399,15 @@
swapout_flags = vm_pageout_req_swapout;
vm_pageout_req_swapout = 0;
mtx_unlock(&vm_daemon_mtx);
- if (swapout_flags)
+ if (swapout_flags != 0) {
+ /*
+ * Drain the per-CPU page queue batches as a deadlock
+ * avoidance measure.
+ */
+ if ((swapout_flags & VM_SWAP_NORMAL) != 0)
+ vm_page_drain_pqbatch();
swapout_procs(swapout_flags);
+ }
/*
* scan the processes for exceeding their rlimits or if
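The diff above defers page queue operations through small per-CPU batch queues: a submission normally just records a page pointer in the local batch, and the page queue lock is taken only when the batch overflows (see vm_batchqueue_insert() in vm_pagequeue.h and vm_pqbatch_submit_page() in vm_page.c). The following is a minimal userland sketch of that submission pattern, not part of the diff and not the kernel implementation: the names (batchqueue_insert, pqbatch_submit, struct page) are illustrative stand-ins, a thread-local buffer replaces the kernel's DPCPU storage and critical sections, a plain mutex replaces the page queue mutex and its trylock path, and the per-page aflags bookkeeping is omitted.

```c
/*
 * Simplified userland model of per-CPU page queue batching.  All names here
 * are hypothetical stand-ins for the kernel structures in this diff.
 */
#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

#define BATCHQUEUE_SIZE	7		/* default VM_BATCHQUEUE_SIZE in the diff */

struct page {
	int id;				/* stand-in for struct vm_page */
};

struct batchqueue {
	struct page *bq_pa[BATCHQUEUE_SIZE];
	int bq_cnt;
};

struct pagequeue {
	pthread_mutex_t pq_mutex;	/* plays the role of the page queue lock */
	int pq_cnt;			/* pages logically enqueued */
};

/* One batch per thread stands in for the kernel's per-CPU batch queue. */
static _Thread_local struct batchqueue td_batch;

static bool
batchqueue_insert(struct batchqueue *bq, struct page *m)
{
	if (bq->bq_cnt < BATCHQUEUE_SIZE) {
		bq->bq_pa[bq->bq_cnt++] = m;
		return (true);
	}
	return (false);
}

/* Apply every pending operation; the queue lock must be held. */
static void
batchqueue_process(struct pagequeue *pq, struct batchqueue *bq)
{
	pq->pq_cnt += bq->bq_cnt;	/* a real flush would link each page in */
	bq->bq_cnt = 0;
}

/*
 * Submit one deferred queue operation.  The common case touches only the
 * thread-local batch; the queue lock is taken when the batch overflows,
 * and the overflowing page is handled under that same lock acquisition.
 */
static void
pqbatch_submit(struct pagequeue *pq, struct page *m)
{
	if (batchqueue_insert(&td_batch, m))
		return;
	pthread_mutex_lock(&pq->pq_mutex);
	batchqueue_process(pq, &td_batch);
	pq->pq_cnt++;			/* process the page that did not fit */
	pthread_mutex_unlock(&pq->pq_mutex);
}

int
main(void)
{
	struct pagequeue pq = { .pq_mutex = PTHREAD_MUTEX_INITIALIZER };
	struct page pages[20];

	for (int i = 0; i < 20; i++) {
		pages[i].id = i;
		pqbatch_submit(&pq, &pages[i]);
	}
	/* 20 submissions need only two lock acquisitions; the rest stay batched. */
	printf("queued %d, pending in batch %d\n", pq.pq_cnt, td_batch.bq_cnt);
	return (0);
}
```

The point of the design is visible in the example: the lock is acquired roughly once per BATCHQUEUE_SIZE submissions rather than once per page, which is why the diff also adds vm_page_drain_pqbatch() to force the per-CPU batches out during severe memory shortage, when entries must not linger.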