Page Menu
Home
FreeBSD
Search
Configure Global Search
Log In
Files
F151335429
D21257.id61584.diff
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Flag For Later
Award Token
Size
10 KB
Referenced Files
None
Subscribers
None
D21257.id61584.diff
View Options
Index: head/sys/vm/vm_page.h
===================================================================
--- head/sys/vm/vm_page.h
+++ head/sys/vm/vm_page.h
@@ -147,28 +147,34 @@
* sleep until the page's busy state changes, after which the caller
* must re-lookup the page and re-evaluate its state.
*
- * The queue field is the index of the page queue containing the
- * page, or PQ_NONE if the page is not enqueued. The queue lock of a
- * page is the page queue lock corresponding to the page queue index,
- * or the page lock (P) for the page if it is not enqueued. To modify
- * the queue field, the queue lock for the old value of the field must
- * be held. It is invalid for a page's queue field to transition
- * between two distinct page queue indices. That is, when updating
- * the queue field, either the new value or the old value must be
- * PQ_NONE.
+ * The queue field is the index of the page queue containing the page,
+ * or PQ_NONE if the page is not enqueued. The queue lock of a page is
+ * the page queue lock corresponding to the page queue index, or the
+ * page lock (P) for the page if it is not enqueued. To modify the
+ * queue field, the queue lock for the old value of the field must be
+ * held. There is one exception to this rule: the page daemon may
+ * transition the queue field from PQ_INACTIVE to PQ_NONE immediately
+ * prior to freeing a page during an inactive queue scan. At that
+ * point the page has already been physically dequeued and no other
+ * references to that vm_page structure exist.
*
* To avoid contention on page queue locks, page queue operations
- * (enqueue, dequeue, requeue) are batched using per-CPU queues.
- * A deferred operation is requested by inserting an entry into a
- * batch queue; the entry is simply a pointer to the page, and the
- * request type is encoded in the page's aflags field using the values
- * in PGA_QUEUE_STATE_MASK. The type-stability of struct vm_pages is
+ * (enqueue, dequeue, requeue) are batched using per-CPU queues. A
+ * deferred operation is requested by inserting an entry into a batch
+ * queue; the entry is simply a pointer to the page, and the request
+ * type is encoded in the page's aflags field using the values in
+ * PGA_QUEUE_STATE_MASK. The type-stability of struct vm_pages is
* crucial to this scheme since the processing of entries in a given
- * batch queue may be deferred indefinitely. In particular, a page
- * may be freed before its pending batch queue entries have been
- * processed. The page lock (P) must be held to schedule a batched
- * queue operation, and the page queue lock must be held in order to
- * process batch queue entries for the page queue.
+ * batch queue may be deferred indefinitely. In particular, a page may
+ * be freed before its pending batch queue entries have been processed.
+ * The page lock (P) must be held to schedule a batched queue
+ * operation, and the page queue lock must be held in order to process
+ * batch queue entries for the page queue. There is one exception to
+ * this rule: the thread freeing a page may schedule a dequeue without
+ * holding the page lock. In this scenario the only other thread which
+ * may hold a reference to the page is the page daemon, which is
+ * careful to avoid modifying the page's queue state once the dequeue
+ * has been requested by setting PGA_DEQUEUE.
*/
#if PAGE_SIZE == 4096
@@ -578,6 +584,7 @@
int vm_page_sleep_if_busy(vm_page_t m, const char *msg);
vm_offset_t vm_page_startup(vm_offset_t vaddr);
void vm_page_sunbusy(vm_page_t m);
+void vm_page_swapqueue(vm_page_t m, uint8_t oldq, uint8_t newq);
int vm_page_trysbusy(vm_page_t m);
void vm_page_unhold_pages(vm_page_t *ma, int count);
void vm_page_unswappable(vm_page_t m);
@@ -667,9 +674,31 @@
* destinations. In order that we can easily use a 32-bit operation, we
* require that the aflags field be 32-bit aligned.
*/
-CTASSERT(offsetof(struct vm_page, aflags) % sizeof(uint32_t) == 0);
+_Static_assert(offsetof(struct vm_page, aflags) % sizeof(uint32_t) == 0,
+ "aflags field is not 32-bit aligned");
/*
+ * We want to be able to update the aflags and queue fields atomically in
+ * the same operation.
+ */
+_Static_assert(offsetof(struct vm_page, aflags) / sizeof(uint32_t) ==
+ offsetof(struct vm_page, queue) / sizeof(uint32_t),
+ "aflags and queue fields do not belong to the same 32-bit word");
+_Static_assert(offsetof(struct vm_page, queue) % sizeof(uint32_t) == 2,
+ "queue field is at an unexpected offset");
+_Static_assert(sizeof(((struct vm_page *)NULL)->queue) == 1,
+ "queue field has an unexpected size");
+
+#if BYTE_ORDER == LITTLE_ENDIAN
+#define VM_PAGE_AFLAG_SHIFT 0 /* aflags is byte 0 of the word: bits 0-7 */
+#define VM_PAGE_QUEUE_SHIFT 16 /* queue is byte 2 of the word: bits 16-23 */
+#else
+#define VM_PAGE_AFLAG_SHIFT 24 /* aflags is byte 0 of the word: bits 24-31 */
+#define VM_PAGE_QUEUE_SHIFT 8 /* queue is byte 2 of the word: bits 8-15 */
+#endif
+#define VM_PAGE_QUEUE_MASK (0xff << VM_PAGE_QUEUE_SHIFT) /* 1-byte queue field, in place */
+
+/*
* Clear the given bits in the specified page.
*/
static inline void
@@ -689,12 +718,7 @@
* within this word are handled properly by the atomic update.
*/
addr = (void *)&m->aflags;
- KASSERT(((uintptr_t)addr & (sizeof(uint32_t) - 1)) == 0,
- ("vm_page_aflag_clear: aflags is misaligned"));
- val = bits;
-#if BYTE_ORDER == BIG_ENDIAN
- val <<= 24;
-#endif
+ val = bits << VM_PAGE_AFLAG_SHIFT;
atomic_clear_32(addr, val);
}
@@ -714,14 +738,44 @@
* within this word are handled properly by the atomic update.
*/
addr = (void *)&m->aflags;
- KASSERT(((uintptr_t)addr & (sizeof(uint32_t) - 1)) == 0,
- ("vm_page_aflag_set: aflags is misaligned"));
- val = bits;
-#if BYTE_ORDER == BIG_ENDIAN
- val <<= 24;
-#endif
+ val = bits << VM_PAGE_AFLAG_SHIFT;
atomic_set_32(addr, val);
-}
+}
+
+/*
+ * Atomically update the queue state of the page. The operation fails if
+ * any of the queue flags in "fflags" are set or if the "queue" field of
+ * the page does not match the expected value; if the operation is
+ * successful, the flags in "nflags" are set and all other queue state
+ * flags are cleared.
+ *
+ * "oldq" is the queue index the page is expected to hold and "newq" is
+ * the index stored on success. "fflags" and "nflags" are unshifted
+ * aflags values; they are shifted into position within the 32-bit word
+ * that contains both the aflags and queue fields before being applied.
+ * The queue state flags replaced on success are PGA_DEQUEUE, PGA_REQUEUE
+ * and PGA_REQUEUE_HEAD; all other bits of the word are preserved.
+ *
+ * Both checks are repeated on every iteration of the compare-and-set
+ * loop; per atomic(9), a failed atomic_fcmpset_32() writes the value it
+ * observed back into "oval", so each retry tests fresh state.
+ *
+ * The page lock is asserted. Returns true if the word was updated and
+ * false, without storing anything, if either check failed.
+ */
+static inline bool
+vm_page_pqstate_cmpset(vm_page_t m, uint32_t oldq, uint32_t newq,
+ uint32_t fflags, uint32_t nflags)
+{
+ uint32_t *addr, nval, oval, qsmask;
+
+ vm_page_assert_locked(m);
+
+ fflags <<= VM_PAGE_AFLAG_SHIFT; /* move the arguments into word position */
+ nflags <<= VM_PAGE_AFLAG_SHIFT;
+ newq <<= VM_PAGE_QUEUE_SHIFT;
+ oldq <<= VM_PAGE_QUEUE_SHIFT;
+ qsmask = ((PGA_DEQUEUE | PGA_REQUEUE | PGA_REQUEUE_HEAD) <<
+ VM_PAGE_AFLAG_SHIFT) | VM_PAGE_QUEUE_MASK; /* bits rewritten on success */
+
+ addr = (void *)&m->aflags; /* word shared by the aflags and queue fields */
+ oval = atomic_load_32(addr);
+ do {
+ if ((oval & fflags) != 0) /* a forbidden flag is set */
+ return (false);
+ if ((oval & VM_PAGE_QUEUE_MASK) != oldq) /* not in the expected queue */
+ return (false);
+ nval = (oval & ~qsmask) | nflags | newq; /* keep unrelated bits */
+ } while (!atomic_fcmpset_32(addr, &oval, nval));
+
+ return (true);
+}
/*
* vm_page_dirty:
Index: head/sys/vm/vm_page.c
===================================================================
--- head/sys/vm/vm_page.c
+++ head/sys/vm/vm_page.c
@@ -3130,6 +3130,13 @@
vm_batchqueue_init(bq);
}
+/*
+ * vm_page_pqbatch_submit: [ internal use only ]
+ *
+ * Enqueue a page in the specified page queue's batched work queue.
+ * The caller must have encoded the requested operation in the page
+ * structure's aflags field.
+ */
void
vm_page_pqbatch_submit(vm_page_t m, uint8_t queue)
{
@@ -3251,17 +3258,26 @@
if ((queue = vm_page_queue(m)) == PQ_NONE)
return;
- vm_page_aflag_set(m, PGA_DEQUEUE);
- vm_page_pqbatch_submit(m, queue);
+
+ /*
+ * Set PGA_DEQUEUE if it is not already set to handle a concurrent call
+ * to vm_page_dequeue_deferred_free(). In particular, avoid modifying
+ * the page's queue state once vm_page_dequeue_deferred_free() has been
+ * called. In the event of a race, two batch queue entries for the page
+ * will be created, but the second will have no effect.
+ */
+ if (vm_page_pqstate_cmpset(m, queue, queue, PGA_DEQUEUE, PGA_DEQUEUE))
+ vm_page_pqbatch_submit(m, queue);
}
/*
* A variant of vm_page_dequeue_deferred() that does not assert the page
- * lock and is only to be called from vm_page_free_prep(). It is just an
- * open-coded implementation of vm_page_dequeue_deferred(). Because the
- * page is being freed, we can assume that nothing else is scheduling queue
- * operations on this page, so we get for free the mutual exclusion that
- * is otherwise provided by the page lock.
+ * lock and is only to be called from vm_page_free_prep(). Because the
+ * page is being freed, we can assume that nothing other than the page
+ * daemon is scheduling queue operations on this page, so we get for
+ * free the mutual exclusion that is otherwise provided by the page lock.
+ * To handle races, the page daemon must take care to atomically check
+ * for PGA_DEQUEUE when updating queue state.
*/
static void
vm_page_dequeue_deferred_free(vm_page_t m)
@@ -3372,6 +3388,42 @@
if ((m->aflags & PGA_REQUEUE) == 0)
vm_page_aflag_set(m, PGA_REQUEUE);
vm_page_pqbatch_submit(m, atomic_load_8(&m->queue));
+}
+
+/*
+ * vm_page_swapqueue: [ internal use only ]
+ *
+ * Move the page from one queue to another, or to the tail of its
+ * current queue, in the face of a possible concurrent call to
+ * vm_page_dequeue_deferred_free().
+ *
+ * NOTE(review): the KASSERT below rejects oldq == newq, which conflicts
+ * with "or to the tail of its current queue" above -- confirm which of
+ * the two reflects the intended contract.
+ *
+ * "oldq" is the queue the page is expected to be in and "newq" is the
+ * destination; both must be less than PQ_COUNT. The page must be
+ * managed and the page lock is asserted. If PGA_DEQUEUE is already set
+ * or the page's queue field is not "oldq", nothing is changed.
+ * Otherwise the queue field becomes "newq" with PGA_REQUEUE set, the
+ * page is unlinked from "oldq" if PGA_ENQUEUED is set, and the page is
+ * submitted to the batch queue for "newq".
+ */
+void
+vm_page_swapqueue(vm_page_t m, uint8_t oldq, uint8_t newq)
+{
+ struct vm_pagequeue *pq;
+
+ KASSERT(oldq < PQ_COUNT && newq < PQ_COUNT && oldq != newq,
+ ("vm_page_swapqueue: invalid queues (%d, %d)", oldq, newq));
+ KASSERT((m->oflags & VPO_UNMANAGED) == 0,
+ ("vm_page_swapqueue: page %p is unmanaged", m));
+ vm_page_assert_locked(m);
+
+ /*
+ * Atomically update the queue field and set PGA_REQUEUE while
+ * ensuring that PGA_DEQUEUE has not been set.
+ */
+ pq = &vm_pagequeue_domain(m)->vmd_pagequeues[oldq]; /* queue being left */
+ vm_pagequeue_lock(pq);
+ if (!vm_page_pqstate_cmpset(m, oldq, newq, PGA_DEQUEUE, PGA_REQUEUE)) {
+ vm_pagequeue_unlock(pq); /* PGA_DEQUEUE set or queue != oldq: do nothing */
+ return;
+ }
+ if ((m->aflags & PGA_ENQUEUED) != 0) {
+ vm_pagequeue_remove(pq, m); /* unlink while oldq's lock is held */
+ vm_page_aflag_clear(m, PGA_ENQUEUED);
+ }
+ vm_pagequeue_unlock(pq);
+ vm_page_pqbatch_submit(m, newq); /* operation was encoded as PGA_REQUEUE above */
}
/*
Index: head/sys/vm/vm_pageout.c
===================================================================
--- head/sys/vm/vm_pageout.c
+++ head/sys/vm/vm_pageout.c
@@ -742,7 +742,7 @@
* chance.
*/
if ((m->aflags & PGA_REQUEUE) != 0) {
- vm_page_requeue(m);
+ vm_page_pqbatch_submit(m, queue);
continue;
}
@@ -1256,9 +1256,9 @@
* place them directly in the laundry queue to reduce
* queuing overhead.
*/
- if (page_shortage <= 0)
- vm_page_deactivate(m);
- else {
+ if (page_shortage <= 0) {
+ vm_page_swapqueue(m, PQ_ACTIVE, PQ_INACTIVE);
+ } else {
/*
* Calling vm_page_test_dirty() here would
* require acquisition of the object's write
@@ -1270,11 +1270,13 @@
* dirty field by the pmap.
*/
if (m->dirty == 0) {
- vm_page_deactivate(m);
+ vm_page_swapqueue(m, PQ_ACTIVE,
+ PQ_INACTIVE);
page_shortage -=
act_scan_laundry_weight;
} else {
- vm_page_launder(m);
+ vm_page_swapqueue(m, PQ_ACTIVE,
+ PQ_LAUNDRY);
page_shortage--;
}
}
File Metadata
Details
Attached
Mime Type
text/plain
Expires
Wed, Apr 8, 4:43 PM (7 h, 53 m)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
31092192
Default Alt Text
D21257.id61584.diff (10 KB)
Attached To
Mode
D21257: Support atomic updates of the "aflags" and "queue" fields.
Attached
Detach File
Event Timeline
Log In to Comment