Page Menu
Home
FreeBSD
Search
Configure Global Search
Log In
Files
F154439695
D22908.id66119.diff
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Flag For Later
Award Token
Size
30 KB
Referenced Files
None
Subscribers
None
D22908.id66119.diff
View Options
Index: sys/kern/kern_synch.c
===================================================================
--- sys/kern/kern_synch.c
+++ sys/kern/kern_synch.c
@@ -381,15 +381,21 @@
* a precise answer should use refcount_wait().
*/
void
-refcount_sleep(volatile u_int *count, const char *wmesg, int pri)
+_refcount_sleep(volatile u_int *count, struct lock_object *lock,
+ const char *wmesg, int pri)
{
void *wchan;
u_int old;
- if (REFCOUNT_COUNT(*count) == 0)
+ if (REFCOUNT_COUNT(*count) == 0) {
+ if (lock != NULL)
+ LOCK_CLASS(lock)->lc_unlock(lock);
return;
+ }
wchan = __DEVOLATILE(void *, count);
sleepq_lock(wchan);
+ if (lock != NULL)
+ LOCK_CLASS(lock)->lc_unlock(lock);
old = *count;
for (;;) {
if (REFCOUNT_COUNT(old) == 0) {
Index: sys/sys/refcount.h
===================================================================
--- sys/sys/refcount.h
+++ sys/sys/refcount.h
@@ -46,7 +46,10 @@
#define REFCOUNT_COUNT(x) ((x) & ~REFCOUNT_WAITER)
bool refcount_release_last(volatile u_int *count, u_int n, u_int old);
-void refcount_sleep(volatile u_int *count, const char *wmesg, int prio);
+
+struct lock_object;
+void _refcount_sleep(volatile u_int *count, struct lock_object *,
+ const char *wmesg, int prio);
/*
* Attempt to handle reference count overflow and underflow. Force the counter
@@ -135,6 +138,16 @@
return (refcount_releasen(count, 1));
}
+static __inline void
+refcount_sleep(volatile u_int *count, const char *wmesg, int prio)
+{
+
+ _refcount_sleep(count, NULL, wmesg, prio);
+}
+
+#define refcount_sleep_interlock(count, lock, wmesg, prio) \
+ _refcount_sleep((count), (struct lock_object *)(lock), (wmesg), (prio))
+
static __inline void
refcount_wait(volatile u_int *count, const char *wmesg, int prio)
{
Index: sys/vm/swap_pager.c
===================================================================
--- sys/vm/swap_pager.c
+++ sys/vm/swap_pager.c
@@ -974,15 +974,12 @@
* Destination has no swapblk and is not resident, transfer source.
* swp_pager_meta_build() can sleep.
*/
- vm_object_pip_add(srcobject, 1);
VM_OBJECT_WUNLOCK(srcobject);
- vm_object_pip_add(dstobject, 1);
dstaddr = swp_pager_meta_build(dstobject, pindex, addr);
KASSERT(dstaddr == SWAPBLK_NONE,
("Unexpected destination swapblk"));
- vm_object_pip_wakeup(dstobject);
VM_OBJECT_WLOCK(srcobject);
- vm_object_pip_wakeup(srcobject);
+
return (true);
}
@@ -995,8 +992,7 @@
* we keep the destination's.
*
* This routine is allowed to sleep. It may sleep allocating metadata
- * indirectly through swp_pager_meta_build() or if paging is still in
- * progress on the source.
+ * indirectly through swp_pager_meta_build().
*
* The source object contains no vm_page_t's (which is just as well)
*
@@ -1019,18 +1015,14 @@
*/
if (destroysource && (srcobject->flags & OBJ_ANON) == 0 &&
srcobject->handle != NULL) {
- vm_object_pip_add(srcobject, 1);
VM_OBJECT_WUNLOCK(srcobject);
- vm_object_pip_add(dstobject, 1);
VM_OBJECT_WUNLOCK(dstobject);
sx_xlock(&sw_alloc_sx);
TAILQ_REMOVE(NOBJLIST(srcobject->handle), srcobject,
pager_object_list);
sx_xunlock(&sw_alloc_sx);
VM_OBJECT_WLOCK(dstobject);
- vm_object_pip_wakeup(dstobject);
VM_OBJECT_WLOCK(srcobject);
- vm_object_pip_wakeup(srcobject);
}
/*
@@ -1207,26 +1199,29 @@
reqcount = count;
- /*
- * Determine the final number of read-behind pages and
- * allocate them BEFORE releasing the object lock. Otherwise,
- * there can be a problematic race with vm_object_split().
- * Specifically, vm_object_split() might first transfer pages
- * that precede ma[0] in the current object to a new object,
- * and then this function incorrectly recreates those pages as
- * read-behind pages in the current object.
- */
KASSERT(object->type == OBJT_SWAP,
("%s: object not swappable", __func__));
if (!swap_pager_haspage(object, ma[0]->pindex, &maxbehind, &maxahead))
return (VM_PAGER_FAIL);
+ KASSERT(reqcount - 1 <= maxahead,
+ ("page count %d extends beyond swap block", reqcount));
+
+ /*
+ * Do not transfer any pages other than those that are xbusied
+ * when running during a split or collapse operation. This
+ * prevents clustering from re-creating pages which are being
+ * moved into another object.
+ */
+ if ((object->flags & (OBJ_SPLIT | OBJ_DEAD)) != 0) {
+ maxahead = reqcount - 1;
+ maxbehind = 0;
+ }
+
/*
* Clip the readahead and readbehind ranges to exclude resident pages.
*/
if (rahead != NULL) {
- KASSERT(reqcount - 1 <= maxahead,
- ("page count %d extends beyond swap block", reqcount));
*rahead = imin(*rahead, maxahead - (reqcount - 1));
pindex = ma[reqcount - 1]->pindex;
msucc = TAILQ_NEXT(ma[reqcount - 1], listq);
Index: sys/vm/vm_object.h
===================================================================
--- sys/vm/vm_object.h
+++ sys/vm/vm_object.h
@@ -190,6 +190,8 @@
#define OBJ_SIZEVNLOCK 0x0040 /* lock vnode to check obj size */
#define OBJ_PG_DTOR 0x0080 /* dont reset object, leave that for dtor */
#define OBJ_TMPFS_NODE 0x0200 /* object belongs to tmpfs VREG node */
+#define OBJ_SPLIT 0x0400 /* object is being split */
+#define OBJ_COLLAPSING 0x0800 /* Parent of collapse. */
#define OBJ_COLORED 0x1000 /* pg_color is defined */
#define OBJ_ONEMAPPING 0x2000 /* One USE (a single, non-forked) mapping flag */
#define OBJ_SHADOWLIST 0x4000 /* Object is on the shadow list. */
Index: sys/vm/vm_object.c
===================================================================
--- sys/vm/vm_object.c
+++ sys/vm/vm_object.c
@@ -116,7 +116,6 @@
boolean_t *eio);
static boolean_t vm_object_page_remove_write(vm_page_t p, int flags,
boolean_t *allclean);
-static void vm_object_qcollapse(vm_object_t object);
static void vm_object_vndeallocate(vm_object_t object);
static void vm_object_backing_remove(vm_object_t object);
@@ -164,12 +163,18 @@
&object_bypasses,
"VM object bypasses");
+static counter_u64_t object_collapse_waits = EARLY_COUNTER;
+SYSCTL_COUNTER_U64(_vm_stats_object, OID_AUTO, collapse_waits, CTLFLAG_RD,
+ &object_collapse_waits,
+ "Number of sleeps for collapse");
+
static void
counter_startup(void)
{
object_collapses = counter_u64_alloc(M_WAITOK);
object_bypasses = counter_u64_alloc(M_WAITOK);
+ object_collapse_waits = counter_u64_alloc(M_WAITOK);
}
SYSINIT(object_counters, SI_SUB_CPU, SI_ORDER_ANY, counter_startup, NULL);
@@ -376,6 +381,19 @@
refcount_releasen(&object->paging_in_progress, i);
}
+/*
+ * Atomically drop the interlock and wait for pip to drain. This protects
+ * from sleep/wakeup races due to identity changes. The lock is not
+ * re-acquired on return.
+ */
+static void
+vm_object_pip_sleep(vm_object_t object, char *waitid)
+{
+
+ refcount_sleep_interlock(&object->paging_in_progress,
+ &object->lock, waitid, PVM);
+}
+
void
vm_object_pip_wait(vm_object_t object, char *waitid)
{
@@ -383,8 +401,7 @@
VM_OBJECT_ASSERT_WLOCKED(object);
while (REFCOUNT_COUNT(object->paging_in_progress) > 0) {
- VM_OBJECT_WUNLOCK(object);
- refcount_wait(&object->paging_in_progress, waitid, PVM);
+ vm_object_pip_sleep(object, waitid);
VM_OBJECT_WLOCK(object);
}
}
@@ -470,8 +487,7 @@
/*
* vm_object_reference:
*
- * Gets another reference to the given object. Note: OBJ_DEAD
- * objects can be referenced during final cleaning.
+ * Gets another reference to the given object.
*/
void
vm_object_reference(vm_object_t object)
@@ -483,21 +499,23 @@
return;
/*
- * Many places assume exclusive access to objects with a single
- * ref. vm_object_collapse() in particular will directly mainpulate
- * references for objects in this state. vnode objects only need
- * the lock for the first ref to reference the vnode.
+ * vnode objects need the lock for the first reference
+ * to serialize with vm_object_vndeallocate().
*/
- if (!refcount_acquire_if_gt(&object->ref_count,
- object->type == OBJT_VNODE ? 0 : 1)) {
- VM_OBJECT_RLOCK(object);
- old = refcount_acquire(&object->ref_count);
- if (object->type == OBJT_VNODE && old == 0) {
- vp = object->handle;
- vref(vp);
+ if (object->type == OBJT_VNODE) {
+ if (!refcount_acquire_if_gt(&object->ref_count, 0)) {
+ VM_OBJECT_RLOCK(object);
+ old = refcount_acquire(&object->ref_count);
+ if (object->type == OBJT_VNODE && old == 0) {
+ vp = object->handle;
+ vref(vp);
+ }
+ VM_OBJECT_RUNLOCK(object);
}
- VM_OBJECT_RUNLOCK(object);
- }
+ } else
+ refcount_acquire(&object->ref_count);
+ KASSERT((object->flags & OBJ_DEAD) == 0,
+ ("vm_object_reference: Referenced dead object."));
}
/*
@@ -519,6 +537,8 @@
vp = object->handle;
vref(vp);
}
+ KASSERT((object->flags & OBJ_DEAD) == 0,
+ ("vm_object_reference: Referenced dead object."));
}
/*
@@ -548,6 +568,53 @@
vrele(vp);
}
+
+/*
+ * We dropped a reference on an object and discovered that it had a
+ * single remaining shadow. This is a sibling of the reference we
+ * dropped. Attempt to collapse the sibling and backing object.
+ */
+static vm_object_t
+vm_object_anon_deallocate(vm_object_t backing_object)
+{
+ vm_object_t object;
+
+ /* Fetch the final shadow. */
+ object = LIST_FIRST(&backing_object->shadow_head);
+ KASSERT(object != NULL && backing_object->shadow_count == 1,
+ ("vm_object_anon_deallocate: ref_count: %d, shadow_count: %d",
+ backing_object->ref_count, backing_object->shadow_count));
+ KASSERT((object->flags & (OBJ_TMPFS_NODE | OBJ_ANON)) == OBJ_ANON,
+ ("invalid shadow object %p", object));
+
+ if (!VM_OBJECT_TRYWLOCK(object)) {
+ /*
+ * Prevent object from disappearing since we do not have a
+ * reference.
+ */
+ vm_object_pip_add(object, 1);
+ VM_OBJECT_WUNLOCK(backing_object);
+ VM_OBJECT_WLOCK(object);
+ vm_object_pip_wakeup(object);
+ } else
+ VM_OBJECT_WUNLOCK(backing_object);
+
+ /*
+ * Check for a collapse/terminate race with the last reference holder.
+ */
+ if ((object->flags & (OBJ_DEAD | OBJ_COLLAPSING)) != 0 ||
+ !refcount_acquire_if_not_zero(&object->ref_count)) {
+ VM_OBJECT_WUNLOCK(object);
+ return (NULL);
+ }
+ backing_object = object->backing_object;
+ if (backing_object != NULL && (backing_object->flags & OBJ_ANON) != 0)
+ vm_object_collapse(object);
+ VM_OBJECT_WUNLOCK(object);
+
+ return (object);
+}
+
/*
* vm_object_deallocate:
*
@@ -562,7 +629,7 @@
void
vm_object_deallocate(vm_object_t object)
{
- vm_object_t robject, temp;
+ vm_object_t temp;
bool released;
while (object != NULL) {
@@ -594,92 +661,30 @@
("vm_object_deallocate: object deallocated too many times: %d",
object->type));
- if (refcount_release(&object->ref_count))
- goto doterm;
- if (object->ref_count > 1) {
- VM_OBJECT_WUNLOCK(object);
- return;
- } else if (object->ref_count == 1) {
- if (object->shadow_count == 0 &&
- (object->flags & OBJ_ANON) != 0) {
- vm_object_set_flag(object, OBJ_ONEMAPPING);
- } else if (object->shadow_count == 1) {
- KASSERT((object->flags & OBJ_ANON) != 0,
- ("obj %p with shadow_count > 0 is not anon",
- object));
- robject = LIST_FIRST(&object->shadow_head);
- KASSERT(robject != NULL,
- ("vm_object_deallocate: ref_count: %d, "
- "shadow_count: %d", object->ref_count,
- object->shadow_count));
- KASSERT((robject->flags & OBJ_TMPFS_NODE) == 0,
- ("shadowed tmpfs v_object %p", object));
- if (!VM_OBJECT_TRYWLOCK(robject)) {
- /*
- * Avoid a potential deadlock.
- */
- refcount_acquire(&object->ref_count);
- VM_OBJECT_WUNLOCK(object);
- /*
- * More likely than not the thread
- * holding robject's lock has lower
- * priority than the current thread.
- * Let the lower priority thread run.
- */
- pause("vmo_de", 1);
- continue;
- }
- /*
- * Collapse object into its shadow unless its
- * shadow is dead. In that case, object will
- * be deallocated by the thread that is
- * deallocating its shadow.
- */
- if ((robject->flags &
- (OBJ_DEAD | OBJ_ANON)) == OBJ_ANON) {
-
- refcount_acquire(&robject->ref_count);
-retry:
- if (REFCOUNT_COUNT(robject->paging_in_progress) > 0) {
- VM_OBJECT_WUNLOCK(object);
- vm_object_pip_wait(robject,
- "objde1");
- temp = robject->backing_object;
- if (object == temp) {
- VM_OBJECT_WLOCK(object);
- goto retry;
- }
- } else if (REFCOUNT_COUNT(object->paging_in_progress) > 0) {
- VM_OBJECT_WUNLOCK(robject);
- VM_OBJECT_WUNLOCK(object);
- refcount_wait(
- &object->paging_in_progress,
- "objde2", PVM);
- VM_OBJECT_WLOCK(robject);
- temp = robject->backing_object;
- if (object == temp) {
- VM_OBJECT_WLOCK(object);
- goto retry;
- }
- } else
- VM_OBJECT_WUNLOCK(object);
-
- if (robject->ref_count == 1) {
- refcount_release(&robject->ref_count);
- object = robject;
- goto doterm;
- }
- object = robject;
- vm_object_collapse(object);
- VM_OBJECT_WUNLOCK(object);
- continue;
- }
- VM_OBJECT_WUNLOCK(robject);
+ /*
+ * If this is not the final reference to an anonymous
+ * object we may need to collapse the shadow chain.
+ */
+ if (!refcount_release(&object->ref_count)) {
+ if (object->ref_count > 1 ||
+ object->shadow_count == 0) {
+ if ((object->flags & OBJ_ANON) != 0 &&
+ object->ref_count == 1)
+ vm_object_set_flag(object,
+ OBJ_ONEMAPPING);
+ VM_OBJECT_WUNLOCK(object);
+ return;
}
- VM_OBJECT_WUNLOCK(object);
- return;
+
+ /* Handle collapsing last ref on anonymous objects. */
+ object = vm_object_anon_deallocate(object);
+ continue;
}
-doterm:
+
+ /*
+ * Handle the final reference to an object. We restart
+ * the loop with the backing object to avoid recursion.
+ */
umtx_shm_object_terminated(object);
temp = object->backing_object;
if (temp != NULL) {
@@ -687,16 +692,11 @@
("shadowed tmpfs v_object 2 %p", object));
vm_object_backing_remove(object);
}
- /*
- * Don't double-terminate, we could be in a termination
- * recursion due to the terminate having to sync data
- * to disk.
- */
- if ((object->flags & OBJ_DEAD) == 0) {
- vm_object_set_flag(object, OBJ_DEAD);
- vm_object_terminate(object);
- } else
- VM_OBJECT_WUNLOCK(object);
+
+ KASSERT((object->flags & OBJ_DEAD) == 0,
+ ("vm_object_deallocate: Terminating dead object."));
+ vm_object_set_flag(object, OBJ_DEAD);
+ vm_object_terminate(object);
object = temp;
}
}
@@ -788,6 +788,98 @@
object->backing_object = backing_object;
}
+/*
+ * Insert an object into a backing_object's shadow list with an additional
+ * reference to the backing_object added.
+ */
+static void
+vm_object_backing_insert_ref(vm_object_t object, vm_object_t backing_object)
+{
+
+ VM_OBJECT_ASSERT_WLOCKED(object);
+
+ if ((backing_object->flags & OBJ_ANON) != 0) {
+ VM_OBJECT_WLOCK(backing_object);
+ KASSERT((backing_object->flags & OBJ_DEAD) == 0,
+ ("shadowing dead anonymous object"));
+ vm_object_reference_locked(backing_object);
+ vm_object_backing_insert_locked(object, backing_object);
+ vm_object_clear_flag(backing_object, OBJ_ONEMAPPING);
+ VM_OBJECT_WUNLOCK(backing_object);
+ } else {
+ vm_object_reference(backing_object);
+ object->backing_object = backing_object;
+ }
+}
+
+/*
+ * Transfer a backing reference from backing_object to object.
+ */
+static void
+vm_object_backing_transfer(vm_object_t object, vm_object_t backing_object)
+{
+ vm_object_t new_backing_object;
+
+ /*
+ * Note that the reference to backing_object->backing_object
+ * moves from within backing_object to within object.
+ */
+ vm_object_backing_remove_locked(object);
+ new_backing_object = backing_object->backing_object;
+ if (new_backing_object == NULL)
+ return;
+ if ((new_backing_object->flags & OBJ_ANON) != 0) {
+ VM_OBJECT_WLOCK(new_backing_object);
+ vm_object_backing_remove_locked(backing_object);
+ vm_object_backing_insert_locked(object, new_backing_object);
+ VM_OBJECT_WUNLOCK(new_backing_object);
+ } else {
+ object->backing_object = new_backing_object;
+ backing_object->backing_object = NULL;
+ }
+}
+
+/*
+ * Wait for a concurrent collapse to settle.
+ */
+static void
+vm_object_collapse_wait(vm_object_t object)
+{
+
+ VM_OBJECT_ASSERT_WLOCKED(object);
+
+ while ((object->flags & OBJ_COLLAPSING) != 0) {
+ vm_object_pip_wait(object, "vmcolwait");
+ counter_u64_add(object_collapse_waits, 1);
+ }
+}
+
+/*
+ * Waits for the backing object to finish any pending collapse. Returns it
+ * locked if it is an ANON object; otherwise returns NULL.
+ */
+static vm_object_t
+vm_object_backing_collapse_wait(vm_object_t object)
+{
+ vm_object_t backing_object;
+
+ VM_OBJECT_ASSERT_WLOCKED(object);
+
+ for (;;) {
+ backing_object = object->backing_object;
+ if (backing_object == NULL ||
+ (backing_object->flags & OBJ_ANON) == 0)
+ return (NULL);
+ VM_OBJECT_WLOCK(backing_object);
+ if ((backing_object->flags & (OBJ_DEAD | OBJ_COLLAPSING)) == 0)
+ break;
+ VM_OBJECT_WUNLOCK(object);
+ vm_object_pip_sleep(backing_object, "vmbckwait");
+ counter_u64_add(object_collapse_waits, 1);
+ VM_OBJECT_WLOCK(object);
+ }
+ return (backing_object);
+}
/*
* vm_object_terminate_pages removes any remaining pageable pages
@@ -843,9 +935,12 @@
void
vm_object_terminate(vm_object_t object)
{
+
VM_OBJECT_ASSERT_WLOCKED(object);
KASSERT((object->flags & OBJ_DEAD) != 0,
("terminating non-dead obj %p", object));
+ KASSERT(object->backing_object == NULL,
+ ("terminating shadow obj %p", object));
/*
* wait for the pageout daemon to be done with the object
@@ -855,7 +950,7 @@
KASSERT(!REFCOUNT_COUNT(object->paging_in_progress),
("vm_object_terminate: pageout in progress"));
- KASSERT(object->ref_count == 0,
+ KASSERT(object->ref_count == 0,
("vm_object_terminate: object with references, ref_count=%d",
object->ref_count));
@@ -868,7 +963,7 @@
#endif
KASSERT(object->cred == NULL || object->type == OBJT_DEFAULT ||
- object->type == OBJT_SWAP,
+ object->type == OBJT_SWAP || object->type == OBJT_DEAD,
("%s: non-swap obj %p has cred", __func__, object));
/*
@@ -1402,6 +1497,8 @@
vm_size_t size;
orig_object = entry->object.vm_object;
+ KASSERT((orig_object->flags & OBJ_ONEMAPPING) != 0,
+ ("vm_object_split: Splitting object with multiple mappings."));
if ((orig_object->flags & OBJ_ANON) == 0)
return;
if (orig_object->ref_count <= 1)
@@ -1418,36 +1515,26 @@
new_object = vm_object_allocate_anon(size, orig_object,
orig_object->cred, ptoa(size));
+ /*
+ * We must wait for the orig_object to complete any in-progress
+ * collapse so that the swap blocks are stable below. The
+ * additional reference on backing_object by new object will
+ * prevent further collapse operations until split completes.
+ */
+ VM_OBJECT_WLOCK(orig_object);
+ vm_object_collapse_wait(orig_object);
+
/*
* At this point, the new object is still private, so the order in
* which the original and new objects are locked does not matter.
*/
VM_OBJECT_WLOCK(new_object);
- VM_OBJECT_WLOCK(orig_object);
new_object->domain = orig_object->domain;
source = orig_object->backing_object;
if (source != NULL) {
- if ((source->flags & (OBJ_ANON | OBJ_DEAD)) != 0) {
- VM_OBJECT_WLOCK(source);
- if ((source->flags & OBJ_DEAD) != 0) {
- VM_OBJECT_WUNLOCK(source);
- VM_OBJECT_WUNLOCK(orig_object);
- VM_OBJECT_WUNLOCK(new_object);
- new_object->cred = NULL;
- vm_object_deallocate(new_object);
- VM_OBJECT_WLOCK(orig_object);
- return;
- }
- vm_object_backing_insert_locked(new_object, source);
- vm_object_reference_locked(source); /* for new_object */
- vm_object_clear_flag(source, OBJ_ONEMAPPING);
- VM_OBJECT_WUNLOCK(source);
- } else {
- vm_object_backing_insert(new_object, source);
- vm_object_reference(source);
- }
+ vm_object_backing_insert_ref(new_object, source);
new_object->backing_object_offset =
- orig_object->backing_object_offset + entry->offset;
+ orig_object->backing_object_offset + entry->offset;
}
if (orig_object->cred != NULL) {
crhold(orig_object->cred);
@@ -1455,6 +1542,12 @@
("orig_object->charge < 0"));
orig_object->charge -= ptoa(size);
}
+
+ /*
+ * Mark the split operation so that swap_pager_getpages() knows
+ * that the object is in transition.
+ */
+ vm_object_set_flag(orig_object, OBJ_SPLIT);
retry:
m = vm_page_find_least(orig_object, offidxstart);
for (; m != NULL && (idx = m->pindex - offidxstart) < size;
@@ -1523,6 +1616,7 @@
TAILQ_FOREACH(m, &new_object->memq, listq)
vm_page_xunbusy(m);
}
+ vm_object_clear_flag(orig_object, OBJ_SPLIT);
VM_OBJECT_WUNLOCK(orig_object);
VM_OBJECT_WUNLOCK(new_object);
entry->object.vm_object = new_object;
@@ -1531,12 +1625,8 @@
VM_OBJECT_WLOCK(new_object);
}
-#define OBSC_COLLAPSE_NOWAIT 0x0002
-#define OBSC_COLLAPSE_WAIT 0x0004
-
static vm_page_t
-vm_object_collapse_scan_wait(vm_object_t object, vm_page_t p, vm_page_t next,
- int op)
+vm_object_collapse_scan_wait(vm_object_t object, vm_page_t p)
{
vm_object_t backing_object;
@@ -1546,8 +1636,6 @@
KASSERT(p == NULL || p->object == object || p->object == backing_object,
("invalid ownership %p %p %p", p, object, backing_object));
- if ((op & OBSC_COLLAPSE_NOWAIT) != 0)
- return (next);
/* The page is only NULL when rename fails. */
if (p == NULL) {
VM_OBJECT_WUNLOCK(object);
@@ -1624,8 +1712,8 @@
return (true);
}
-static bool
-vm_object_collapse_scan(vm_object_t object, int op)
+static void
+vm_object_collapse_scan(vm_object_t object)
{
vm_object_t backing_object;
vm_page_t next, p, pp;
@@ -1637,12 +1725,6 @@
backing_object = object->backing_object;
backing_offset_index = OFF_TO_IDX(object->backing_object_offset);
- /*
- * Initial conditions
- */
- if ((op & OBSC_COLLAPSE_WAIT) != 0)
- vm_object_set_flag(backing_object, OBJ_DEAD);
-
/*
* Our scan
*/
@@ -1654,7 +1736,7 @@
* Check for busy page
*/
if (vm_page_tryxbusy(p) == 0) {
- next = vm_object_collapse_scan_wait(object, p, next, op);
+ next = vm_object_collapse_scan_wait(object, p);
continue;
}
@@ -1689,8 +1771,7 @@
* unbusy the original (backing_obj) page before we can
* (re)lock the parent. Hence we can get here.
*/
- next = vm_object_collapse_scan_wait(object, pp, next,
- op);
+ next = vm_object_collapse_scan_wait(object, pp);
continue;
}
@@ -1734,10 +1815,7 @@
*/
if (vm_page_rename(p, object, new_pindex)) {
vm_page_xunbusy(p);
- if (pp != NULL)
- vm_page_xunbusy(pp);
- next = vm_object_collapse_scan_wait(object, NULL, next,
- op);
+ next = vm_object_collapse_scan_wait(object, NULL);
continue;
}
@@ -1755,27 +1833,7 @@
#endif
vm_page_xunbusy(p);
}
- return (true);
-}
-
-
-/*
- * this version of collapse allows the operation to occur earlier and
- * when paging_in_progress is true for an object... This is not a complete
- * operation, but should plug 99.9% of the rest of the leaks.
- */
-static void
-vm_object_qcollapse(vm_object_t object)
-{
- vm_object_t backing_object = object->backing_object;
-
- VM_OBJECT_ASSERT_WLOCKED(object);
- VM_OBJECT_ASSERT_WLOCKED(backing_object);
-
- if (backing_object->ref_count != 1)
- return;
-
- vm_object_collapse_scan(object, OBSC_COLLAPSE_NOWAIT);
+ return;
}
/*
@@ -1793,53 +1851,48 @@
VM_OBJECT_ASSERT_WLOCKED(object);
while (TRUE) {
- /*
- * Verify that the conditions are right for collapse:
- *
- * The object exists and the backing object exists.
- */
- if ((backing_object = object->backing_object) == NULL)
- break;
+ KASSERT((object->flags & (OBJ_DEAD | OBJ_ANON)) == OBJ_ANON,
+ ("collapsing invalid object"));
/*
- * we check the backing object first, because it is most likely
- * not collapsable.
+ * Wait for the backing_object to finish any pending
+ * collapse so that the caller sees the shortest possible
+ * shadow chain.
*/
- if ((backing_object->flags & OBJ_ANON) == 0)
- break;
- VM_OBJECT_WLOCK(backing_object);
- if ((backing_object->flags & OBJ_DEAD) != 0 ||
- (object->flags & (OBJ_DEAD | OBJ_ANON)) != OBJ_ANON) {
- VM_OBJECT_WUNLOCK(backing_object);
- break;
- }
+ backing_object = vm_object_backing_collapse_wait(object);
+ if (backing_object == NULL)
+ return;
- if (REFCOUNT_COUNT(object->paging_in_progress) > 0 ||
- REFCOUNT_COUNT(backing_object->paging_in_progress) > 0) {
- vm_object_qcollapse(object);
- VM_OBJECT_WUNLOCK(backing_object);
- break;
- }
+ KASSERT(object->ref_count > 0 &&
+ object->ref_count > object->shadow_count,
+ ("collapse with invalid ref %d or shadow %d count.",
+ object->ref_count, object->shadow_count));
+ KASSERT((backing_object->flags &
+ (OBJ_COLLAPSING | OBJ_DEAD)) == 0,
+ ("vm_object_collapse: Backing object already collapsing."));
+ KASSERT((object->flags & (OBJ_COLLAPSING | OBJ_DEAD)) == 0,
+ ("vm_object_collapse: object is already collapsing."));
/*
- * We know that we can either collapse the backing object (if
- * the parent is the only reference to it) or (perhaps) have
+ * We know that we can either collapse the backing object if
+ * the parent is the only reference to it, or (perhaps) have
* the parent bypass the object if the parent happens to shadow
* all the resident pages in the entire backing object.
- *
- * This is ignoring pager-backed pages such as swap pages.
- * vm_object_collapse_scan fails the shadowing test in this
- * case.
*/
if (backing_object->ref_count == 1) {
+ KASSERT(backing_object->shadow_count == 1,
+ ("vm_object_collapse: shadow_count: %d",
+ backing_object->shadow_count));
vm_object_pip_add(object, 1);
+ vm_object_set_flag(object, OBJ_COLLAPSING);
vm_object_pip_add(backing_object, 1);
+ vm_object_set_flag(backing_object, OBJ_DEAD);
/*
* If there is exactly one reference to the backing
* object, we can collapse it into the parent.
*/
- vm_object_collapse_scan(object, OBSC_COLLAPSE_WAIT);
+ vm_object_collapse_scan(object);
#if VM_NRESERVLEVEL > 0
/*
@@ -1866,23 +1919,16 @@
object,
OFF_TO_IDX(object->backing_object_offset), TRUE);
}
+
/*
* Object now shadows whatever backing_object did.
- * Note that the reference to
- * backing_object->backing_object moves from within
- * backing_object to within object.
*/
- vm_object_backing_remove_locked(object);
- new_backing_object = backing_object->backing_object;
- if (new_backing_object != NULL) {
- VM_OBJECT_WLOCK(new_backing_object);
- vm_object_backing_remove_locked(backing_object);
- vm_object_backing_insert_locked(object,
- new_backing_object);
- VM_OBJECT_WUNLOCK(new_backing_object);
- }
+ vm_object_backing_transfer(object, backing_object);
object->backing_object_offset +=
backing_object->backing_object_offset;
+ vm_object_clear_flag(object, OBJ_COLLAPSING);
+ VM_OBJECT_WUNLOCK(object);
+ vm_object_pip_wakeup(object);
/*
* Discard backing_object.
@@ -1894,18 +1940,19 @@
KASSERT(backing_object->ref_count == 1, (
"backing_object %p was somehow re-referenced during collapse!",
backing_object));
- vm_object_pip_wakeup(backing_object);
backing_object->type = OBJT_DEAD;
+ vm_object_pip_wakeup(backing_object);
refcount_release(&backing_object->ref_count);
- VM_OBJECT_WUNLOCK(backing_object);
- vm_object_destroy(backing_object);
-
- vm_object_pip_wakeup(object);
+ vm_object_terminate(backing_object);
counter_u64_add(object_collapses, 1);
+ VM_OBJECT_WLOCK(object);
} else {
/*
* If we do not entirely shadow the backing object,
* there is nothing we can do so we give up.
+ *
+ * The object lock and backing_object lock must not
+ * be dropped during this sequence.
*/
if (object->resident_page_count != object->size &&
!vm_object_scan_all_shadowed(object)) {
@@ -1919,14 +1966,12 @@
* it, since its reference count is at least 2.
*/
vm_object_backing_remove_locked(object);
-
new_backing_object = backing_object->backing_object;
if (new_backing_object != NULL) {
- vm_object_backing_insert(object,
+ vm_object_backing_insert_ref(object,
new_backing_object);
- vm_object_reference(new_backing_object);
object->backing_object_offset +=
- backing_object->backing_object_offset;
+ backing_object->backing_object_offset;
}
/*
@@ -1934,6 +1979,9 @@
* its ref_count was at least 2, it will not vanish.
*/
refcount_release(&backing_object->ref_count);
+ KASSERT(backing_object->ref_count >= 1, (
+"backing_object %p was somehow dereferenced during collapse!",
+ backing_object));
VM_OBJECT_WUNLOCK(backing_object);
counter_u64_add(object_bypasses, 1);
}
@@ -2148,7 +2196,7 @@
VM_OBJECT_WLOCK(prev_object);
/*
- * Try to collapse the object first
+ * Try to collapse the object first.
*/
vm_object_collapse(prev_object);
Index: sys/vm/vnode_pager.c
===================================================================
--- sys/vm/vnode_pager.c
+++ sys/vm/vnode_pager.c
@@ -200,36 +200,24 @@
MPASS(obj->type == OBJT_VNODE);
umtx_shm_object_terminated(obj);
if (obj->ref_count == 0) {
+ KASSERT((obj->flags & OBJ_DEAD) == 0,
+ ("vnode_destroy_vobject: Terminating dead object"));
+ vm_object_set_flag(obj, OBJ_DEAD);
+
/*
- * don't double-terminate the object
+ * Clean pages and flush buffers.
*/
- if ((obj->flags & OBJ_DEAD) == 0) {
- vm_object_set_flag(obj, OBJ_DEAD);
-
- /*
- * Clean pages and flush buffers.
- */
- vm_object_page_clean(obj, 0, 0, OBJPC_SYNC);
- VM_OBJECT_WUNLOCK(obj);
+ vm_object_page_clean(obj, 0, 0, OBJPC_SYNC);
+ VM_OBJECT_WUNLOCK(obj);
- vinvalbuf(vp, V_SAVE, 0, 0);
+ vinvalbuf(vp, V_SAVE, 0, 0);
- BO_LOCK(&vp->v_bufobj);
- vp->v_bufobj.bo_flag |= BO_DEAD;
- BO_UNLOCK(&vp->v_bufobj);
+ BO_LOCK(&vp->v_bufobj);
+ vp->v_bufobj.bo_flag |= BO_DEAD;
+ BO_UNLOCK(&vp->v_bufobj);
- VM_OBJECT_WLOCK(obj);
- vm_object_terminate(obj);
- } else {
- /*
- * Waiters were already handled during object
- * termination. The exclusive vnode lock hopefully
- * prevented new waiters from referencing the dying
- * object.
- */
- vp->v_object = NULL;
- VM_OBJECT_WUNLOCK(obj);
- }
+ VM_OBJECT_WLOCK(obj);
+ vm_object_terminate(obj);
} else {
/*
* Woe to the process that tries to page now :-).
File Metadata
Details
Attached
Mime Type
text/plain
Expires
Wed, Apr 29, 1:58 PM (13 h, 41 m)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
32306900
Default Alt Text
D22908.id66119.diff (30 KB)
Attached To
Mode
D22908: Allow collapse to operate to completion during paging.
Attached
Detach File
Event Timeline
Log In to Comment