D22908.id66119.diff

Index: sys/kern/kern_synch.c
===================================================================
--- sys/kern/kern_synch.c
+++ sys/kern/kern_synch.c
@@ -381,15 +381,21 @@
* a precise answer should use refcount_wait().
*/
void
-refcount_sleep(volatile u_int *count, const char *wmesg, int pri)
+_refcount_sleep(volatile u_int *count, struct lock_object *lock,
+ const char *wmesg, int pri)
{
void *wchan;
u_int old;
- if (REFCOUNT_COUNT(*count) == 0)
+ if (REFCOUNT_COUNT(*count) == 0) {
+ if (lock != NULL)
+ LOCK_CLASS(lock)->lc_unlock(lock);
return;
+ }
wchan = __DEVOLATILE(void *, count);
sleepq_lock(wchan);
+ if (lock != NULL)
+ LOCK_CLASS(lock)->lc_unlock(lock);
old = *count;
for (;;) {
if (REFCOUNT_COUNT(old) == 0) {
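
The hunk above closes a lost-wakeup window: _refcount_sleep() takes the
sleepqueue chain lock before invoking the interlock's lc_unlock method, so a
waker running between the unlock and the sleep cannot be missed. A minimal
userland sketch of the same pattern, assuming POSIX threads (the names here
are illustrative, not kernel API):

#include <pthread.h>

/* Hypothetical stand-in for a counter with an associated interlock. */
struct refwait {
	pthread_mutex_t	interlock;
	pthread_cond_t	drained;
	unsigned int	count;
};

/*
 * Sleep until count reaches zero.  The interlock is held on entry and,
 * as in _refcount_sleep(), is not re-acquired on return.
 * pthread_cond_wait() releases the mutex and enqueues the sleeper
 * atomically, the userland analogue of taking sleepq_lock() before
 * calling lc_unlock().
 */
static void
refwait_drain(struct refwait *rw)
{
	while (rw->count != 0)
		pthread_cond_wait(&rw->drained, &rw->interlock);
	pthread_mutex_unlock(&rw->interlock);
}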
Index: sys/sys/refcount.h
===================================================================
--- sys/sys/refcount.h
+++ sys/sys/refcount.h
@@ -46,7 +46,10 @@
#define REFCOUNT_COUNT(x) ((x) & ~REFCOUNT_WAITER)
bool refcount_release_last(volatile u_int *count, u_int n, u_int old);
-void refcount_sleep(volatile u_int *count, const char *wmesg, int prio);
+
+struct lock_object;
+void _refcount_sleep(volatile u_int *count, struct lock_object *,
+ const char *wmesg, int prio);
/*
* Attempt to handle reference count overflow and underflow. Force the counter
@@ -135,6 +138,16 @@
return (refcount_releasen(count, 1));
}
+static __inline void
+refcount_sleep(volatile u_int *count, const char *wmesg, int prio)
+{
+
+ _refcount_sleep(count, NULL, wmesg, prio);
+}
+
+#define refcount_sleep_interlock(count, lock, wmesg, prio) \
+ _refcount_sleep((count), (struct lock_object *)(lock), (wmesg), (prio))
+
static __inline void
refcount_wait(volatile u_int *count, const char *wmesg, int prio)
{
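
The cast hidden in the refcount_sleep_interlock() macro relies on a standing
FreeBSD convention: each lock type (mtx, rwlock, sx, and therefore the VM
object lock) embeds a struct lock_object as its first member, so a pointer to
the lock converts cleanly to a pointer to its lock_object, and LOCK_CLASS()
can dispatch to the class-specific lc_unlock. A simplified model of that
layout (the struct bodies are sketches, not the real definitions from
sys/lock.h and sys/mutex.h):

struct lock_object {
	const char	*lo_name;
	unsigned int	 lo_flags;
};

struct mtx {
	struct lock_object	lock_object;	/* first member by design */
	volatile unsigned long	mtx_lock;
};

/* Well-defined because lock_object sits at offset zero. */
#define LOCK_OBJECT_OF(l)	((struct lock_object *)(l))

This layout is what lets a single _refcount_sleep() implementation serve any
interlock class without per-class wrappers.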
Index: sys/vm/swap_pager.c
===================================================================
--- sys/vm/swap_pager.c
+++ sys/vm/swap_pager.c
@@ -974,15 +974,12 @@
* Destination has no swapblk and is not resident, transfer source.
* swp_pager_meta_build() can sleep.
*/
- vm_object_pip_add(srcobject, 1);
VM_OBJECT_WUNLOCK(srcobject);
- vm_object_pip_add(dstobject, 1);
dstaddr = swp_pager_meta_build(dstobject, pindex, addr);
KASSERT(dstaddr == SWAPBLK_NONE,
("Unexpected destination swapblk"));
- vm_object_pip_wakeup(dstobject);
VM_OBJECT_WLOCK(srcobject);
- vm_object_pip_wakeup(srcobject);
+
return (true);
}
@@ -995,8 +992,7 @@
* we keep the destination's.
*
* This routine is allowed to sleep. It may sleep allocating metadata
- * indirectly through swp_pager_meta_build() or if paging is still in
- * progress on the source.
+ * indirectly through swp_pager_meta_build().
*
* The source object contains no vm_page_t's (which is just as well)
*
@@ -1019,18 +1015,14 @@
*/
if (destroysource && (srcobject->flags & OBJ_ANON) == 0 &&
srcobject->handle != NULL) {
- vm_object_pip_add(srcobject, 1);
VM_OBJECT_WUNLOCK(srcobject);
- vm_object_pip_add(dstobject, 1);
VM_OBJECT_WUNLOCK(dstobject);
sx_xlock(&sw_alloc_sx);
TAILQ_REMOVE(NOBJLIST(srcobject->handle), srcobject,
pager_object_list);
sx_xunlock(&sw_alloc_sx);
VM_OBJECT_WLOCK(dstobject);
- vm_object_pip_wakeup(dstobject);
VM_OBJECT_WLOCK(srcobject);
- vm_object_pip_wakeup(srcobject);
}
/*
@@ -1207,26 +1199,29 @@
reqcount = count;
- /*
- * Determine the final number of read-behind pages and
- * allocate them BEFORE releasing the object lock. Otherwise,
- * there can be a problematic race with vm_object_split().
- * Specifically, vm_object_split() might first transfer pages
- * that precede ma[0] in the current object to a new object,
- * and then this function incorrectly recreates those pages as
- * read-behind pages in the current object.
- */
KASSERT(object->type == OBJT_SWAP,
("%s: object not swappable", __func__));
if (!swap_pager_haspage(object, ma[0]->pindex, &maxbehind, &maxahead))
return (VM_PAGER_FAIL);
+ KASSERT(reqcount - 1 <= maxahead,
+ ("page count %d extends beyond swap block", reqcount));
+
+ /*
+ * Do not transfer any pages other than those that are xbusied
+ * when running during a split or collapse operation. This
+ * prevents clustering from re-creating pages which are being
+ * moved into another object.
+ */
+ if ((object->flags & (OBJ_SPLIT | OBJ_DEAD)) != 0) {
+ maxahead = reqcount - 1;
+ maxbehind = 0;
+ }
+
/*
* Clip the readahead and readbehind ranges to exclude resident pages.
*/
if (rahead != NULL) {
- KASSERT(reqcount - 1 <= maxahead,
- ("page count %d extends beyond swap block", reqcount));
*rahead = imin(*rahead, maxahead - (reqcount - 1));
pindex = ma[reqcount - 1]->pindex;
msucc = TAILQ_NEXT(ma[reqcount - 1], listq);
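
With the pip references no longer held across swp_pager_meta_build(), a
getpages call can now run while a split or collapse is migrating pages, so
the new OBJ_SPLIT/OBJ_DEAD check above disables clustering for objects in
transition: only the xbusied run the caller passed in may be paged in. A
sketch of that clamping rule as a standalone helper (hypothetical; the diff
does this inline), with flag values as in sys/vm/vm_object.h:

#define OBJ_DEAD	0x0008	/* dead objects (during rundown) */
#define OBJ_SPLIT	0x0400	/* object is being split */

static void
swap_clamp_cluster(int reqcount, unsigned int flags,
    int *maxbehind, int *maxahead)
{
	if ((flags & (OBJ_SPLIT | OBJ_DEAD)) != 0) {
		*maxahead = reqcount - 1;	/* just the requested run */
		*maxbehind = 0;			/* no read-behind */
	}
}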
Index: sys/vm/vm_object.h
===================================================================
--- sys/vm/vm_object.h
+++ sys/vm/vm_object.h
@@ -190,6 +190,8 @@
#define OBJ_SIZEVNLOCK 0x0040 /* lock vnode to check obj size */
#define OBJ_PG_DTOR 0x0080 /* dont reset object, leave that for dtor */
#define OBJ_TMPFS_NODE 0x0200 /* object belongs to tmpfs VREG node */
+#define OBJ_SPLIT 0x0400 /* object is being split */
+#define OBJ_COLLAPSING 0x0800 /* Parent of collapse. */
#define OBJ_COLORED 0x1000 /* pg_color is defined */
#define OBJ_ONEMAPPING 0x2000 /* One USE (a single, non-forked) mapping flag */
#define OBJ_SHADOWLIST 0x4000 /* Object is on the shadow list. */
Index: sys/vm/vm_object.c
===================================================================
--- sys/vm/vm_object.c
+++ sys/vm/vm_object.c
@@ -116,7 +116,6 @@
boolean_t *eio);
static boolean_t vm_object_page_remove_write(vm_page_t p, int flags,
boolean_t *allclean);
-static void vm_object_qcollapse(vm_object_t object);
static void vm_object_vndeallocate(vm_object_t object);
static void vm_object_backing_remove(vm_object_t object);
@@ -164,12 +163,18 @@
&object_bypasses,
"VM object bypasses");
+static counter_u64_t object_collapse_waits = EARLY_COUNTER;
+SYSCTL_COUNTER_U64(_vm_stats_object, OID_AUTO, collapse_waits, CTLFLAG_RD,
+ &object_collapse_waits,
+ "Number of sleeps for collapse");
+
static void
counter_startup(void)
{
object_collapses = counter_u64_alloc(M_WAITOK);
object_bypasses = counter_u64_alloc(M_WAITOK);
+ object_collapse_waits = counter_u64_alloc(M_WAITOK);
}
SYSINIT(object_counters, SI_SUB_CPU, SI_ORDER_ANY, counter_startup, NULL);
@@ -376,6 +381,19 @@
refcount_releasen(&object->paging_in_progress, i);
}
+/*
+ * Atomically drop the interlock and wait for pip to drain. This protects
+ * from sleep/wakeup races due to identity changes. The lock is not
+ * re-acquired on return.
+ */
+static void
+vm_object_pip_sleep(vm_object_t object, char *waitid)
+{
+
+ refcount_sleep_interlock(&object->paging_in_progress,
+ &object->lock, waitid, PVM);
+}
+
void
vm_object_pip_wait(vm_object_t object, char *waitid)
{
@@ -383,8 +401,7 @@
VM_OBJECT_ASSERT_WLOCKED(object);
while (REFCOUNT_COUNT(object->paging_in_progress) > 0) {
- VM_OBJECT_WUNLOCK(object);
- refcount_wait(&object->paging_in_progress, waitid, PVM);
+ vm_object_pip_sleep(object, waitid);
VM_OBJECT_WLOCK(object);
}
}
@@ -470,8 +487,7 @@
/*
* vm_object_reference:
*
- * Gets another reference to the given object. Note: OBJ_DEAD
- * objects can be referenced during final cleaning.
+ * Gets another reference to the given object.
*/
void
vm_object_reference(vm_object_t object)
@@ -483,21 +499,23 @@
return;
/*
- * Many places assume exclusive access to objects with a single
- * ref. vm_object_collapse() in particular will directly mainpulate
- * references for objects in this state. vnode objects only need
- * the lock for the first ref to reference the vnode.
+ * vnode objects need the lock for the first reference
+ * to serialize with vnode_object_deallocate().
*/
- if (!refcount_acquire_if_gt(&object->ref_count,
- object->type == OBJT_VNODE ? 0 : 1)) {
- VM_OBJECT_RLOCK(object);
- old = refcount_acquire(&object->ref_count);
- if (object->type == OBJT_VNODE && old == 0) {
- vp = object->handle;
- vref(vp);
+ if (object->type == OBJT_VNODE) {
+ if (!refcount_acquire_if_gt(&object->ref_count, 0)) {
+ VM_OBJECT_RLOCK(object);
+ old = refcount_acquire(&object->ref_count);
+ if (object->type == OBJT_VNODE && old == 0) {
+ vp = object->handle;
+ vref(vp);
+ }
+ VM_OBJECT_RUNLOCK(object);
}
- VM_OBJECT_RUNLOCK(object);
- }
+ } else
+ refcount_acquire(&object->ref_count);
+ KASSERT((object->flags & OBJ_DEAD) == 0,
+ ("vm_object_reference: Referenced dead object."));
}
/*
@@ -519,6 +537,8 @@
vp = object->handle;
vref(vp);
}
+ KASSERT((object->flags & OBJ_DEAD) == 0,
+ ("vm_object_reference: Referenced dead object."));
}
/*
@@ -548,6 +568,53 @@
vrele(vp);
}
+
+/*
+ * We dropped a reference on an object and discovered that it had a
+ * single remaining shadow. This is a sibling of the reference we
+ * dropped. Attempt to collapse the sibling and backing object.
+ */
+static vm_object_t
+vm_object_anon_deallocate(vm_object_t backing_object)
+{
+ vm_object_t object;
+
+ /* Fetch the final shadow. */
+ object = LIST_FIRST(&backing_object->shadow_head);
+ KASSERT(object != NULL && backing_object->shadow_count == 1,
+ ("vm_object_anon_deallocate: ref_count: %d, shadow_count: %d",
+ backing_object->ref_count, backing_object->shadow_count));
+ KASSERT((object->flags & (OBJ_TMPFS_NODE | OBJ_ANON)) == OBJ_ANON,
+ ("invalid shadow object %p", object));
+
+ if (!VM_OBJECT_TRYWLOCK(object)) {
+ /*
+ * Prevent object from disappearing since we do not have a
+ * reference.
+ */
+ vm_object_pip_add(object, 1);
+ VM_OBJECT_WUNLOCK(backing_object);
+ VM_OBJECT_WLOCK(object);
+ vm_object_pip_wakeup(object);
+ } else
+ VM_OBJECT_WUNLOCK(backing_object);
+
+ /*
+ * Check for a collapse/terminate race with the last reference holder.
+ */
+ if ((object->flags & (OBJ_DEAD | OBJ_COLLAPSING)) != 0 ||
+ !refcount_acquire_if_not_zero(&object->ref_count)) {
+ VM_OBJECT_WUNLOCK(object);
+ return (NULL);
+ }
+ backing_object = object->backing_object;
+ if (backing_object != NULL && (backing_object->flags & OBJ_ANON) != 0)
+ vm_object_collapse(object);
+ VM_OBJECT_WUNLOCK(object);
+
+ return (object);
+}
+
/*
* vm_object_deallocate:
*
@@ -562,7 +629,7 @@
void
vm_object_deallocate(vm_object_t object)
{
- vm_object_t robject, temp;
+ vm_object_t temp;
bool released;
while (object != NULL) {
@@ -594,92 +661,30 @@
("vm_object_deallocate: object deallocated too many times: %d",
object->type));
- if (refcount_release(&object->ref_count))
- goto doterm;
- if (object->ref_count > 1) {
- VM_OBJECT_WUNLOCK(object);
- return;
- } else if (object->ref_count == 1) {
- if (object->shadow_count == 0 &&
- (object->flags & OBJ_ANON) != 0) {
- vm_object_set_flag(object, OBJ_ONEMAPPING);
- } else if (object->shadow_count == 1) {
- KASSERT((object->flags & OBJ_ANON) != 0,
- ("obj %p with shadow_count > 0 is not anon",
- object));
- robject = LIST_FIRST(&object->shadow_head);
- KASSERT(robject != NULL,
- ("vm_object_deallocate: ref_count: %d, "
- "shadow_count: %d", object->ref_count,
- object->shadow_count));
- KASSERT((robject->flags & OBJ_TMPFS_NODE) == 0,
- ("shadowed tmpfs v_object %p", object));
- if (!VM_OBJECT_TRYWLOCK(robject)) {
- /*
- * Avoid a potential deadlock.
- */
- refcount_acquire(&object->ref_count);
- VM_OBJECT_WUNLOCK(object);
- /*
- * More likely than not the thread
- * holding robject's lock has lower
- * priority than the current thread.
- * Let the lower priority thread run.
- */
- pause("vmo_de", 1);
- continue;
- }
- /*
- * Collapse object into its shadow unless its
- * shadow is dead. In that case, object will
- * be deallocated by the thread that is
- * deallocating its shadow.
- */
- if ((robject->flags &
- (OBJ_DEAD | OBJ_ANON)) == OBJ_ANON) {
-
- refcount_acquire(&robject->ref_count);
-retry:
- if (REFCOUNT_COUNT(robject->paging_in_progress) > 0) {
- VM_OBJECT_WUNLOCK(object);
- vm_object_pip_wait(robject,
- "objde1");
- temp = robject->backing_object;
- if (object == temp) {
- VM_OBJECT_WLOCK(object);
- goto retry;
- }
- } else if (REFCOUNT_COUNT(object->paging_in_progress) > 0) {
- VM_OBJECT_WUNLOCK(robject);
- VM_OBJECT_WUNLOCK(object);
- refcount_wait(
- &object->paging_in_progress,
- "objde2", PVM);
- VM_OBJECT_WLOCK(robject);
- temp = robject->backing_object;
- if (object == temp) {
- VM_OBJECT_WLOCK(object);
- goto retry;
- }
- } else
- VM_OBJECT_WUNLOCK(object);
-
- if (robject->ref_count == 1) {
- refcount_release(&robject->ref_count);
- object = robject;
- goto doterm;
- }
- object = robject;
- vm_object_collapse(object);
- VM_OBJECT_WUNLOCK(object);
- continue;
- }
- VM_OBJECT_WUNLOCK(robject);
+ /*
+ * If this is not the final reference to an anonymous
+ * object we may need to collapse the shadow chain.
+ */
+ if (!refcount_release(&object->ref_count)) {
+ if (object->ref_count > 1 ||
+ object->shadow_count == 0) {
+ if ((object->flags & OBJ_ANON) != 0 &&
+ object->ref_count == 1)
+ vm_object_set_flag(object,
+ OBJ_ONEMAPPING);
+ VM_OBJECT_WUNLOCK(object);
+ return;
}
- VM_OBJECT_WUNLOCK(object);
- return;
+
+ /* Handle collapsing last ref on anonymous objects. */
+ object = vm_object_anon_deallocate(object);
+ continue;
}
-doterm:
+
+ /*
+ * Handle the final reference to an object. We restart
+ * the loop with the backing object to avoid recursion.
+ */
umtx_shm_object_terminated(object);
temp = object->backing_object;
if (temp != NULL) {
@@ -687,16 +692,11 @@
("shadowed tmpfs v_object 2 %p", object));
vm_object_backing_remove(object);
}
- /*
- * Don't double-terminate, we could be in a termination
- * recursion due to the terminate having to sync data
- * to disk.
- */
- if ((object->flags & OBJ_DEAD) == 0) {
- vm_object_set_flag(object, OBJ_DEAD);
- vm_object_terminate(object);
- } else
- VM_OBJECT_WUNLOCK(object);
+
+ KASSERT((object->flags & OBJ_DEAD) == 0,
+ ("vm_object_deallocate: Terminating dead object."));
+ vm_object_set_flag(object, OBJ_DEAD);
+ vm_object_terminate(object);
object = temp;
}
}
@@ -788,6 +788,98 @@
object->backing_object = backing_object;
}
+/*
+ * Insert an object into a backing_object's shadow list with an additional
+ * reference to the backing_object added.
+ */
+static void
+vm_object_backing_insert_ref(vm_object_t object, vm_object_t backing_object)
+{
+
+ VM_OBJECT_ASSERT_WLOCKED(object);
+
+ if ((backing_object->flags & OBJ_ANON) != 0) {
+ VM_OBJECT_WLOCK(backing_object);
+ KASSERT((backing_object->flags & OBJ_DEAD) == 0,
+ ("shadowing dead anonymous object"));
+ vm_object_reference_locked(backing_object);
+ vm_object_backing_insert_locked(object, backing_object);
+ vm_object_clear_flag(backing_object, OBJ_ONEMAPPING);
+ VM_OBJECT_WUNLOCK(backing_object);
+ } else {
+ vm_object_reference(backing_object);
+ object->backing_object = backing_object;
+ }
+}
+
+/*
+ * Transfer a backing reference from backing_object to object.
+ */
+static void
+vm_object_backing_transfer(vm_object_t object, vm_object_t backing_object)
+{
+ vm_object_t new_backing_object;
+
+ /*
+ * Note that the reference to backing_object->backing_object
+ * moves from within backing_object to within object.
+ */
+ vm_object_backing_remove_locked(object);
+ new_backing_object = backing_object->backing_object;
+ if (new_backing_object == NULL)
+ return;
+ if ((new_backing_object->flags & OBJ_ANON) != 0) {
+ VM_OBJECT_WLOCK(new_backing_object);
+ vm_object_backing_remove_locked(backing_object);
+ vm_object_backing_insert_locked(object, new_backing_object);
+ VM_OBJECT_WUNLOCK(new_backing_object);
+ } else {
+ object->backing_object = new_backing_object;
+ backing_object->backing_object = NULL;
+ }
+}
+
+/*
+ * Wait for a concurrent collapse to settle.
+ */
+static void
+vm_object_collapse_wait(vm_object_t object)
+{
+
+ VM_OBJECT_ASSERT_WLOCKED(object);
+
+ while ((object->flags & OBJ_COLLAPSING) != 0) {
+ vm_object_pip_wait(object, "vmcolwait");
+ counter_u64_add(object_collapse_waits, 1);
+ }
+}
+
+/*
+ * Waits for a backing object to clear a pending collapse and returns
+ * it locked if it is an ANON object.
+ */
+static vm_object_t
+vm_object_backing_collapse_wait(vm_object_t object)
+{
+ vm_object_t backing_object;
+
+ VM_OBJECT_ASSERT_WLOCKED(object);
+
+ for (;;) {
+ backing_object = object->backing_object;
+ if (backing_object == NULL ||
+ (backing_object->flags & OBJ_ANON) == 0)
+ return (NULL);
+ VM_OBJECT_WLOCK(backing_object);
+ if ((backing_object->flags & (OBJ_DEAD | OBJ_COLLAPSING)) == 0)
+ break;
+ VM_OBJECT_WUNLOCK(object);
+ vm_object_pip_sleep(backing_object, "vmbckwait");
+ counter_u64_add(object_collapse_waits, 1);
+ VM_OBJECT_WLOCK(object);
+ }
+ return (backing_object);
+}
/*
* vm_object_terminate_pages removes any remaining pageable pages
@@ -843,9 +935,12 @@
void
vm_object_terminate(vm_object_t object)
{
+
VM_OBJECT_ASSERT_WLOCKED(object);
KASSERT((object->flags & OBJ_DEAD) != 0,
("terminating non-dead obj %p", object));
+ KASSERT(object->backing_object == NULL,
+ ("terminating shadow obj %p", object));
/*
* wait for the pageout daemon to be done with the object
@@ -855,7 +950,7 @@
KASSERT(!REFCOUNT_COUNT(object->paging_in_progress),
("vm_object_terminate: pageout in progress"));
- KASSERT(object->ref_count == 0,
+ KASSERT(object->ref_count == 0,
("vm_object_terminate: object with references, ref_count=%d",
object->ref_count));
@@ -868,7 +963,7 @@
#endif
KASSERT(object->cred == NULL || object->type == OBJT_DEFAULT ||
- object->type == OBJT_SWAP,
+ object->type == OBJT_SWAP || object->type == OBJT_DEAD,
("%s: non-swap obj %p has cred", __func__, object));
/*
@@ -1402,6 +1497,8 @@
vm_size_t size;
orig_object = entry->object.vm_object;
+ KASSERT((orig_object->flags & OBJ_ONEMAPPING) != 0,
+ ("vm_object_split: Splitting object with multiple mappings."));
if ((orig_object->flags & OBJ_ANON) == 0)
return;
if (orig_object->ref_count <= 1)
@@ -1418,36 +1515,26 @@
new_object = vm_object_allocate_anon(size, orig_object,
orig_object->cred, ptoa(size));
+ /*
+ * We must wait for the orig_object to complete any in-progress
+ * collapse so that the swap blocks are stable below. The
+ * additional reference on backing_object by new object will
+ * prevent further collapse operations until split completes.
+ */
+ VM_OBJECT_WLOCK(orig_object);
+ vm_object_collapse_wait(orig_object);
+
/*
* At this point, the new object is still private, so the order in
* which the original and new objects are locked does not matter.
*/
VM_OBJECT_WLOCK(new_object);
- VM_OBJECT_WLOCK(orig_object);
new_object->domain = orig_object->domain;
source = orig_object->backing_object;
if (source != NULL) {
- if ((source->flags & (OBJ_ANON | OBJ_DEAD)) != 0) {
- VM_OBJECT_WLOCK(source);
- if ((source->flags & OBJ_DEAD) != 0) {
- VM_OBJECT_WUNLOCK(source);
- VM_OBJECT_WUNLOCK(orig_object);
- VM_OBJECT_WUNLOCK(new_object);
- new_object->cred = NULL;
- vm_object_deallocate(new_object);
- VM_OBJECT_WLOCK(orig_object);
- return;
- }
- vm_object_backing_insert_locked(new_object, source);
- vm_object_reference_locked(source); /* for new_object */
- vm_object_clear_flag(source, OBJ_ONEMAPPING);
- VM_OBJECT_WUNLOCK(source);
- } else {
- vm_object_backing_insert(new_object, source);
- vm_object_reference(source);
- }
+ vm_object_backing_insert_ref(new_object, source);
new_object->backing_object_offset =
- orig_object->backing_object_offset + entry->offset;
+ orig_object->backing_object_offset + entry->offset;
}
if (orig_object->cred != NULL) {
crhold(orig_object->cred);
@@ -1455,6 +1542,12 @@
("orig_object->charge < 0"));
orig_object->charge -= ptoa(size);
}
+
+ /*
+ * Mark the split operation so that swap_pager_getpages() knows
+ * that the object is in transition.
+ */
+ vm_object_set_flag(orig_object, OBJ_SPLIT);
retry:
m = vm_page_find_least(orig_object, offidxstart);
for (; m != NULL && (idx = m->pindex - offidxstart) < size;
@@ -1523,6 +1616,7 @@
TAILQ_FOREACH(m, &new_object->memq, listq)
vm_page_xunbusy(m);
}
+ vm_object_clear_flag(orig_object, OBJ_SPLIT);
VM_OBJECT_WUNLOCK(orig_object);
VM_OBJECT_WUNLOCK(new_object);
entry->object.vm_object = new_object;
@@ -1531,12 +1625,8 @@
VM_OBJECT_WLOCK(new_object);
}
-#define OBSC_COLLAPSE_NOWAIT 0x0002
-#define OBSC_COLLAPSE_WAIT 0x0004
-
static vm_page_t
-vm_object_collapse_scan_wait(vm_object_t object, vm_page_t p, vm_page_t next,
- int op)
+vm_object_collapse_scan_wait(vm_object_t object, vm_page_t p)
{
vm_object_t backing_object;
@@ -1546,8 +1636,6 @@
KASSERT(p == NULL || p->object == object || p->object == backing_object,
("invalid ownership %p %p %p", p, object, backing_object));
- if ((op & OBSC_COLLAPSE_NOWAIT) != 0)
- return (next);
/* The page is only NULL when rename fails. */
if (p == NULL) {
VM_OBJECT_WUNLOCK(object);
@@ -1624,8 +1712,8 @@
return (true);
}
-static bool
-vm_object_collapse_scan(vm_object_t object, int op)
+static void
+vm_object_collapse_scan(vm_object_t object)
{
vm_object_t backing_object;
vm_page_t next, p, pp;
@@ -1637,12 +1725,6 @@
backing_object = object->backing_object;
backing_offset_index = OFF_TO_IDX(object->backing_object_offset);
- /*
- * Initial conditions
- */
- if ((op & OBSC_COLLAPSE_WAIT) != 0)
- vm_object_set_flag(backing_object, OBJ_DEAD);
-
/*
* Our scan
*/
@@ -1654,7 +1736,7 @@
* Check for busy page
*/
if (vm_page_tryxbusy(p) == 0) {
- next = vm_object_collapse_scan_wait(object, p, next, op);
+ next = vm_object_collapse_scan_wait(object, p);
continue;
}
@@ -1689,8 +1771,7 @@
* unbusy the original (backing_obj) page before we can
* (re)lock the parent. Hence we can get here.
*/
- next = vm_object_collapse_scan_wait(object, pp, next,
- op);
+ next = vm_object_collapse_scan_wait(object, pp);
continue;
}
@@ -1734,10 +1815,7 @@
*/
if (vm_page_rename(p, object, new_pindex)) {
vm_page_xunbusy(p);
- if (pp != NULL)
- vm_page_xunbusy(pp);
- next = vm_object_collapse_scan_wait(object, NULL, next,
- op);
+ next = vm_object_collapse_scan_wait(object, NULL);
continue;
}
@@ -1755,27 +1833,7 @@
#endif
vm_page_xunbusy(p);
}
- return (true);
-}
-
-
-/*
- * this version of collapse allows the operation to occur earlier and
- * when paging_in_progress is true for an object... This is not a complete
- * operation, but should plug 99.9% of the rest of the leaks.
- */
-static void
-vm_object_qcollapse(vm_object_t object)
-{
- vm_object_t backing_object = object->backing_object;
-
- VM_OBJECT_ASSERT_WLOCKED(object);
- VM_OBJECT_ASSERT_WLOCKED(backing_object);
-
- if (backing_object->ref_count != 1)
- return;
-
- vm_object_collapse_scan(object, OBSC_COLLAPSE_NOWAIT);
+ return;
}
/*
@@ -1793,53 +1851,48 @@
VM_OBJECT_ASSERT_WLOCKED(object);
while (TRUE) {
- /*
- * Verify that the conditions are right for collapse:
- *
- * The object exists and the backing object exists.
- */
- if ((backing_object = object->backing_object) == NULL)
- break;
+ KASSERT((object->flags & (OBJ_DEAD | OBJ_ANON)) == OBJ_ANON,
+ ("collapsing invalid object"));
/*
- * we check the backing object first, because it is most likely
- * not collapsable.
+ * Wait for the backing_object to finish any pending
+ * collapse so that the caller sees the shortest possible
+ * shadow chain.
*/
- if ((backing_object->flags & OBJ_ANON) == 0)
- break;
- VM_OBJECT_WLOCK(backing_object);
- if ((backing_object->flags & OBJ_DEAD) != 0 ||
- (object->flags & (OBJ_DEAD | OBJ_ANON)) != OBJ_ANON) {
- VM_OBJECT_WUNLOCK(backing_object);
- break;
- }
+ backing_object = vm_object_backing_collapse_wait(object);
+ if (backing_object == NULL)
+ return;
- if (REFCOUNT_COUNT(object->paging_in_progress) > 0 ||
- REFCOUNT_COUNT(backing_object->paging_in_progress) > 0) {
- vm_object_qcollapse(object);
- VM_OBJECT_WUNLOCK(backing_object);
- break;
- }
+ KASSERT(object->ref_count > 0 &&
+ object->ref_count > object->shadow_count,
+ ("collapse with invalid ref %d or shadow %d count.",
+ object->ref_count, object->shadow_count));
+ KASSERT((backing_object->flags &
+ (OBJ_COLLAPSING | OBJ_DEAD)) == 0,
+ ("vm_object_collapse: Backing object already collapsing."));
+ KASSERT((object->flags & (OBJ_COLLAPSING | OBJ_DEAD)) == 0,
+ ("vm_object_collapse: object is already collapsing."));
/*
- * We know that we can either collapse the backing object (if
- * the parent is the only reference to it) or (perhaps) have
+ * We know that we can either collapse the backing object if
+ * the parent is the only reference to it, or (perhaps) have
* the parent bypass the object if the parent happens to shadow
* all the resident pages in the entire backing object.
- *
- * This is ignoring pager-backed pages such as swap pages.
- * vm_object_collapse_scan fails the shadowing test in this
- * case.
*/
if (backing_object->ref_count == 1) {
+ KASSERT(backing_object->shadow_count == 1,
+ ("vm_object_collapse: shadow_count: %d",
+ backing_object->shadow_count));
vm_object_pip_add(object, 1);
+ vm_object_set_flag(object, OBJ_COLLAPSING);
vm_object_pip_add(backing_object, 1);
+ vm_object_set_flag(backing_object, OBJ_DEAD);
/*
* If there is exactly one reference to the backing
* object, we can collapse it into the parent.
*/
- vm_object_collapse_scan(object, OBSC_COLLAPSE_WAIT);
+ vm_object_collapse_scan(object);
#if VM_NRESERVLEVEL > 0
/*
@@ -1866,23 +1919,16 @@
object,
OFF_TO_IDX(object->backing_object_offset), TRUE);
}
+
/*
* Object now shadows whatever backing_object did.
- * Note that the reference to
- * backing_object->backing_object moves from within
- * backing_object to within object.
*/
- vm_object_backing_remove_locked(object);
- new_backing_object = backing_object->backing_object;
- if (new_backing_object != NULL) {
- VM_OBJECT_WLOCK(new_backing_object);
- vm_object_backing_remove_locked(backing_object);
- vm_object_backing_insert_locked(object,
- new_backing_object);
- VM_OBJECT_WUNLOCK(new_backing_object);
- }
+ vm_object_backing_transfer(object, backing_object);
object->backing_object_offset +=
backing_object->backing_object_offset;
+ vm_object_clear_flag(object, OBJ_COLLAPSING);
+ VM_OBJECT_WUNLOCK(object);
+ vm_object_pip_wakeup(object);
/*
* Discard backing_object.
@@ -1894,18 +1940,19 @@
KASSERT(backing_object->ref_count == 1, (
"backing_object %p was somehow re-referenced during collapse!",
backing_object));
- vm_object_pip_wakeup(backing_object);
backing_object->type = OBJT_DEAD;
+ vm_object_pip_wakeup(backing_object);
refcount_release(&backing_object->ref_count);
- VM_OBJECT_WUNLOCK(backing_object);
- vm_object_destroy(backing_object);
-
- vm_object_pip_wakeup(object);
+ vm_object_terminate(backing_object);
counter_u64_add(object_collapses, 1);
+ VM_OBJECT_WLOCK(object);
} else {
/*
* If we do not entirely shadow the backing object,
* there is nothing we can do so we give up.
+ *
+ * The object lock and backing_object lock must not
+ * be dropped during this sequence.
*/
if (object->resident_page_count != object->size &&
!vm_object_scan_all_shadowed(object)) {
@@ -1919,14 +1966,12 @@
* it, since its reference count is at least 2.
*/
vm_object_backing_remove_locked(object);
-
new_backing_object = backing_object->backing_object;
if (new_backing_object != NULL) {
- vm_object_backing_insert(object,
+ vm_object_backing_insert_ref(object,
new_backing_object);
- vm_object_reference(new_backing_object);
object->backing_object_offset +=
- backing_object->backing_object_offset;
+ backing_object->backing_object_offset;
}
/*
@@ -1934,6 +1979,9 @@
* its ref_count was at least 2, it will not vanish.
*/
refcount_release(&backing_object->ref_count);
+ KASSERT(backing_object->ref_count >= 1, (
+"backing_object %p was somehow dereferenced during collapse!",
+ backing_object));
VM_OBJECT_WUNLOCK(backing_object);
counter_u64_add(object_bypasses, 1);
}
@@ -2148,7 +2196,7 @@
VM_OBJECT_WLOCK(prev_object);
/*
- * Try to collapse the object first
+ * Try to collapse the object first.
*/
vm_object_collapse(prev_object);
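
Taken together, the vm_object.c changes replace the old qcollapse/retry dance
with a gating protocol: the collapsing thread sets OBJ_COLLAPSING on the
parent (and OBJ_DEAD on the victim) while holding a pip reference, and every
other path that cares waits for the flag to clear by sleeping on
paging_in_progress via vm_object_pip_sleep(). A userland model of that
protocol, assuming POSIX threads (all names illustrative):

#include <pthread.h>
#include <stdbool.h>

struct obj {
	pthread_mutex_t	lock;		/* stands in for the object lock */
	pthread_cond_t	pip_cv;		/* wait channel for pip/flag */
	int		pip;		/* paging_in_progress */
	bool		collapsing;	/* OBJ_COLLAPSING */
};

/* Called with obj->lock held; mirrors the start of a collapse pass. */
static void
collapse_begin(struct obj *o)
{
	o->collapsing = true;
	o->pip++;		/* keeps the wait channel armed */
}

/* Called with obj->lock held once pages and swap blocks have moved. */
static void
collapse_end(struct obj *o)
{
	o->collapsing = false;
	o->pip--;
	pthread_cond_broadcast(&o->pip_cv);
}

/* Mirrors vm_object_collapse_wait(): block until any collapse settles. */
static void
collapse_wait(struct obj *o)
{
	while (o->collapsing)
		pthread_cond_wait(&o->pip_cv, &o->lock);
}

As in the patch, the waiter drops the object lock atomically with going to
sleep (pthread_cond_wait here, refcount_sleep_interlock in the kernel), so a
collapse finishing in the window between the flag check and the sleep cannot
strand the waiter.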
Index: sys/vm/vnode_pager.c
===================================================================
--- sys/vm/vnode_pager.c
+++ sys/vm/vnode_pager.c
@@ -200,36 +200,24 @@
MPASS(obj->type == OBJT_VNODE);
umtx_shm_object_terminated(obj);
if (obj->ref_count == 0) {
+ KASSERT((obj->flags & OBJ_DEAD) == 0,
+ ("vnode_destroy_vobject: Terminating dead object"));
+ vm_object_set_flag(obj, OBJ_DEAD);
+
/*
- * don't double-terminate the object
+ * Clean pages and flush buffers.
*/
- if ((obj->flags & OBJ_DEAD) == 0) {
- vm_object_set_flag(obj, OBJ_DEAD);
-
- /*
- * Clean pages and flush buffers.
- */
- vm_object_page_clean(obj, 0, 0, OBJPC_SYNC);
- VM_OBJECT_WUNLOCK(obj);
+ vm_object_page_clean(obj, 0, 0, OBJPC_SYNC);
+ VM_OBJECT_WUNLOCK(obj);
- vinvalbuf(vp, V_SAVE, 0, 0);
+ vinvalbuf(vp, V_SAVE, 0, 0);
- BO_LOCK(&vp->v_bufobj);
- vp->v_bufobj.bo_flag |= BO_DEAD;
- BO_UNLOCK(&vp->v_bufobj);
+ BO_LOCK(&vp->v_bufobj);
+ vp->v_bufobj.bo_flag |= BO_DEAD;
+ BO_UNLOCK(&vp->v_bufobj);
- VM_OBJECT_WLOCK(obj);
- vm_object_terminate(obj);
- } else {
- /*
- * Waiters were already handled during object
- * termination. The exclusive vnode lock hopefully
- * prevented new waiters from referencing the dying
- * object.
- */
- vp->v_object = NULL;
- VM_OBJECT_WUNLOCK(obj);
- }
+ VM_OBJECT_WLOCK(obj);
+ vm_object_terminate(obj);
} else {
/*
* Woe to the process that tries to page now :-).
