D51474.id159004.diff
diff --git a/sys/vm/vm_fault.c b/sys/vm/vm_fault.c
--- a/sys/vm/vm_fault.c
+++ b/sys/vm/vm_fault.c
@@ -130,6 +130,7 @@
bool oom_started;
int nera;
bool can_read_lock;
+ bool can_sbusy;
/* Page reference for cow. */
vm_page_t m_cow;
@@ -165,6 +166,7 @@
FAULT_OUT_OF_BOUNDS, /* Invalid address for pager. */
FAULT_HARD, /* Performed I/O. */
FAULT_SOFT, /* Found valid page. */
+ FAULT_SOFT_MSHAREDBUSY, /* Found valid page and busied it shared. */
FAULT_PROTECTION_FAILURE, /* Invalid access. */
};
@@ -204,7 +206,10 @@
* pageout while optimizing fault restarts.
*/
vm_page_deactivate(m);
- vm_page_xunbusy(m);
+ if (vm_page_xbusied(m))
+ vm_page_xunbusy(m);
+ else
+ vm_page_sunbusy(m);
*mp = NULL;
}
}
@@ -354,7 +359,7 @@
*/
m = vm_page_lookup_unlocked(fs->first_object, fs->first_pindex);
if (m == NULL || !vm_page_all_valid(m) ||
- ((fs->prot & VM_PROT_WRITE) != 0 && vm_page_busied(m))) {
+ ((fs->prot & VM_PROT_WRITE) != 0 && vm_page_xbusied(m))) {
VM_OBJECT_WLOCK(fs->first_object);
return (FAULT_FAILURE);
}
@@ -374,7 +379,7 @@
vm_object_busy(fs->first_object);
if (!vm_page_all_valid(m) ||
- ((fs->prot & VM_PROT_WRITE) != 0 && vm_page_busied(m)))
+ ((fs->prot & VM_PROT_WRITE) != 0 && vm_page_xbusied(m)))
goto fail_busy;
m_map = m;
@@ -1002,10 +1007,20 @@
return (KERN_SUCCESS);
}
-static void
-vm_fault_cow(struct faultstate *fs)
+static bool
+vm_fault_can_cow_rename(struct faultstate *fs)
{
- bool is_first_object_locked;
+ return (
+ /* Only one shadow object and no other refs. */
+ fs->object->shadow_count == 1 && fs->object->ref_count == 1 &&
+ /* No other ways to look the object up. */
+ fs->object->handle == NULL && (fs->object->flags & OBJ_ANON) != 0);
+}
+
+static void
+vm_fault_cow(struct faultstate *fs, int res)
+{
+ bool is_first_object_locked, fast_cow;
KASSERT(fs->object != fs->first_object,
("source and target COW objects are identical"));
@@ -1019,21 +1034,28 @@
* object so that it will go out to swap when needed.
*/
is_first_object_locked = false;
- if (
- /*
- * Only one shadow object and no other refs.
- */
- fs->object->shadow_count == 1 && fs->object->ref_count == 1 &&
- /*
- * No other ways to look the object up
- */
- fs->object->handle == NULL && (fs->object->flags & OBJ_ANON) != 0 &&
- /*
- * We don't chase down the shadow chain and we can acquire locks.
- */
- (is_first_object_locked = VM_OBJECT_TRYWLOCK(fs->first_object)) &&
- fs->object == fs->first_object->backing_object &&
- VM_OBJECT_TRYWLOCK(fs->object)) {
+ fast_cow = false;
+
+ if (vm_fault_can_cow_rename(fs)) {
+ /*
+ * Check that we do not need to chase down the shadow
+ * chain, and that we can acquire the needed locks.
+ */
+ is_first_object_locked = VM_OBJECT_TRYWLOCK(fs->first_object);
+ if (is_first_object_locked &&
+ fs->object == fs->first_object->backing_object) {
+ if (res == FAULT_SOFT_MSHAREDBUSY) {
+ fast_cow = VM_OBJECT_WOWNED(fs->object) ||
+ VM_OBJECT_TRYUPGRADE(fs->object);
+ } else {
+ fast_cow = VM_OBJECT_TRYWLOCK(fs->object);
+ }
+ }
+ }
+
+ if (fast_cow) {
+ vm_page_assert_xbusied(fs->m);
+
/*
* Remove but keep xbusy for replace. fs->m is moved into
* fs->first_object and left busy while fs->first_m is
@@ -1084,16 +1106,23 @@
* removed from those other address spaces.
*
* The flag check is racy, but this is tolerable: if
- * OBJ_ONEMAPPING is cleared after the check, the busy state
- * ensures that new mappings of m_cow can't be created.
- * pmap_enter() will replace an existing mapping in the current
- * address space. If OBJ_ONEMAPPING is set after the check,
- * removing mappings will at worse trigger some unnecessary page
- * faults.
+ * OBJ_ONEMAPPING is cleared after the check, either
+ * the exclusive busy state or the check for
+ * shadow_count in vm_fault_object() ensures that new
+ * mappings of m_cow can't be created. pmap_enter()
+ * will replace an existing mapping in the current
+ * address space. If OBJ_ONEMAPPING is set after the
+ * check, removing mappings will at worst trigger some
+ * unnecessary page faults.
*/
- vm_page_assert_xbusied(fs->m_cow);
+ if (res == FAULT_SOFT_MSHAREDBUSY)
+ vm_page_assert_busied(fs->m_cow);
+ else
+ vm_page_assert_xbusied(fs->m_cow);
if ((fs->first_object->flags & OBJ_ONEMAPPING) == 0)
pmap_remove_all(fs->m_cow);
+ if (res == FAULT_SOFT_MSHAREDBUSY)
+ VM_OBJECT_UNLOCK(fs->object);
}
vm_object_pip_wakeup(fs->object);
@@ -1487,6 +1516,38 @@
vm_page_iter_init(&pages, fs->object);
fs->m = vm_radix_iter_lookup(&pages, fs->pindex);
if (fs->m != NULL) {
+ /*
+ * If the found page is valid, will be either shadowed
+ * or mapped for read, and will not be renamed, then
+ * busy it in shared mode. This allows other faults
+ * needing this page to proceed in parallel.
+ *
+ * Unlocked check for validity, rechecked after busy
+ * is obtained.
+ */
+ if (vm_page_all_valid(fs->m) && fs->can_sbusy &&
+ (((fs->prot & VM_PROT_WRITE) == 0 &&
+ (fs->fault_type & (VM_PROT_COPY | VM_PROT_WRITE)) == 0) ||
+ fs->object != fs->first_object) &&
+ !(vm_fault_can_cow_rename(fs) &&
+ fs->object == fs->first_object->backing_object)) {
+ if (!vm_page_trysbusy(fs->m)) {
+restart:
+ fs->can_sbusy = false;
+ vm_fault_busy_sleep(fs);
+ return (FAULT_RESTART);
+ }
+ if (!vm_page_all_valid(fs->m)) {
+ vm_page_sunbusy(fs->m);
+ goto restart;
+ }
+ /*
+ * Keep fs->object locked for validity of the
+ * CoW checks.
+ */
+ return (FAULT_SOFT_MSHAREDBUSY);
+ }
+
if (!vm_page_tryxbusy(fs->m)) {
vm_fault_busy_sleep(fs);
return (FAULT_RESTART);
@@ -1546,8 +1607,8 @@
return (res);
}
-int
-vm_fault(vm_map_t map, vm_offset_t vaddr, vm_prot_t fault_type,
+static int
+vm_fault_rangelocked(vm_map_t map, vm_offset_t vaddr, vm_prot_t fault_type,
int fault_flags, vm_page_t *m_hold)
{
struct pctrie_iter pages;
@@ -1555,12 +1616,7 @@
int ahead, behind, faultcount, rv;
enum fault_status res;
enum fault_next_status res_next;
- bool hardfault;
-
- VM_CNT_INC(v_vm_faults);
-
- if ((curthread->td_pflags & TDP_NOFAULTING) != 0)
- return (KERN_PROTECTION_FAILURE);
+ bool hardfault, unlock_object;
fs.vp = NULL;
fs.vaddr = vaddr;
@@ -1571,6 +1627,7 @@
fs.oom_started = false;
fs.nera = -1;
fs.can_read_lock = true;
+ fs.can_sbusy = true;
faultcount = 0;
hardfault = false;
@@ -1654,6 +1711,7 @@
res = vm_fault_object(&fs, &behind, &ahead);
switch (res) {
case FAULT_SOFT:
+ case FAULT_SOFT_MSHAREDBUSY:
goto found;
case FAULT_HARD:
faultcount = behind + 1 + ahead;
@@ -1701,11 +1759,17 @@
found:
/*
- * A valid page has been found and exclusively busied. The
- * object lock must no longer be held.
+ * A valid page has been found and busied. The object lock
+ * must no longer be held.
*/
- vm_page_assert_xbusied(fs.m);
- VM_OBJECT_ASSERT_UNLOCKED(fs.object);
+ vm_page_assert_busied(fs.m);
+ if (res != FAULT_SOFT_MSHAREDBUSY) {
+ unlock_object = false;
+ VM_OBJECT_ASSERT_UNLOCKED(fs.object);
+ } else {
+ unlock_object = true;
+ VM_OBJECT_ASSERT_LOCKED(fs.object);
+ }
/*
* If the page is being written, but isn't already owned by the
@@ -1717,7 +1781,9 @@
* We only really need to copy if we want to write it.
*/
if ((fs.fault_type & (VM_PROT_COPY | VM_PROT_WRITE)) != 0) {
- vm_fault_cow(&fs);
+ vm_fault_cow(&fs, res);
+ unlock_object = false;
+
/*
* We only try to prefault read-only mappings to the
* neighboring pages when this copy-on-write fault is
@@ -1731,6 +1797,8 @@
fs.prot &= ~VM_PROT_WRITE;
}
}
+ if (unlock_object)
+ VM_OBJECT_UNLOCK(fs.object);
/*
* We must verify that the maps have not changed since our last
@@ -1773,7 +1841,7 @@
* Page must be completely valid or it is not fit to
* map into user space. vm_pager_get_pages() ensures this.
*/
- vm_page_assert_xbusied(fs.m);
+ vm_page_assert_busied(fs.m);
KASSERT(vm_page_all_valid(fs.m),
("vm_fault: page %p partially invalid", fs.m));
@@ -1805,7 +1873,10 @@
(*fs.m_hold) = fs.m;
vm_page_wire(fs.m);
}
- vm_page_xunbusy(fs.m);
+ if (vm_page_xbusied(fs.m))
+ vm_page_xunbusy(fs.m);
+ else
+ vm_page_sunbusy(fs.m);
fs.m = NULL;
/*
@@ -1836,6 +1907,24 @@
return (KERN_SUCCESS);
}
+int
+vm_fault(vm_map_t map, vm_offset_t vaddr, vm_prot_t fault_type,
+ int fault_flags, vm_page_t *m_hold)
+{
+ void *cookie;
+ int rv;
+
+ VM_CNT_INC(v_vm_faults);
+
+ if ((curthread->td_pflags & TDP_NOFAULTING) != 0)
+ return (KERN_PROTECTION_FAILURE);
+
+ cookie = rangelock_wlock(&map->fltlock, vaddr, vaddr + PAGE_SIZE);
+ rv = vm_fault_rangelocked(map, vaddr, fault_type, fault_flags, m_hold);
+ rangelock_unlock(&map->fltlock, cookie);
+ return (rv);
+}
+
/*
* Speed up the reclamation of pages that precede the faulting pindex within
* the first object of the shadow chain. Essentially, perform the equivalent
diff --git a/sys/vm/vm_map.h b/sys/vm/vm_map.h
--- a/sys/vm/vm_map.h
+++ b/sys/vm/vm_map.h
@@ -65,6 +65,7 @@
#define _VM_MAP_
#include <sys/lock.h>
+#include <sys/rangelock.h>
#include <sys/sx.h>
#include <sys/_mutex.h>
@@ -206,6 +207,7 @@
struct sx lock; /* Lock for map data */
struct mtx system_mtx;
};
+ struct rangelock fltlock;
int nentries; /* Number of entries */
vm_size_t size; /* virtual size */
u_int timestamp; /* Version number */
diff --git a/sys/vm/vm_map.c b/sys/vm/vm_map.c
--- a/sys/vm/vm_map.c
+++ b/sys/vm/vm_map.c
@@ -896,6 +896,7 @@
map->timestamp = 0;
map->busy = 0;
map->anon_loc = 0;
+ rangelock_init(&map->fltlock);
#ifdef DIAGNOSTIC
map->nupdates = 0;
#endif
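
Note: the "unbusy whichever way it was busied" sequence now appears twice in the patch (in the page release path of the first hunk and when releasing fs.m after a successful fault). A minimal sketch of how that repetition could be factored out, assuming FreeBSD's vm_page busy API; the helper name vm_fault_page_unbusy() is hypothetical and not part of this diff:

/*
 * Hypothetical helper: release a page that the fault code busied
 * either exclusive or shared.  Both call sites in the patch
 * open-code this exact test.
 */
static void
vm_fault_page_unbusy(vm_page_t m)
{
	if (vm_page_xbusied(m))
		vm_page_xunbusy(m);
	else
		vm_page_sunbusy(m);
}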

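The vm_map changes add a per-map rangelock, fltlock, which the new vm_fault() wrapper write-locks for the faulting page's range. Within one address space this serializes concurrent faults on the same page while faults on disjoint ranges proceed in parallel; the shared busy introduced above still lets faults from other maps read the same object page concurrently. A minimal sketch of the bracket pattern, using the three-argument rangelock calls exactly as the diff does:

	void *cookie;

	/* Serialize with other faults on [vaddr, vaddr + PAGE_SIZE). */
	cookie = rangelock_wlock(&map->fltlock, vaddr, vaddr + PAGE_SIZE);
	/* ... fault handling proper: vm_fault_rangelocked() ... */
	rangelock_unlock(&map->fltlock, cookie);

The rangelock_init(&map->fltlock) call added in the vm_map.c hunk initializes the lock when the map itself is set up.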