D51474.id159004.diff
diff --git a/sys/vm/vm_fault.c b/sys/vm/vm_fault.c
--- a/sys/vm/vm_fault.c
+++ b/sys/vm/vm_fault.c
@@ -130,6 +130,7 @@
bool oom_started;
int nera;
bool can_read_lock;
+ bool can_sbusy;
/* Page reference for cow. */
vm_page_t m_cow;
@@ -165,6 +166,7 @@
FAULT_OUT_OF_BOUNDS, /* Invalid address for pager. */
FAULT_HARD, /* Performed I/O. */
FAULT_SOFT, /* Found valid page. */
+ FAULT_SOFT_MSHAREDBUSY, /* Found valid page and busied it shared. */
FAULT_PROTECTION_FAILURE, /* Invalid access. */
};
@@ -204,7 +206,10 @@
* pageout while optimizing fault restarts.
*/
vm_page_deactivate(m);
- vm_page_xunbusy(m);
+ if (vm_page_xbusied(m))
+ vm_page_xunbusy(m);
+ else
+ vm_page_sunbusy(m);
*mp = NULL;
}
}
@@ -354,7 +359,7 @@
*/
m = vm_page_lookup_unlocked(fs->first_object, fs->first_pindex);
if (m == NULL || !vm_page_all_valid(m) ||
- ((fs->prot & VM_PROT_WRITE) != 0 && vm_page_busied(m))) {
+ ((fs->prot & VM_PROT_WRITE) != 0 && vm_page_xbusied(m))) {
VM_OBJECT_WLOCK(fs->first_object);
return (FAULT_FAILURE);
}
@@ -374,7 +379,7 @@
vm_object_busy(fs->first_object);
if (!vm_page_all_valid(m) ||
- ((fs->prot & VM_PROT_WRITE) != 0 && vm_page_busied(m)))
+ ((fs->prot & VM_PROT_WRITE) != 0 && vm_page_xbusied(m)))
goto fail_busy;
m_map = m;
@@ -1002,10 +1007,20 @@
return (KERN_SUCCESS);
}
-static void
-vm_fault_cow(struct faultstate *fs)
+static bool
+vm_fault_can_cow_rename(struct faultstate *fs)
{
- bool is_first_object_locked;
+ return (
+ /* Only one shadow object and no other refs. */
+ fs->object->shadow_count == 1 && fs->object->ref_count == 1 &&
+ /* No other ways to look the object up. */
+ fs->object->handle == NULL && (fs->object->flags & OBJ_ANON) != 0);
+}
+
+static void
+vm_fault_cow(struct faultstate *fs, int res)
+{
+ bool is_first_object_locked, fast_cow;
KASSERT(fs->object != fs->first_object,
("source and target COW objects are identical"));
@@ -1019,21 +1034,28 @@
* object so that it will go out to swap when needed.
*/
is_first_object_locked = false;
- if (
- /*
- * Only one shadow object and no other refs.
- */
- fs->object->shadow_count == 1 && fs->object->ref_count == 1 &&
- /*
- * No other ways to look the object up
- */
- fs->object->handle == NULL && (fs->object->flags & OBJ_ANON) != 0 &&
- /*
- * We don't chase down the shadow chain and we can acquire locks.
- */
- (is_first_object_locked = VM_OBJECT_TRYWLOCK(fs->first_object)) &&
- fs->object == fs->first_object->backing_object &&
- VM_OBJECT_TRYWLOCK(fs->object)) {
+ fast_cow = false;
+
+ if (vm_fault_can_cow_rename(fs)) {
+ /*
+ * Check that we do not need to chase down the shadow
+ * chain, and that we can acquire the needed locks.
+ */
+ is_first_object_locked = VM_OBJECT_TRYWLOCK(fs->first_object);
+ if (is_first_object_locked &&
+ fs->object == fs->first_object->backing_object) {
+ if (res == FAULT_SOFT_MSHAREDBUSY) {
+ fast_cow = VM_OBJECT_WOWNED(fs->object) ||
+ VM_OBJECT_TRYUPGRADE(fs->object);
+ } else {
+ fast_cow = VM_OBJECT_TRYWLOCK(fs->object);
+ }
+ }
+ }
+
+ if (fast_cow) {
+ vm_page_assert_xbusied(fs->m);
+
/*
* Remove but keep xbusy for replace. fs->m is moved into
* fs->first_object and left busy while fs->first_m is
@@ -1084,16 +1106,23 @@
* removed from those other address spaces.
*
* The flag check is racy, but this is tolerable: if
- * OBJ_ONEMAPPING is cleared after the check, the busy state
- * ensures that new mappings of m_cow can't be created.
- * pmap_enter() will replace an existing mapping in the current
- * address space. If OBJ_ONEMAPPING is set after the check,
- * removing mappings will at worse trigger some unnecessary page
- * faults.
+ * OBJ_ONEMAPPING is cleared after the check, either
+ * the exclusive busy state or the check for
+ * shadow_count in vm_fault_object() ensures that new
+ * mappings of m_cow can't be created. pmap_enter()
+ * will replace an existing mapping in the current
+ * address space. If OBJ_ONEMAPPING is set after the
+ * check, removing mappings will at worst trigger some
+ * unnecessary page faults.
*/
- vm_page_assert_xbusied(fs->m_cow);
+ if (res == FAULT_SOFT_MSHAREDBUSY)
+ vm_page_assert_busied(fs->m_cow);
+ else
+ vm_page_assert_xbusied(fs->m_cow);
if ((fs->first_object->flags & OBJ_ONEMAPPING) == 0)
pmap_remove_all(fs->m_cow);
+ if (res == FAULT_SOFT_MSHAREDBUSY)
+ VM_OBJECT_UNLOCK(fs->object);
}
vm_object_pip_wakeup(fs->object);
@@ -1487,6 +1516,38 @@
vm_page_iter_init(&pages, fs->object);
fs->m = vm_radix_iter_lookup(&pages, fs->pindex);
if (fs->m != NULL) {
+ /*
+ * If the found page is valid, will be either shadowed
+ * or mapped for read, and will not be renamed, then
+ * busy it in shared mode. This allows other faults
+ * needing this page to proceed in parallel.
+ *
+ * Unlocked check for validity, rechecked after busy
+ * is obtained.
+ */
+ if (vm_page_all_valid(fs->m) && fs->can_sbusy &&
+ (((fs->prot & VM_PROT_WRITE) == 0 &&
+ (fs->fault_type & (VM_PROT_COPY | VM_PROT_WRITE)) == 0) ||
+ fs->object != fs->first_object) &&
+ !(vm_fault_can_cow_rename(fs) &&
+ fs->object == fs->first_object->backing_object)) {
+ if (!vm_page_trysbusy(fs->m)) {
+restart:
+ fs->can_sbusy = false;
+ vm_fault_busy_sleep(fs);
+ return (FAULT_RESTART);
+ }
+ if (!vm_page_all_valid(fs->m)) {
+ vm_page_sunbusy(fs->m);
+ goto restart;
+ }
+ /*
+ * Keep fs->object locked for validity of the
+ * CoW checks.
+ */
+ return (FAULT_SOFT_MSHAREDBUSY);
+ }
+
if (!vm_page_tryxbusy(fs->m)) {
vm_fault_busy_sleep(fs);
return (FAULT_RESTART);
@@ -1546,8 +1607,8 @@
return (res);
}
-int
-vm_fault(vm_map_t map, vm_offset_t vaddr, vm_prot_t fault_type,
+static int
+vm_fault_rangelocked(vm_map_t map, vm_offset_t vaddr, vm_prot_t fault_type,
int fault_flags, vm_page_t *m_hold)
{
struct pctrie_iter pages;
@@ -1555,12 +1616,7 @@
int ahead, behind, faultcount, rv;
enum fault_status res;
enum fault_next_status res_next;
- bool hardfault;
-
- VM_CNT_INC(v_vm_faults);
-
- if ((curthread->td_pflags & TDP_NOFAULTING) != 0)
- return (KERN_PROTECTION_FAILURE);
+ bool hardfault, unlock_object;
fs.vp = NULL;
fs.vaddr = vaddr;
@@ -1571,6 +1627,7 @@
fs.oom_started = false;
fs.nera = -1;
fs.can_read_lock = true;
+ fs.can_sbusy = true;
faultcount = 0;
hardfault = false;
@@ -1654,6 +1711,7 @@
res = vm_fault_object(&fs, &behind, &ahead);
switch (res) {
case FAULT_SOFT:
+ case FAULT_SOFT_MSHAREDBUSY:
goto found;
case FAULT_HARD:
faultcount = behind + 1 + ahead;
@@ -1701,11 +1759,17 @@
found:
/*
- * A valid page has been found and exclusively busied. The
- * object lock must no longer be held.
+ * A valid page has been found and busied. The object lock
+ * must no longer be held.
*/
- vm_page_assert_xbusied(fs.m);
- VM_OBJECT_ASSERT_UNLOCKED(fs.object);
+ vm_page_assert_busied(fs.m);
+ if (res != FAULT_SOFT_MSHAREDBUSY) {
+ unlock_object = false;
+ VM_OBJECT_ASSERT_UNLOCKED(fs.object);
+ } else {
+ unlock_object = true;
+ VM_OBJECT_ASSERT_LOCKED(fs.object);
+ }
/*
* If the page is being written, but isn't already owned by the
@@ -1717,7 +1781,9 @@
* We only really need to copy if we want to write it.
*/
if ((fs.fault_type & (VM_PROT_COPY | VM_PROT_WRITE)) != 0) {
- vm_fault_cow(&fs);
+ vm_fault_cow(&fs, res);
+ unlock_object = false;
+
/*
* We only try to prefault read-only mappings to the
* neighboring pages when this copy-on-write fault is
@@ -1731,6 +1797,8 @@
fs.prot &= ~VM_PROT_WRITE;
}
}
+ if (unlock_object)
+ VM_OBJECT_UNLOCK(fs.object);
/*
* We must verify that the maps have not changed since our last
@@ -1773,7 +1841,7 @@
* Page must be completely valid or it is not fit to
* map into user space. vm_pager_get_pages() ensures this.
*/
- vm_page_assert_xbusied(fs.m);
+ vm_page_assert_busied(fs.m);
KASSERT(vm_page_all_valid(fs.m),
("vm_fault: page %p partially invalid", fs.m));
@@ -1805,7 +1873,10 @@
(*fs.m_hold) = fs.m;
vm_page_wire(fs.m);
}
- vm_page_xunbusy(fs.m);
+ if (vm_page_xbusied(fs.m))
+ vm_page_xunbusy(fs.m);
+ else
+ vm_page_sunbusy(fs.m);
fs.m = NULL;
/*
@@ -1836,6 +1907,24 @@
return (KERN_SUCCESS);
}
+int
+vm_fault(vm_map_t map, vm_offset_t vaddr, vm_prot_t fault_type,
+ int fault_flags, vm_page_t *m_hold)
+{
+ void *cookie;
+ int rv;
+
+ VM_CNT_INC(v_vm_faults);
+
+ if ((curthread->td_pflags & TDP_NOFAULTING) != 0)
+ return (KERN_PROTECTION_FAILURE);
+
+ cookie = rangelock_wlock(&map->fltlock, vaddr, vaddr + PAGE_SIZE);
+ rv = vm_fault_rangelocked(map, vaddr, fault_type, fault_flags, m_hold);
+ rangelock_unlock(&map->fltlock, cookie);
+ return (rv);
+}
+
/*
* Speed up the reclamation of pages that precede the faulting pindex within
* the first object of the shadow chain. Essentially, perform the equivalent
diff --git a/sys/vm/vm_map.h b/sys/vm/vm_map.h
--- a/sys/vm/vm_map.h
+++ b/sys/vm/vm_map.h
@@ -65,6 +65,7 @@
#define _VM_MAP_
#include <sys/lock.h>
+#include <sys/rangelock.h>
#include <sys/sx.h>
#include <sys/_mutex.h>
@@ -206,6 +207,7 @@
struct sx lock; /* Lock for map data */
struct mtx system_mtx;
};
+ struct rangelock fltlock;
int nentries; /* Number of entries */
vm_size_t size; /* virtual size */
u_int timestamp; /* Version number */
diff --git a/sys/vm/vm_map.c b/sys/vm/vm_map.c
--- a/sys/vm/vm_map.c
+++ b/sys/vm/vm_map.c
@@ -896,6 +896,7 @@
map->timestamp = 0;
map->busy = 0;
map->anon_loc = 0;
+ rangelock_init(&map->fltlock);
#ifdef DIAGNOSTIC
map->nupdates = 0;
#endif
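
Note: the "unbusy whichever way it was busied" sequence now appears twice in the patch (in the page release path of the first hunk and when releasing fs.m after a successful fault). A minimal sketch of how that repetition could be factored out, assuming FreeBSD's vm_page busy API; the helper name vm_fault_page_unbusy() is hypothetical and not part of this diff:

/*
 * Hypothetical helper: release a page that the fault code busied
 * either exclusive or shared.  Both call sites in the patch
 * open-code this exact test.
 */
static void
vm_fault_page_unbusy(vm_page_t m)
{
	if (vm_page_xbusied(m))
		vm_page_xunbusy(m);
	else
		vm_page_sunbusy(m);
}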

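The vm_map changes add a per-map rangelock, fltlock, which the new vm_fault() wrapper write-locks for the faulting page's range. Within one address space this serializes concurrent faults on the same page while faults on disjoint ranges proceed in parallel; the shared busy introduced above still lets faults from other maps read the same object page concurrently. A minimal sketch of the bracket pattern, using the three-argument rangelock calls exactly as the diff does:

	void *cookie;

	/* Serialize with other faults on [vaddr, vaddr + PAGE_SIZE). */
	cookie = rangelock_wlock(&map->fltlock, vaddr, vaddr + PAGE_SIZE);
	/* ... fault handling proper: vm_fault_rangelocked() ... */
	rangelock_unlock(&map->fltlock, cookie);

The rangelock_init(&map->fltlock) call added in the vm_map.c hunk initializes the lock when the map itself is set up.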