diff --git a/sys/vm/vm_fault.c b/sys/vm/vm_fault.c
--- a/sys/vm/vm_fault.c
+++ b/sys/vm/vm_fault.c
@@ -130,6 +130,7 @@
 	bool	oom_started;
 	int	nera;
 	bool	can_read_lock;
+	bool	can_sbusy;
 
 	/* Page reference for cow. */
 	vm_page_t m_cow;
@@ -165,6 +166,7 @@
 	FAULT_OUT_OF_BOUNDS,	/* Invalid address for pager. */
 	FAULT_HARD,		/* Performed I/O. */
 	FAULT_SOFT,		/* Found valid page. */
+	FAULT_SOFT_MSHAREDBUSY,	/* Found valid page and busied it shared. */
 	FAULT_PROTECTION_FAILURE, /* Invalid access. */
 };
 
@@ -204,7 +206,10 @@
 		 * pageout while optimizing fault restarts.
 		 */
 		vm_page_deactivate(m);
-		vm_page_xunbusy(m);
+		if (vm_page_xbusied(m))
+			vm_page_xunbusy(m);
+		else
+			vm_page_sunbusy(m);
 		*mp = NULL;
 	}
 }
@@ -1002,10 +1007,20 @@
 	return (KERN_SUCCESS);
 }
 
-static void
-vm_fault_cow(struct faultstate *fs)
+static bool
+vm_fault_can_cow_rename(struct faultstate *fs)
 {
-	bool is_first_object_locked;
+	return (
+	    /* Only one shadow object and no other refs. */
+	    fs->object->shadow_count == 1 && fs->object->ref_count == 1 &&
+	    /* No other ways to look the object up. */
+	    fs->object->handle == NULL && (fs->object->flags & OBJ_ANON) != 0);
+}
+
+static void
+vm_fault_cow(struct faultstate *fs, int res)
+{
+	bool is_first_object_locked, fast_cow;
 
 	KASSERT(fs->object != fs->first_object,
 	    ("source and target COW objects are identical"));
@@ -1019,21 +1034,28 @@
 	 * object so that it will go out to swap when needed.
 	 */
 	is_first_object_locked = false;
-	if (
-	    /*
-	     * Only one shadow object and no other refs.
-	     */
-	    fs->object->shadow_count == 1 && fs->object->ref_count == 1 &&
-	    /*
-	     * No other ways to look the object up
-	     */
-	    fs->object->handle == NULL && (fs->object->flags & OBJ_ANON) != 0 &&
-	    /*
-	     * We don't chase down the shadow chain and we can acquire locks.
-	     */
-	    (is_first_object_locked = VM_OBJECT_TRYWLOCK(fs->first_object)) &&
-	    fs->object == fs->first_object->backing_object &&
-	    VM_OBJECT_TRYWLOCK(fs->object)) {
+	fast_cow = false;
+
+	if (vm_fault_can_cow_rename(fs)) {
+		/*
+		 * Check that we don't chase down the shadow chain and
+		 * we can acquire locks.
+		 */
+		is_first_object_locked = VM_OBJECT_TRYWLOCK(fs->first_object);
+		if (is_first_object_locked &&
+		    fs->object == fs->first_object->backing_object) {
+			if (res == FAULT_SOFT_MSHAREDBUSY) {
+				fast_cow = VM_OBJECT_WOWNED(fs->object) ||
+				    VM_OBJECT_TRYUPGRADE(fs->object);
+			} else {
+				fast_cow = VM_OBJECT_TRYWLOCK(fs->object);
+			}
+		}
+	}
+
+	if (fast_cow) {
+		vm_page_assert_xbusied(fs->m);
+
 		/*
 		 * Remove but keep xbusy for replace.  fs->m is moved into
 		 * fs->first_object and left busy while fs->first_m is
@@ -1084,16 +1106,23 @@
 		 * removed from those other address spaces.
 		 *
 		 * The flag check is racy, but this is tolerable: if
-		 * OBJ_ONEMAPPING is cleared after the check, the busy state
-		 * ensures that new mappings of m_cow can't be created.
-		 * pmap_enter() will replace an existing mapping in the current
-		 * address space.  If OBJ_ONEMAPPING is set after the check,
-		 * removing mappings will at worse trigger some unnecessary page
-		 * faults.
+		 * OBJ_ONEMAPPING is cleared after the check, either
+		 * the exclusive busy state or the check for
+		 * shadow_count in vm_fault_object() ensures that new
+		 * mappings of m_cow can't be created.  pmap_enter()
+		 * will replace an existing mapping in the current
+		 * address space.  If OBJ_ONEMAPPING is set after the
+		 * check, removing mappings will at worse trigger some
+		 * unnecessary page faults.
 		 */
-		vm_page_assert_xbusied(fs->m_cow);
+		if (res == FAULT_SOFT_MSHAREDBUSY)
+			vm_page_assert_busied(fs->m_cow);
+		else
+			vm_page_assert_xbusied(fs->m_cow);
 		if ((fs->first_object->flags & OBJ_ONEMAPPING) == 0)
 			pmap_remove_all(fs->m_cow);
+		if (res == FAULT_SOFT_MSHAREDBUSY)
+			VM_OBJECT_UNLOCK(fs->object);
 	}
 
 	vm_object_pip_wakeup(fs->object);
@@ -1487,6 +1516,32 @@
 	vm_page_iter_init(&pages, fs->object);
 	fs->m = vm_radix_iter_lookup(&pages, fs->pindex);
 	if (fs->m != NULL) {
+		/*
+		 * Unlocked check for validity, rechecked after busy
+		 * is obtained.
+		 */
+		if (vm_page_all_valid(fs->m) && fs->can_sbusy &&
+		    ((fs->fault_type & (VM_PROT_COPY | VM_PROT_WRITE)) == 0 ||
+		    fs->object != fs->first_object) &&
+		    !(vm_fault_can_cow_rename(fs) &&
+		    fs->object == fs->first_object->backing_object)) {
+			if (!vm_page_trysbusy(fs->m)) {
+restart:
+				fs->can_sbusy = false;
+				vm_fault_busy_sleep(fs);
+				return (FAULT_RESTART);
+			}
+			if (!vm_page_all_valid(fs->m)) {
+				vm_page_sunbusy(fs->m);
+				goto restart;
+			}
+			/*
+			 * Keep fs->object locked for validity of the
+			 * CoW checks.
+			 */
+			return (FAULT_SOFT_MSHAREDBUSY);
+		}
+
 		if (!vm_page_tryxbusy(fs->m)) {
 			vm_fault_busy_sleep(fs);
 			return (FAULT_RESTART);
@@ -1546,8 +1601,8 @@
 	return (res);
 }
 
-int
-vm_fault(vm_map_t map, vm_offset_t vaddr, vm_prot_t fault_type,
+static int
+vm_fault_rglocked(vm_map_t map, vm_offset_t vaddr, vm_prot_t fault_type,
     int fault_flags, vm_page_t *m_hold)
 {
 	struct pctrie_iter pages;
@@ -1555,12 +1610,7 @@
 	int ahead, behind, faultcount, rv;
 	enum fault_status res;
 	enum fault_next_status res_next;
-	bool hardfault;
-
-	VM_CNT_INC(v_vm_faults);
-
-	if ((curthread->td_pflags & TDP_NOFAULTING) != 0)
-		return (KERN_PROTECTION_FAILURE);
+	bool hardfault, unlock_object;
 
 	fs.vp = NULL;
 	fs.vaddr = vaddr;
@@ -1571,6 +1621,7 @@
 	fs.oom_started = false;
 	fs.nera = -1;
 	fs.can_read_lock = true;
+	fs.can_sbusy = true;
 	faultcount = 0;
 	hardfault = false;
 
@@ -1654,6 +1705,7 @@
 		res = vm_fault_object(&fs, &behind, &ahead);
 		switch (res) {
 		case FAULT_SOFT:
+		case FAULT_SOFT_MSHAREDBUSY:
 			goto found;
 		case FAULT_HARD:
 			faultcount = behind + 1 + ahead;
@@ -1704,8 +1756,14 @@
 	 * A valid page has been found and exclusively busied.  The
 	 * object lock must no longer be held.
 	 */
-	vm_page_assert_xbusied(fs.m);
-	VM_OBJECT_ASSERT_UNLOCKED(fs.object);
+	vm_page_assert_busied(fs.m);
+	if (res != FAULT_SOFT_MSHAREDBUSY) {
+		unlock_object = false;
+		VM_OBJECT_ASSERT_UNLOCKED(fs.object);
+	} else {
+		unlock_object = true;
+		VM_OBJECT_ASSERT_LOCKED(fs.object);
+	}
 
 	/*
 	 * If the page is being written, but isn't already owned by the
@@ -1713,11 +1771,14 @@
 	 * top-level object.
 	 */
 	if (fs.object != fs.first_object) {
+
 		/*
 		 * We only really need to copy if we want to write it.
 		 */
		if ((fs.fault_type & (VM_PROT_COPY | VM_PROT_WRITE)) != 0) {
-			vm_fault_cow(&fs);
+			vm_fault_cow(&fs, res);
+			unlock_object = false;
+
 			/*
 			 * We only try to prefault read-only mappings to the
 			 * neighboring pages when this copy-on-write fault is
@@ -1731,6 +1792,8 @@
 			fs.prot &= ~VM_PROT_WRITE;
 		}
 	}
+	if (unlock_object)
+		VM_OBJECT_UNLOCK(fs.object);
 
 	/*
 	 * We must verify that the maps have not changed since our last
@@ -1773,7 +1836,7 @@
 	 * Page must be completely valid or it is not fit to
 	 * map into user space.  vm_pager_get_pages() ensures this.
 	 */
-	vm_page_assert_xbusied(fs.m);
+	vm_page_assert_busied(fs.m);
 	KASSERT(vm_page_all_valid(fs.m),
 	    ("vm_fault: page %p partially invalid", fs.m));
 
@@ -1805,7 +1868,10 @@
 		(*fs.m_hold) = fs.m;
 		vm_page_wire(fs.m);
 	}
-	vm_page_xunbusy(fs.m);
+	if (vm_page_xbusied(fs.m))
+		vm_page_xunbusy(fs.m);
+	else
+		vm_page_sunbusy(fs.m);
 	fs.m = NULL;
 
 	/*
@@ -1836,6 +1902,24 @@
 	return (KERN_SUCCESS);
 }
 
+int
+vm_fault(vm_map_t map, vm_offset_t vaddr, vm_prot_t fault_type,
+    int fault_flags, vm_page_t *m_hold)
+{
+	void *cookie;
+	int rv;
+
+	VM_CNT_INC(v_vm_faults);
+
+	if ((curthread->td_pflags & TDP_NOFAULTING) != 0)
+		return (KERN_PROTECTION_FAILURE);
+
+	cookie = rangelock_wlock(&map->fltlock, vaddr, vaddr + PAGE_SIZE);
+	rv = vm_fault_rglocked(map, vaddr, fault_type, fault_flags, m_hold);
+	rangelock_unlock(&map->fltlock, cookie);
+	return (rv);
+}
+
 /*
  * Speed up the reclamation of pages that precede the faulting pindex within
  * the first object of the shadow chain.  Essentially, perform the equivalent
diff --git a/sys/vm/vm_map.h b/sys/vm/vm_map.h
--- a/sys/vm/vm_map.h
+++ b/sys/vm/vm_map.h
@@ -65,6 +65,7 @@
 #define	_VM_MAP_
 
 #include <sys/lock.h>
+#include <sys/rangelock.h>
 #include <sys/sx.h>
 #include <sys/_mutex.h>
 
@@ -206,6 +207,7 @@
 		struct sx lock;		/* Lock for map data */
 		struct mtx system_mtx;
 	};
+	struct rangelock fltlock;
 	int nentries;			/* Number of entries */
 	vm_size_t size;			/* virtual size */
 	u_int timestamp;		/* Version number */
diff --git a/sys/vm/vm_map.c b/sys/vm/vm_map.c
--- a/sys/vm/vm_map.c
+++ b/sys/vm/vm_map.c
@@ -896,6 +896,7 @@
 	map->timestamp = 0;
 	map->busy = 0;
 	map->anon_loc = 0;
+	rangelock_init(&map->fltlock);
 #ifdef DIAGNOSTIC
 	map->nupdates = 0;
 #endif
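
For orientation only: reassembled from the vm_fault.c hunks above, the new vm_fault() entry point ends up looking roughly like the sketch below. The fault counter and the TDP_NOFAULTING check stay in this thin wrapper, the old function body becomes vm_fault_rglocked(), and the wrapper write-locks the faulted page's range in the new per-map rangelock so that concurrent faults on the same range of a map are serialized. This is a readability restatement of the patch, not additional code, and the final file layout may differ slightly.

/*
 * Sketch of the new vm_fault() wrapper, reassembled from the hunks above.
 */
int
vm_fault(vm_map_t map, vm_offset_t vaddr, vm_prot_t fault_type,
    int fault_flags, vm_page_t *m_hold)
{
	void *cookie;
	int rv;

	VM_CNT_INC(v_vm_faults);

	/* Threads that must not fault still bail out up front. */
	if ((curthread->td_pflags & TDP_NOFAULTING) != 0)
		return (KERN_PROTECTION_FAILURE);

	/*
	 * Serialize against other faults on the same page range of this
	 * map, then run the former vm_fault() body.
	 */
	cookie = rangelock_wlock(&map->fltlock, vaddr, vaddr + PAGE_SIZE);
	rv = vm_fault_rglocked(map, vaddr, fault_type, fault_flags, m_hold);
	rangelock_unlock(&map->fltlock, cookie);
	return (rv);
}

Because the soft-fault path can now return FAULT_SOFT_MSHAREDBUSY with the page only shared-busied, the release paths in the patch check vm_page_xbusied() before choosing between vm_page_xunbusy() and vm_page_sunbusy().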