Index: sys/vm/vm_fault.c =================================================================== --- sys/vm/vm_fault.c +++ sys/vm/vm_fault.c @@ -334,16 +334,18 @@ #endif rv = pmap_enter(fs->map->pmap, vaddr, m_map, prot, fault_type | PMAP_ENTER_NOSLEEP | (wired ? PMAP_ENTER_WIRED : 0), psind); + if (rv == KERN_SUCCESS) { + if (psind == 0 && !wired) + vm_fault_prefault(fs, vaddr, PFBAK, PFFOR, true); + vm_fault_dirty(fs->entry, m, prot, fault_type, fault_flags); + } + VM_OBJECT_RUNLOCK(fs->first_object); if (rv != KERN_SUCCESS) goto out; if (m_hold != NULL) { *m_hold = m; vm_page_wire(m); } - vm_fault_dirty(fs->entry, m, prot, fault_type, fault_flags); - if (psind == 0 && !wired) - vm_fault_prefault(fs, vaddr, PFBAK, PFFOR, true); - VM_OBJECT_RUNLOCK(fs->first_object); vm_map_lookup_done(fs->map, fs->entry); curthread->td_ru.ru_minflt++; @@ -630,7 +632,7 @@ } static int -vm_fault_lock_vnode(struct faultstate *fs) +vm_fault_lock_vnode(struct faultstate *fs, bool objlocked) { struct vnode *vp; int error, locked; @@ -666,7 +668,10 @@ } vhold(vp); - unlock_and_deallocate(fs); + if (objlocked) + unlock_and_deallocate(fs); + else + fault_deallocate(fs); error = vget(vp, locked | LK_RETRY | LK_CANRECURSE, curthread); vdrop(vp); fs->vp = vp; @@ -861,9 +866,11 @@ */ if (!vm_page_all_valid(fs.m)) goto readrest; - break; /* break to PAGE HAS BEEN FOUND */ + VM_OBJECT_WUNLOCK(fs.object); + break; /* break to PAGE HAS BEEN FOUND. */ } KASSERT(fs.m == NULL, ("fs.m should be NULL, not %p", fs.m)); + VM_OBJECT_ASSERT_WLOCKED(fs.object); /* * Page is not resident. If the pager might contain the page @@ -874,7 +881,7 @@ if (fs.object->type != OBJT_DEFAULT || fs.object == fs.first_object) { if ((fs.object->flags & OBJ_SIZEVNLOCK) != 0) { - rv = vm_fault_lock_vnode(&fs); + rv = vm_fault_lock_vnode(&fs, true); MPASS(rv == KERN_SUCCESS || rv == KERN_RESOURCE_SHORTAGE); if (rv == KERN_RESOURCE_SHORTAGE) @@ -953,13 +960,24 @@ } readrest: + /* + * Default objects have no pager so no exclusive busy exists + * to protect this page in the chain. Skip to the next + * object without dropping the lock to preserve atomicity of + * shadow faults. + */ + if (fs.object->type == OBJT_DEFAULT) + goto next; + /* * At this point, we have either allocated a new page or found * an existing page that is only partially valid. * * We hold a reference on the current object and the page is - * exclusive busied. + * exclusive busied. The exclusive busy prevents simultaneous + * faults and collapses while the object lock is dropped. */ + VM_OBJECT_WUNLOCK(fs.object); /* * If the pager for the current object might have the page, @@ -970,8 +988,7 @@ * have the page, the number of additional pages to read will * apply to subsequent objects in the shadow chain. */ - if (fs.object->type != OBJT_DEFAULT && nera == -1 && - !P_KILLED(curproc)) { + if (nera == -1 && !P_KILLED(curproc)) { KASSERT(fs.lookup_still_valid, ("map unlocked")); era = fs.entry->read_ahead; behavior = vm_map_entry_behavior(fs.entry); @@ -1037,7 +1054,7 @@ */ unlock_map(&fs); - rv = vm_fault_lock_vnode(&fs); + rv = vm_fault_lock_vnode(&fs, false); MPASS(rv == KERN_SUCCESS || rv == KERN_RESOURCE_SHORTAGE); if (rv == KERN_RESOURCE_SHORTAGE) @@ -1078,15 +1095,14 @@ } ahead = ulmin(ahead, atop(e_end - vaddr) - 1); } - VM_OBJECT_WUNLOCK(fs.object); rv = vm_pager_get_pages(fs.object, &fs.m, 1, &behind, &ahead); - VM_OBJECT_WLOCK(fs.object); if (rv == VM_PAGER_OK) { faultcount = behind + 1 + ahead; hardfault = true; - break; /* break to PAGE HAS BEEN FOUND */ + break; /* break to PAGE HAS BEEN FOUND. */ } + VM_OBJECT_WLOCK(fs.object); if (rv == VM_PAGER_ERROR) printf("vm_fault: pager read error, pid %d (%s)\n", curproc->p_pid, curproc->p_comm); @@ -1104,6 +1120,7 @@ } +next: /* * The requested page does not exist at this object/ * offset. Remove the invalid page from the object, @@ -1124,19 +1141,18 @@ * Move on to the next object. Lock the next object before * unlocking the current one. */ + VM_OBJECT_ASSERT_WLOCKED(fs.object); next_object = fs.object->backing_object; if (next_object == NULL) { /* * If there's no object left, fill the page in the top * object with zeros. */ + VM_OBJECT_WUNLOCK(fs.object); if (fs.object != fs.first_object) { vm_object_pip_wakeup(fs.object); - VM_OBJECT_WUNLOCK(fs.object); - fs.object = fs.first_object; fs.pindex = fs.first_pindex; - VM_OBJECT_WLOCK(fs.object); } MPASS(fs.first_m != NULL); MPASS(fs.m == NULL); @@ -1155,7 +1171,7 @@ vm_page_valid(fs.m); /* Don't try to prefault neighboring pages. */ faultcount = 1; - break; /* break to PAGE HAS BEEN FOUND */ + break; /* break to PAGE HAS BEEN FOUND. */ } else { MPASS(fs.first_m != NULL); KASSERT(fs.object != next_object, @@ -1171,12 +1187,12 @@ } } - vm_page_assert_xbusied(fs.m); - /* - * PAGE HAS BEEN FOUND. [Loop invariant still holds -- the object lock - * is held.] + * PAGE HAS BEEN FOUND. A valid page has been found and exclusively + * busied. The object lock must no longer be held. */ + vm_page_assert_xbusied(fs.m); + VM_OBJECT_ASSERT_UNLOCKED(fs.object); /* * If the page is being written, but isn't already owned by the @@ -1200,27 +1216,28 @@ */ is_first_object_locked = false; if ( - /* - * Only one shadow object - */ - (fs.object->shadow_count == 1) && - /* - * No COW refs, except us - */ - (fs.object->ref_count == 1) && - /* - * No one else can look this object up - */ - (fs.object->handle == NULL) && - /* - * No other ways to look the object up - */ - ((fs.object->flags & OBJ_ANON) != 0) && + /* + * Only one shadow object + */ + fs.object->shadow_count == 1 && + /* + * No COW refs, except us + */ + fs.object->ref_count == 1 && + /* + * No one else can look this object up + */ + fs.object->handle == NULL && + /* + * No other ways to look the object up + */ + (fs.object->flags & OBJ_ANON) != 0 && (is_first_object_locked = VM_OBJECT_TRYWLOCK(fs.first_object)) && - /* - * We don't chase down the shadow chain - */ - fs.object == fs.first_object->backing_object) { + /* + * We don't chase down the shadow chain + */ + fs.object == fs.first_object->backing_object && + VM_OBJECT_TRYWLOCK(fs.object)) { /* * Remove but keep xbusy for replace. fs.m is @@ -1240,11 +1257,13 @@ fs.first_object->backing_object_offset)); #endif VM_OBJECT_WUNLOCK(fs.object); + VM_OBJECT_WUNLOCK(fs.first_object); fs.first_m = fs.m; fs.m = NULL; VM_CNT_INC(v_cow_optim); } else { - VM_OBJECT_WUNLOCK(fs.object); + if (is_first_object_locked) + VM_OBJECT_WUNLOCK(fs.first_object); /* * Oh, well, lets copy it. */ @@ -1281,8 +1300,6 @@ fs.object = fs.first_object; fs.pindex = fs.first_pindex; fs.m = fs.first_m; - if (!is_first_object_locked) - VM_OBJECT_WLOCK(fs.object); VM_CNT_INC(v_cow_faults); curthread->td_cow++; } else { @@ -1296,7 +1313,7 @@ */ if (!fs.lookup_still_valid) { if (!vm_map_trylock_read(fs.map)) { - unlock_and_deallocate(&fs); + fault_deallocate(&fs); goto RetryFault; } fs.lookup_still_valid = true; @@ -1310,7 +1327,7 @@ * pageout will grab it eventually. */ if (result != KERN_SUCCESS) { - unlock_and_deallocate(&fs); + fault_deallocate(&fs); /* * If retry of map lookup would have blocked then @@ -1322,7 +1339,7 @@ } if ((retry_object != fs.first_object) || (retry_pindex != fs.first_pindex)) { - unlock_and_deallocate(&fs); + fault_deallocate(&fs); goto RetryFault; } @@ -1337,7 +1354,7 @@ prot &= retry_prot; fault_type &= retry_prot; if (prot == 0) { - unlock_and_deallocate(&fs); + fault_deallocate(&fs); goto RetryFault; } @@ -1346,6 +1363,7 @@ ("!wired && VM_FAULT_WIRE")); } } + VM_OBJECT_ASSERT_UNLOCKED(fs.object); /* * If the page was filled by a pager, save the virtual address that @@ -1356,16 +1374,15 @@ if (hardfault) fs.entry->next_read = vaddr + ptoa(ahead) + PAGE_SIZE; - vm_page_assert_xbusied(fs.m); - vm_fault_dirty(fs.entry, fs.m, prot, fault_type, fault_flags); - /* * Page must be completely valid or it is not fit to * map into user space. vm_pager_get_pages() ensures this. */ + vm_page_assert_xbusied(fs.m); KASSERT(vm_page_all_valid(fs.m), ("vm_fault: page %p partially invalid", fs.m)); - VM_OBJECT_WUNLOCK(fs.object); + + vm_fault_dirty(fs.entry, fs.m, prot, fault_type, fault_flags); /* * Put this page into the physical map. We had to do the unlock above @@ -1447,17 +1464,11 @@ vm_size_t size; object = fs->object; - VM_OBJECT_ASSERT_WLOCKED(object); + VM_OBJECT_ASSERT_UNLOCKED(object); first_object = fs->first_object; - if (first_object != object) { - if (!VM_OBJECT_TRYWLOCK(first_object)) { - VM_OBJECT_WUNLOCK(object); - VM_OBJECT_WLOCK(first_object); - VM_OBJECT_WLOCK(object); - } - } /* Neither fictitious nor unmanaged pages can be reclaimed. */ if ((first_object->flags & (OBJ_FICTITIOUS | OBJ_UNMANAGED)) == 0) { + VM_OBJECT_RLOCK(first_object); size = VM_FAULT_DONTNEED_MIN; if (MAXPAGESIZES > 1 && size < pagesizes[1]) size = pagesizes[1]; @@ -1497,9 +1508,8 @@ vm_page_deactivate(m); } } + VM_OBJECT_RUNLOCK(first_object); } - if (first_object != object) - VM_OBJECT_WUNLOCK(first_object); } /*