Index: sys/vm/vm_fault.c
===================================================================
--- sys/vm/vm_fault.c
+++ sys/vm/vm_fault.c
@@ -123,6 +123,7 @@
 	vm_map_t map;
 	vm_map_entry_t entry;
 	int lookup_still_valid;
+	int ra_done;
 	struct vnode *vp;
 };
 
@@ -137,7 +138,12 @@
 	vm_page_xunbusy(fs->m);
 	vm_page_lock(fs->m);
-	vm_page_deactivate(fs->m);
+	if (fs->m->valid == 0) {
+		if (fs->m->wire_count == 0)
+			vm_page_free(fs->m);
+	} else {
+		vm_page_deactivate(fs->m);
+	}
 	vm_page_unlock(fs->m);
 	fs->m = NULL;
 }
 
@@ -292,13 +298,15 @@
 	struct faultstate fs;
 	struct vnode *vp;
 	vm_page_t m;
-	int ahead, behind, cluster_offset, error, locked;
+	int ahead, behind, cluster_offset, dead, error, locked, rv;
+	u_char behavior;
 
 	hardfault = 0;
 	growstack = TRUE;
 	PCPU_INC(cnt.v_vm_faults);
 	fs.vp = NULL;
 	faultcount = 0;
+	fs.ra_done = FALSE;
 
 RetryFault:;
 
@@ -412,7 +420,7 @@
 	fs.lookup_still_valid = TRUE;
 
-	fs.first_m = NULL;
+	fs.m = fs.first_m = NULL;
 
 	/*
 	 * Search for the page at object/offset.
 	 */
@@ -421,11 +429,20 @@
 	fs.pindex = fs.first_pindex;
 	while (TRUE) {
 		/*
-		 * If the object is dead, we stop here
+		 * If the object is marked for imminent termination,
+		 * we retry here, since the collapse pass has raced
+		 * with us. Otherwise, if we see a terminally dead
+		 * object, return failure.
 		 */
-		if (fs.object->flags & OBJ_DEAD) {
+		if ((fs.object->flags & OBJ_DEAD) != 0) {
+			dead = fs.object->type == OBJT_DEAD;
+			if (fs.m != NULL && fs.m != fs.first_m)
+				release_page(&fs);
 			unlock_and_deallocate(&fs);
-			return (KERN_PROTECTION_FAILURE);
+			if (dead)
+				return (KERN_PROTECTION_FAILURE);
+			pause("vmf_de", 1);
+			goto RetryFault;
 		}
 
 		/*
@@ -550,9 +567,18 @@
 		 * at the same time.
 		 */
		if (fs.object->type != OBJT_DEFAULT) {
-			int rv;
-			u_char behavior = vm_map_entry_behavior(fs.entry);
-
+			if (!fs.lookup_still_valid) {
+				locked = vm_map_trylock_read(fs.map);
+				if (locked)
+					fs.lookup_still_valid = TRUE;
+				if (!locked || fs.map->timestamp !=
+				    map_generation) {
+					release_page(&fs);
+					unlock_and_deallocate(&fs);
+					goto RetryFault;
+				}
+			}
+			behavior = vm_map_entry_behavior(fs.entry);
 			era = fs.entry->read_ahead;
 			if (behavior == MAP_ENTRY_BEHAV_RANDOM ||
 			    P_KILLED(curproc)) {
@@ -563,7 +589,8 @@
 				behind = 0;
 				nera = VM_FAULT_READ_AHEAD_MAX;
 				ahead = nera;
-				if (fs.pindex == fs.entry->next_read)
+				if (fs.pindex == fs.entry->next_read &&
+				    !fs.ra_done)
 					vm_fault_dontneed(&fs, vaddr, ahead);
 			} else if (fs.pindex == fs.entry->next_read) {
 				/*
@@ -574,14 +601,17 @@
 				 * x (read ahead min + 1) + read ahead min"
 				 */
 				behind = 0;
-				nera = VM_FAULT_READ_AHEAD_MIN;
-				if (era > 0) {
-					nera += era + 1;
-					if (nera > VM_FAULT_READ_AHEAD_MAX)
-						nera = VM_FAULT_READ_AHEAD_MAX;
+				if (!fs.ra_done) {
+					nera = VM_FAULT_READ_AHEAD_MIN;
+					if (era > 0) {
+						nera += era + 1;
+						if (nera > VM_FAULT_READ_AHEAD_MAX)
+							nera = VM_FAULT_READ_AHEAD_MAX;
+					}
 				}
 				ahead = nera;
-				if (era == VM_FAULT_READ_AHEAD_MAX)
+				if (era == VM_FAULT_READ_AHEAD_MAX &&
+				    !fs.ra_done)
 					vm_fault_dontneed(&fs, vaddr, ahead);
 			} else {
 				/*
@@ -603,7 +633,18 @@
 			}
 			ahead = ulmin(ahead, atop(fs.entry->end - vaddr) - 1);
 			if (era != nera)
+				/*
+				 * Only the read lock on the map is
+				 * held here. It is fine for another
+				 * thread faulting on the same entry
+				 * to race with us on this update,
+				 * causing some inaccuracy in the
+				 * read-ahead heuristic. We do not
+				 * separate two different streams of
+				 * sequential faults on one entry anyway.
+				 */
 				fs.entry->read_ahead = nera;
+			fs.ra_done = TRUE;
 
 			/*
 			 * Call the pager to retrieve the data, if any, after
@@ -924,8 +965,10 @@
 	 * If the page was filled by a pager, update the map entry's
 	 * last read offset.
 	 *
-	 * XXX The following assignment modifies the map
-	 * without holding a write lock on it.
+	 * The next_read assignment modifies the map without holding a
+	 * write lock on it, which is acceptable. See the motivation
+	 * in the comment above the update of the entry->read_ahead
+	 * field, which is modified in the same manner.
 	 */
 	if (hardfault)
 		fs.entry->next_read = fs.pindex + ahead + 1;
Index: sys/vm/vm_meter.c
===================================================================
--- sys/vm/vm_meter.c
+++ sys/vm/vm_meter.c
@@ -108,7 +108,15 @@
 	mtx_lock(&vm_object_list_mtx);
 	TAILQ_FOREACH(object, &vm_object_list, object_list) {
 		VM_OBJECT_WLOCK(object);
-		vm_object_clear_flag(object, OBJ_ACTIVE);
+		/*
+		 * Do not modify the object if _vm_object_allocate()
+		 * could be executed in parallel. OBJ_DEAD is set
+		 * during object termination and reset on
+		 * (re-)allocation. Since object allocation runs
+		 * unlocked, we must avoid tampering with the memory.
+		 */
+		if ((object->flags & OBJ_DEAD) == 0)
+			vm_object_clear_flag(object, OBJ_ACTIVE);
 		VM_OBJECT_WUNLOCK(object);
 	}
 	mtx_unlock(&vm_object_list_mtx);
@@ -169,7 +177,8 @@
 		    (object = entry->object.vm_object) == NULL)
 			continue;
 		VM_OBJECT_WLOCK(object);
-		vm_object_set_flag(object, OBJ_ACTIVE);
+		if ((object->flags & OBJ_DEAD) == 0)
+			vm_object_set_flag(object, OBJ_ACTIVE);
 		paging |= object->paging_in_progress;
 		VM_OBJECT_WUNLOCK(object);
 	}
Index: sys/vm/vm_object.c
===================================================================
--- sys/vm/vm_object.c
+++ sys/vm/vm_object.c
@@ -206,6 +206,7 @@
 
 	/* These are true for any object that has been freed */
 	object->type = OBJT_DEAD;
+	object->flags = OBJ_DEAD;
 	object->ref_count = 0;
 	object->rtree.rt_root = 0;
 	object->rtree.rt_flags = 0;
@@ -1584,7 +1585,7 @@
 			continue;
 		}
 
-		KASSERT(pp == NULL || pp->valid != 0,
+		KASSERT(pp == NULL || pp->wire_count > 0 || pp->valid != 0,
 		    ("unbusy invalid page %p", pp));
 
 		if (pp != NULL || vm_pager_has_page(object, new_pindex, NULL,
Index: sys/vm/vnode_pager.c
===================================================================
--- sys/vm/vnode_pager.c
+++ sys/vm/vnode_pager.c
@@ -169,10 +169,16 @@
 		/*
 		 * don't double-terminate the object
 		 */
-		if ((obj->flags & OBJ_DEAD) == 0)
+		if ((obj->flags & OBJ_DEAD) == 0) {
 			vm_object_terminate(obj);
-		else
+		} else {
+			if ((obj->flags & OBJ_DISCONNECTWNT) != 0) {
+				vm_object_clear_flag(obj, OBJ_DISCONNECTWNT);
+				wakeup(obj);
+			}
+			vp->v_object = NULL;
 			VM_OBJECT_WUNLOCK(obj);
+		}
 	} else {
 		/*
 		 * Woe to the process that tries to page now :-).
@@ -180,7 +186,7 @@
 		 */
 		vm_pager_deallocate(obj);
 		VM_OBJECT_WUNLOCK(obj);
 	}
-	vp->v_object = NULL;
+	KASSERT(vp->v_object == NULL, ("vp %p obj %p", vp, vp->v_object));
 }
 