Index: sys/vm/swap_pager.c
===================================================================
--- sys/vm/swap_pager.c
+++ sys/vm/swap_pager.c
@@ -690,7 +690,7 @@
	 * Remove from list right away so lookups will fail if we block for
	 * pageout completion.
	 */
-	if (object->handle != NULL) {
+	if ((object->flags & OBJ_ANON) == 0 && object->handle != NULL) {
		VM_OBJECT_WUNLOCK(object);
		sx_xlock(&sw_alloc_sx);
		TAILQ_REMOVE(NOBJLIST(object->handle), object,
@@ -995,7 +995,8 @@
	 * If destroysource is set, we remove the source object from the
	 * swap_pager internal queue now.
	 */
-	if (destroysource && srcobject->handle != NULL) {
+	if (destroysource && (srcobject->flags & OBJ_ANON) == 0 &&
+	    srcobject->handle != NULL) {
		vm_object_pip_add(srcobject, 1);
		VM_OBJECT_WUNLOCK(srcobject);
		vm_object_pip_add(dstobject, 1);
@@ -1948,7 +1949,10 @@
		object->type = OBJT_SWAP;
		object->un_pager.swp.writemappings = 0;
-		KASSERT(object->handle == NULL, ("default pager with handle"));
+		KASSERT((object->flags & OBJ_ANON) != 0 ||
+		    object->handle == NULL,
+		    ("default pager %p with handle %p",
+		    object, object->handle));
	}
	rdpi = rounddown(pindex, SWAP_META_PAGES);
Index: sys/vm/vm_fault.c
===================================================================
--- sys/vm/vm_fault.c
+++ sys/vm/vm_fault.c
@@ -1735,11 +1735,12 @@
		vm_object_reference(dst_object);
	} else {
		/*
-		 * Create the top-level object for the destination entry. (Doesn't
-		 * actually shadow anything - we copy the pages directly.)
+		 * Create the top-level object for the destination entry.
+		 * Doesn't actually shadow anything - we copy the pages
+		 * directly.
		 */
-		dst_object = vm_object_allocate_anon(
-		    atop(dst_entry->end - dst_entry->start));
+		dst_object = vm_object_allocate_anon(atop(dst_entry->end -
+		    dst_entry->start), NULL, NULL, 0);
 #if VM_NRESERVLEVEL > 0
		dst_object->flags |= OBJ_COLORED;
		dst_object->pg_color = atop(dst_entry->start);
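Illustration (not part of the patch): after this change an anonymous object may carry a non-NULL handle pointing at the bottom of its shadow chain, so the swap pager can no longer treat "handle != NULL" as meaning "hashed on the named-object list" — hence the OBJ_ANON checks added in the swap_pager.c hunks above. A minimal userspace sketch of the predicate change; the names and the flag value here are illustrative, not the kernel's:

#include <assert.h>
#include <stdbool.h>
#include <stddef.h>

#define OBJ_ANON 0x10000	/* illustrative value, not the kernel's */

struct obj {
	int flags;
	void *handle;
};

/* Old predicate: any non-NULL handle meant "on the named-object list". */
static bool
on_nobjlist_old(const struct obj *o)
{
	return (o->handle != NULL);
}

/* New predicate: an anon object's handle is its content object instead. */
static bool
on_nobjlist_new(const struct obj *o)
{
	return ((o->flags & OBJ_ANON) == 0 && o->handle != NULL);
}

int
main(void)
{
	struct obj named = { 0, &named };	/* handle: pager handle */
	struct obj anon = { OBJ_ANON, &named };	/* handle: content object */

	assert(on_nobjlist_old(&anon));		/* wrong after the change */
	assert(!on_nobjlist_new(&anon));	/* correct */
	assert(on_nobjlist_new(&named));
	return (0);
}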
Index: sys/vm/vm_map.c
===================================================================
--- sys/vm/vm_map.c
+++ sys/vm/vm_map.c
@@ -507,8 +507,9 @@
 void
 vm_map_entry_set_vnode_text(vm_map_entry_t entry, bool add)
 {
-	vm_object_t object, object1;
+	vm_object_t object;
	struct vnode *vp;
+	bool vp_held;

	if ((entry->eflags & MAP_ENTRY_VN_EXEC) == 0)
		return;
@@ -516,14 +517,21 @@
	    ("Submap with execs"));
	object = entry->object.vm_object;
	KASSERT(object != NULL, ("No object for text, entry %p", entry));
-	VM_OBJECT_RLOCK(object);
-	while ((object1 = object->backing_object) != NULL) {
-		VM_OBJECT_RLOCK(object1);
-		VM_OBJECT_RUNLOCK(object);
-		object = object1;
-	}
+	if ((object->flags & OBJ_ANON) != 0)
+		object = object->handle;
+	else
+		KASSERT(object->backing_object == NULL,
+		    ("non-anon object %p shadows", object));
+	KASSERT(object != NULL, ("No content object for text, entry %p obj %p",
+	    entry, entry->object.vm_object));
+
+	/*
+	 * Mostly, we do not lock the backing object.  It is
+	 * referenced by the entry we are processing, so it cannot go
+	 * away.
+	 */
	vp = NULL;
+	vp_held = false;
	if (object->type == OBJT_DEAD) {
		/*
		 * For OBJT_DEAD objects, v_writecount was handled in
@@ -540,8 +548,15 @@
		 * OBJ_TMPFS_NODE flag set, but not OBJ_TMPFS.  In
		 * this case there is no v_writecount to adjust.
		 */
-		if ((object->flags & OBJ_TMPFS) != 0)
+		VM_OBJECT_RLOCK(object);
+		if ((object->flags & OBJ_TMPFS) != 0) {
			vp = object->un_pager.swp.swp_tmpfs;
+			if (vp != NULL) {
+				vhold(vp);
+				vp_held = true;
+			}
+		}
+		VM_OBJECT_RUNLOCK(object);
	} else {
		KASSERT(0,
		    ("vm_map_entry_set_vnode_text: wrong object type, "
@@ -550,17 +565,13 @@
	if (vp != NULL) {
		if (add) {
			VOP_SET_TEXT_CHECKED(vp);
-			VM_OBJECT_RUNLOCK(object);
		} else {
-			vhold(vp);
-			VM_OBJECT_RUNLOCK(object);
			vn_lock(vp, LK_SHARED | LK_RETRY);
			VOP_UNSET_TEXT_CHECKED(vp);
			VOP_UNLOCK(vp, 0);
-			vdrop(vp);
		}
-	} else {
-		VM_OBJECT_RUNLOCK(object);
+		if (vp_held)
+			vdrop(vp);
	}
 }
@@ -2203,14 +2214,11 @@
	    ("map entry %p has backing object", entry));
	KASSERT((entry->eflags & MAP_ENTRY_IS_SUB_MAP) == 0,
	    ("map entry %p is a submap", entry));
-	object = vm_object_allocate_anon(atop(entry->end - entry->start));
+	object = vm_object_allocate_anon(atop(entry->end - entry->start), NULL,
+	    entry->cred, entry->end - entry->start);
	entry->object.vm_object = object;
	entry->offset = 0;
-	if (entry->cred != NULL) {
-		object->cred = entry->cred;
-		object->charge = entry->end - entry->start;
-		entry->cred = NULL;
-	}
+	entry->cred = NULL;
 }

 /*
@@ -4071,9 +4079,12 @@
		if (old_entry->eflags & MAP_ENTRY_NEEDS_COPY) {
			vm_object_shadow(&old_entry->object.vm_object,
			    &old_entry->offset,
-			    old_entry->end - old_entry->start);
+			    old_entry->end - old_entry->start,
+			    old_entry->cred,
+			    /* Transfer the second reference too. */
+			    true);
			old_entry->eflags &= ~MAP_ENTRY_NEEDS_COPY;
-			/* Transfer the second reference too. */
+			old_entry->cred = NULL;
			vm_object_reference(
			    old_entry->object.vm_object);
@@ -4084,32 +4095,37 @@
				 */
				vm_object_deallocate(object);
				object = old_entry->object.vm_object;
-			}
-			VM_OBJECT_WLOCK(object);
-			vm_object_clear_flag(object, OBJ_ONEMAPPING);
-			if (old_entry->cred != NULL) {
-				KASSERT(object->cred == NULL, ("vmspace_fork both cred"));
-				object->cred = old_entry->cred;
-				object->charge = old_entry->end - old_entry->start;
-				old_entry->cred = NULL;
-			}
+			} else {
+				VM_OBJECT_WLOCK(object);
+				vm_object_clear_flag(object, OBJ_ONEMAPPING);
+				if (old_entry->cred != NULL) {
+					KASSERT(object->cred == NULL,
+					    ("vmspace_fork both cred"));
+					object->cred = old_entry->cred;
+					object->charge = old_entry->end -
+					    old_entry->start;
+					old_entry->cred = NULL;
+				}

-			/*
-			 * Assert the correct state of the vnode
-			 * v_writecount while the object is locked, to
-			 * not relock it later for the assertion
-			 * correctness.
-			 */
-			if (old_entry->eflags & MAP_ENTRY_WRITECNT &&
-			    object->type == OBJT_VNODE) {
-				KASSERT(((struct vnode *)object->handle)->
-				    v_writecount > 0,
-				    ("vmspace_fork: v_writecount %p", object));
-				KASSERT(object->un_pager.vnp.writemappings > 0,
-				    ("vmspace_fork: vnp.writecount %p",
-				    object));
+				/*
+				 * Assert the correct state of the vnode
+				 * v_writecount while the object is locked, to
+				 * not relock it later for the assertion
+				 * correctness.
+				 */
+				if (old_entry->eflags & MAP_ENTRY_WRITECNT &&
+				    object->type == OBJT_VNODE) {
+					KASSERT(((struct vnode *)object->
+					    handle)->v_writecount > 0,
+					    ("vmspace_fork: v_writecount %p",
+					    object));
+					KASSERT(object->un_pager.vnp.
+					    writemappings > 0,
+					    ("vmspace_fork: vnp.writecount %p",
+					    object));
+				}
+				VM_OBJECT_WUNLOCK(object);
			}
-			VM_OBJECT_WUNLOCK(object);

			/*
			 * Clone the entry, referencing the shared object.
@@ -4737,6 +4753,7 @@
	if (*wired)
		fault_type = entry->protection;
	size = entry->end - entry->start;
+
	/*
	 * If the entry was copy-on-write, we either ...
	 */
@@ -4773,24 +4790,18 @@
			}
			entry->cred = cred;
		}
-		vm_object_shadow(&entry->object.vm_object,
-		    &entry->offset, size);
-		entry->eflags &= ~MAP_ENTRY_NEEDS_COPY;
		eobject = entry->object.vm_object;
-		if (eobject->cred != NULL) {
+		vm_object_shadow(&entry->object.vm_object,
+		    &entry->offset, size, entry->cred, false);
+		if (eobject == entry->object.vm_object) {
			/*
			 * The object was not shadowed.
			 */
			swap_release_by_cred(size, entry->cred);
			crfree(entry->cred);
-			entry->cred = NULL;
-		} else if (entry->cred != NULL) {
-			VM_OBJECT_WLOCK(eobject);
-			eobject->cred = entry->cred;
-			eobject->charge = size;
-			VM_OBJECT_WUNLOCK(eobject);
-			entry->cred = NULL;
		}
+		entry->cred = NULL;
+		entry->eflags &= ~MAP_ENTRY_NEEDS_COPY;

		vm_map_lock_downgrade(map);
	} else {
@@ -4805,19 +4816,13 @@
	/*
	 * Create an object if necessary.
	 */
-	if (entry->object.vm_object == NULL &&
-	    !map->system_map) {
+	if (entry->object.vm_object == NULL && !map->system_map) {
		if (vm_map_lock_upgrade(map))
			goto RetryLookup;
-		entry->object.vm_object = vm_object_allocate_anon(atop(size));
+		entry->object.vm_object = vm_object_allocate_anon(atop(size),
+		    NULL, entry->cred, entry->cred != NULL ? size : 0);
		entry->offset = 0;
-		if (entry->cred != NULL) {
-			VM_OBJECT_WLOCK(entry->object.vm_object);
-			entry->object.vm_object->cred = entry->cred;
-			entry->object.vm_object->charge = size;
-			VM_OBJECT_WUNLOCK(entry->object.vm_object);
-			entry->cred = NULL;
-		}
+		entry->cred = NULL;
		vm_map_lock_downgrade(map);
	}
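Illustration (not part of the patch): the rewritten vm_map_entry_set_vnode_text() no longer crabs read locks down the backing-object chain; for OBJ_ANON objects it reads the cached bottom object from handle in constant time, relying on the map entry's reference to keep that object alive. A userspace model of the two lookups, with simplified field names that are not the kernel's:

#include <assert.h>
#include <stdbool.h>
#include <stddef.h>

struct obj {
	struct obj *backing_object;
	struct obj *handle;	/* anon objects: bottom of the shadow chain */
	bool is_anon;
};

/* Old lookup: walk to the bottom; lock-crabbing in the real kernel. */
static struct obj *
content_object_walk(struct obj *o)
{
	while (o->backing_object != NULL)
		o = o->backing_object;
	return (o);
}

/* New lookup: O(1); the map entry's reference keeps the object alive. */
static struct obj *
content_object_handle(struct obj *o)
{
	return (o->is_anon ? o->handle : o);
}

int
main(void)
{
	struct obj vnode_obj = { NULL, NULL, false };
	struct obj shadow1 = { &vnode_obj, &vnode_obj, true };
	struct obj shadow2 = { &shadow1, &vnode_obj, true };

	assert(content_object_walk(&shadow2) == &vnode_obj);
	assert(content_object_handle(&shadow2) == &vnode_obj);
	return (0);
}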
Index: sys/vm/vm_object.h
===================================================================
--- sys/vm/vm_object.h
+++ sys/vm/vm_object.h
@@ -341,7 +341,8 @@
 extern int umtx_shm_vnobj_persistent;

 vm_object_t vm_object_allocate (objtype_t, vm_pindex_t);
-vm_object_t vm_object_allocate_anon(vm_pindex_t);
+vm_object_t vm_object_allocate_anon(vm_pindex_t, vm_object_t, struct ucred *,
+    vm_size_t);
 boolean_t vm_object_coalesce(vm_object_t, vm_ooffset_t, vm_size_t, vm_size_t,
    boolean_t);
 void vm_object_collapse (vm_object_t);
@@ -363,7 +364,8 @@
 void vm_object_reference (vm_object_t);
 void vm_object_reference_locked(vm_object_t);
 int vm_object_set_memattr(vm_object_t object, vm_memattr_t memattr);
-void vm_object_shadow (vm_object_t *, vm_ooffset_t *, vm_size_t);
+void vm_object_shadow(vm_object_t *, vm_ooffset_t *, vm_size_t, struct ucred *,
+    bool);
 void vm_object_split(vm_map_entry_t);
 boolean_t vm_object_sync(vm_object_t, vm_ooffset_t, vm_size_t, boolean_t,
     boolean_t);
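Illustration (not part of the patch): the widened prototypes let callers hand cred and charge to vm_object_allocate_anon() at allocation time instead of locking the new object afterwards to set them, which is the migration performed in the vm_map.c hunks above. A compilable sketch of that contract, using stand-in types rather than the kernel's:

#include <assert.h>
#include <stddef.h>
#include <stdlib.h>

struct ucred { int refs; };

struct object {
	struct ucred *cred;
	size_t charge;
};

/* Model of the new prototype: cred/charge are applied at allocation. */
static struct object *
allocate_anon(size_t npages, struct object *backing, struct ucred *cred,
    size_t charge)
{
	struct object *o = calloc(1, sizeof(*o));

	if (o == NULL)
		abort();
	(void)npages;
	(void)backing;
	o->cred = cred;
	o->charge = cred != NULL ? charge : 0;	/* charged only with a cred */
	return (o);
}

int
main(void)
{
	struct ucred cr = { 1 };
	struct object *plain = allocate_anon(1, NULL, NULL, 4096);
	struct object *charged = allocate_anon(1, NULL, &cr, 4096);

	assert(plain->cred == NULL && plain->charge == 0);
	assert(charged->cred == &cr && charged->charge == 4096);
	free(plain);
	free(charged);
	return (0);
}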
Index: sys/vm/vm_object.c
===================================================================
--- sys/vm/vm_object.c
+++ sys/vm/vm_object.c
@@ -241,7 +241,7 @@

 static void
 _vm_object_allocate(objtype_t type, vm_pindex_t size, u_short flags,
-    vm_object_t object)
+    vm_object_t object, void *handle)
 {

	TAILQ_INIT(&object->memq);
@@ -268,7 +268,7 @@
	object->memattr = VM_MEMATTR_DEFAULT;
	object->cred = NULL;
	object->charge = 0;
-	object->handle = NULL;
+	object->handle = handle;
	object->backing_object = NULL;
	object->backing_object_offset = (vm_ooffset_t) 0;
 #if VM_NRESERVLEVEL > 0
@@ -290,7 +290,7 @@
	rw_init(&kernel_object->lock, "kernel vm object");
	_vm_object_allocate(OBJT_PHYS, atop(VM_MAX_KERNEL_ADDRESS -
-	    VM_MIN_KERNEL_ADDRESS), OBJ_UNMANAGED, kernel_object);
+	    VM_MIN_KERNEL_ADDRESS), OBJ_UNMANAGED, kernel_object, NULL);
 #if VM_NRESERVLEVEL > 0
	kernel_object->flags |= OBJ_COLORED;
	kernel_object->pg_color = (u_short)atop(VM_MIN_KERNEL_ADDRESS);
@@ -434,7 +434,7 @@
		panic("vm_object_allocate: type %d is undefined", type);
	}
	object = (vm_object_t)uma_zalloc(obj_zone, M_WAITOK);
-	_vm_object_allocate(type, size, flags, object);
+	_vm_object_allocate(type, size, flags, object, NULL);

	return (object);
 }
@@ -447,14 +447,22 @@
  * to be initialized by the caller.
  */
 vm_object_t
-vm_object_allocate_anon(vm_pindex_t size)
+vm_object_allocate_anon(vm_pindex_t size, vm_object_t backing_object,
+    struct ucred *cred, vm_size_t charge)
 {
-	vm_object_t object;
-
-	object = (vm_object_t)uma_zalloc(obj_zone, M_WAITOK);
+	vm_object_t handle, object;
+
+	if (backing_object == NULL)
+		handle = NULL;
+	else if ((backing_object->flags & OBJ_ANON) != 0)
+		handle = backing_object->handle;
+	else
+		handle = backing_object;
+	object = uma_zalloc(obj_zone, M_WAITOK);
	_vm_object_allocate(OBJT_DEFAULT, size, OBJ_ANON | OBJ_ONEMAPPING,
-	    object);
-
+	    object, handle);
+	object->cred = cred;
+	object->charge = cred != NULL ? charge : 0;
	return (object);
 }
@@ -1297,10 +1305,8 @@
  * are returned in the source parameters.
  */
 void
-vm_object_shadow(
-	vm_object_t *object,	/* IN/OUT */
-	vm_ooffset_t *offset,	/* IN/OUT */
-	vm_size_t length)
+vm_object_shadow(vm_object_t *object, vm_ooffset_t *offset, vm_size_t length,
+    struct ucred *cred, bool shared)
 {
	vm_object_t source;
	vm_object_t result;
@@ -1322,7 +1328,7 @@
	/*
	 * Allocate a new object with the given length.
	 */
-	result = vm_object_allocate_anon(atop(length));
+	result = vm_object_allocate_anon(atop(length), source, cred, length);

	/*
	 * Store the offset into the source object, and fix up the offset into
@@ -1330,25 +1336,37 @@
	 */
	result->backing_object_offset = *offset;

-	/*
-	 * The new object shadows the source object, adding a reference to it.
-	 * Our caller changes his reference to point to the new object,
-	 * removing a reference to the source object.  Net result: no change
-	 * of reference count.
-	 *
-	 * Try to optimize the result object's page color when shadowing
-	 * in order to maintain page coloring consistency in the combined
-	 * shadowed object.
-	 */
-	if (source != NULL) {
+	if (shared || source != NULL) {
		VM_OBJECT_WLOCK(result);
-		vm_object_backing_insert(result, source);
-		result->domain = source->domain;
+
+		/*
+		 * The new object shadows the source object, adding a
+		 * reference to it.  Our caller changes his reference
+		 * to point to the new object, removing a reference to
+		 * the source object.  Net result: no change of
+		 * reference count, unless the caller needs to add one
+		 * more reference due to forking a shared map entry.
+		 */
+		if (shared) {
+			vm_object_reference_locked(result);
+			vm_object_clear_flag(result, OBJ_ONEMAPPING);
+		}
+
+		/*
+		 * Try to optimize the result object's page color when
+		 * shadowing in order to maintain page coloring
+		 * consistency in the combined shadowed object.
+		 */
+		if (source != NULL) {
+			vm_object_backing_insert(result, source);
+			result->domain = source->domain;
 #if VM_NRESERVLEVEL > 0
-		result->flags |= source->flags & OBJ_COLORED;
-		result->pg_color = (source->pg_color + OFF_TO_IDX(*offset)) &
-		    ((1 << (VM_NFREEORDER - 1)) - 1);
+			result->flags |= source->flags & OBJ_COLORED;
+			result->pg_color = (source->pg_color +
+			    OFF_TO_IDX(*offset)) & ((1 << (VM_NFREEORDER -
+			    1)) - 1);
 #endif
+		}
		VM_OBJECT_WUNLOCK(result);
	}
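Illustration (not part of the patch): vm_object_shadow() now takes a "shared" flag so that, when forking a shared map entry, the second reference and the clearing of OBJ_ONEMAPPING happen while the new object is locked, rather than as a separate step in the caller. A simplified userspace model of that contract; names and the flag value are stand-ins, not the kernel's:

#include <assert.h>
#include <stdbool.h>
#include <stddef.h>
#include <stdlib.h>

#define ONEMAPPING 0x1	/* stand-in for OBJ_ONEMAPPING */

struct object {
	int refs;
	int flags;
	struct object *backing;
};

/* Model: shadow a source object, optionally for a shared map entry. */
static struct object *
shadow(struct object **objp, bool shared)
{
	struct object *result = calloc(1, sizeof(*result));

	if (result == NULL)
		abort();
	result->refs = 1;
	result->flags = ONEMAPPING;
	result->backing = *objp;	/* consumes the caller's reference */
	if (shared) {
		/* Done under the new object's lock in the real code. */
		result->refs++;
		result->flags &= ~ONEMAPPING;
	}
	*objp = result;
	return (result);
}

int
main(void)
{
	struct object source = { 1, 0, NULL };
	struct object *entry_obj = &source;
	struct object *sh = shadow(&entry_obj, true);

	assert(entry_obj == sh);
	assert(sh->refs == 2 && (sh->flags & ONEMAPPING) == 0);
	free(sh);
	return (0);
}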
@@ -1388,7 +1406,8 @@
	 * If swap_pager_copy() is later called, it will convert new_object
	 * into a swap object.
	 */
-	new_object = vm_object_allocate_anon(size);
+	new_object = vm_object_allocate_anon(size, orig_object,
+	    orig_object->cred, ptoa(size));

	/*
	 * At this point, the new object is still private, so the order in
@@ -1405,6 +1424,7 @@
		VM_OBJECT_WUNLOCK(source);
		VM_OBJECT_WUNLOCK(orig_object);
		VM_OBJECT_WUNLOCK(new_object);
+		new_object->cred = NULL;
		vm_object_deallocate(new_object);
		VM_OBJECT_WLOCK(orig_object);
		return;
@@ -1421,9 +1441,7 @@
		    orig_object->backing_object_offset + entry->offset;
	}
	if (orig_object->cred != NULL) {
-		new_object->cred = orig_object->cred;
		crhold(orig_object->cred);
-		new_object->charge = ptoa(size);
		KASSERT(orig_object->charge >= ptoa(size),
		    ("orig_object->charge < 0"));
		orig_object->charge -= ptoa(size);
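Illustration (not part of the patch): vm_object_split() now seeds new_object with orig_object's cred and charge at allocation time, but the crhold() and charge transfer only happen once the split commits; the early-return path therefore clears new_object->cred before deallocating, or the destructor would release a charge the object never owned. A small model of that ownership rule, with stand-in types:

#include <assert.h>
#include <stddef.h>

struct ucred { int refs; };

static void
crfree(struct ucred *c)
{
	assert(c->refs > 0);
	c->refs--;
}

struct object { struct ucred *cred; size_t charge; };

/* Destructor model: a set cred means "this object owns a swap charge". */
static void
object_deallocate(struct object *o)
{
	if (o->cred != NULL) {
		crfree(o->cred);	/* and would release o->charge */
		o->cred = NULL;
	}
}

int
main(void)
{
	struct ucred cr = { 1 };
	struct object orig = { &cr, 8192 };
	/* Allocation copies orig's cred without taking a reference, */
	struct object newobj = { orig.cred, 4096 };

	/* so an aborted split must clear it before deallocating: */
	newobj.cred = NULL;
	object_deallocate(&newobj);
	assert(cr.refs == 1);	/* orig_object's reference survives */
	return (0);
}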