Page MenuHomeFreeBSD

D54572.id169499.diff
No OneTemporary

D54572.id169499.diff

diff --git a/share/man/man7/tuning.7 b/share/man/man7/tuning.7
--- a/share/man/man7/tuning.7
+++ b/share/man/man7/tuning.7
@@ -222,6 +222,20 @@
.Va vm.stats.vm.v_wire_count
sysctls, respectively).
.Pp
+Due to the architecture of the
+.Fx
+virtual memory subsystem, the use of copy on write (CoW) anonymous
+memory, e.g. on
+.Xr fork 2 ,
+causes swap reservation for all three regions (VM objects),
+as in the original pre-fork mapping, and its copies in
+the parent and child, instead of only two.
+Eventually the subsystem tries to optimize the internal layout
+of the tracking for CoW and often removes (collapses) no longer
+needed backing objects, re-assigning their pages and swap
+reservations to the copies.
+This frees the swap reserve, but it is not guaranteed to happen.
+.Pp
The
.Va kern.ipc.maxpipekva
loader tunable is used to set a hard limit on the
diff --git a/sys/dev/md/md.c b/sys/dev/md/md.c
--- a/sys/dev/md/md.c
+++ b/sys/dev/md/md.c
@@ -1617,7 +1617,6 @@
0, 0);
swap_release_by_cred(IDX_TO_OFF(oldpages -
newpages), sc->cred);
- sc->s_swap.object->charge = IDX_TO_OFF(newpages);
sc->s_swap.object->size = newpages;
VM_OBJECT_WUNLOCK(sc->s_swap.object);
} else if (newpages > oldpages) {
@@ -1637,7 +1636,6 @@
}
}
VM_OBJECT_WLOCK(sc->s_swap.object);
- sc->s_swap.object->charge = IDX_TO_OFF(newpages);
sc->s_swap.object->size = newpages;
VM_OBJECT_WUNLOCK(sc->s_swap.object);
}
diff --git a/sys/kern/uipc_shm.c b/sys/kern/uipc_shm.c
--- a/sys/kern/uipc_shm.c
+++ b/sys/kern/uipc_shm.c
@@ -759,7 +759,6 @@
/* Free the swap accounted for shm */
swap_release_by_cred(delta, object->cred);
- object->charge -= delta;
} else {
if ((shmfd->shm_seals & F_SEAL_GROW) != 0)
return (EPERM);
@@ -768,7 +767,6 @@
delta = IDX_TO_OFF(nobjsize - object->size);
if (!swap_reserve_by_cred(delta, object->cred))
return (ENOMEM);
- object->charge += delta;
}
shmfd->shm_size = length;
mtx_lock(&shm_timestamp_lock);
diff --git a/sys/vm/swap_pager.c b/sys/vm/swap_pager.c
--- a/sys/vm/swap_pager.c
+++ b/sys/vm/swap_pager.c
@@ -239,7 +239,8 @@
#ifdef INVARIANTS
prev = atomic_fetchadd_long(&uip->ui_vmsize, -pdecr);
KASSERT(prev >= pdecr,
- ("negative vmsize for uid %d\n", uip->ui_uid));
+ ("negative vmsize for uid %d, prev %#jx decr %#jx\n",
+ uip->ui_uid, (uintmax_t)prev, (uintmax_t)pdecr));
#else
atomic_subtract_long(&uip->ui_vmsize, pdecr);
#endif
@@ -329,7 +330,7 @@
}
void
-swap_reserve_force(vm_ooffset_t incr)
+swap_reserve_force_by_cred(vm_ooffset_t incr, struct ucred *cred)
{
u_long pincr;
@@ -345,7 +346,13 @@
#endif
pincr = atop(incr);
atomic_add_long(&swap_reserved, pincr);
- swap_reserve_force_rlimit(pincr, curthread->td_ucred);
+ swap_reserve_force_rlimit(pincr, cred);
+}
+
+void
+swap_reserve_force(vm_ooffset_t incr)
+{
+ swap_reserve_force_by_cred(incr, curthread->td_ucred);
}
void
@@ -373,7 +380,8 @@
pdecr = atop(decr);
#ifdef INVARIANTS
prev = atomic_fetchadd_long(&swap_reserved, -pdecr);
- KASSERT(prev >= pdecr, ("swap_reserved < decr"));
+ KASSERT(prev >= pdecr, ("swap_reserved %#jx < decr %#jx",
+ (uintmax_t)prev, (uintmax_t)pdecr));
#else
atomic_subtract_long(&swap_reserved, pdecr);
#endif
@@ -776,10 +784,7 @@
object->un_pager.swp.writemappings = 0;
object->handle = handle;
- if (cred != NULL) {
- object->cred = cred;
- object->charge = size;
- }
+ object->cred = cred;
return (true);
}
@@ -892,8 +897,7 @@
* Release the allocation charge.
*/
if (object->cred != NULL) {
- swap_release_by_cred(object->charge, object->cred);
- object->charge = 0;
+ swap_release_by_cred(ptoa(object->size), object->cred);
crfree(object->cred);
object->cred = NULL;
}
diff --git a/sys/vm/vm.h b/sys/vm/vm.h
--- a/sys/vm/vm.h
+++ b/sys/vm/vm.h
@@ -168,6 +168,7 @@
bool swap_reserve(vm_ooffset_t incr);
bool swap_reserve_by_cred(vm_ooffset_t incr, struct ucred *cred);
void swap_reserve_force(vm_ooffset_t incr);
+void swap_reserve_force_by_cred(vm_ooffset_t incr, struct ucred *cred);
void swap_release(vm_ooffset_t decr);
void swap_release_by_cred(vm_ooffset_t decr, struct ucred *cred);
diff --git a/sys/vm/vm_fault.c b/sys/vm/vm_fault.c
--- a/sys/vm/vm_fault.c
+++ b/sys/vm/vm_fault.c
@@ -2310,13 +2310,12 @@
* directly.
*/
dst_object = vm_object_allocate_anon(atop(dst_entry->end -
- dst_entry->start), NULL, NULL, 0);
+ dst_entry->start), NULL, NULL);
#if VM_NRESERVLEVEL > 0
dst_object->flags |= OBJ_COLORED;
dst_object->pg_color = atop(dst_entry->start);
#endif
dst_object->domain = src_object->domain;
- dst_object->charge = dst_entry->end - dst_entry->start;
dst_entry->object.vm_object = dst_object;
dst_entry->offset = 0;
@@ -2329,7 +2328,7 @@
("vm_fault_copy_entry: leaked swp charge"));
dst_object->cred = curthread->td_ucred;
crhold(dst_object->cred);
- *fork_charge += dst_object->charge;
+ *fork_charge += ptoa(dst_object->size);
} else if ((dst_object->flags & OBJ_SWAP) != 0 &&
dst_object->cred == NULL) {
KASSERT(dst_entry->cred != NULL, ("no cred for entry %p",
diff --git a/sys/vm/vm_map.c b/sys/vm/vm_map.c
--- a/sys/vm/vm_map.c
+++ b/sys/vm/vm_map.c
@@ -2428,7 +2428,7 @@
KASSERT((entry->eflags & MAP_ENTRY_IS_SUB_MAP) == 0,
("map entry %p is a submap", entry));
object = vm_object_allocate_anon(atop(entry->end - entry->start), NULL,
- entry->cred, entry->end - entry->start);
+ entry->cred);
entry->object.vm_object = object;
entry->offset = 0;
entry->cred = NULL;
@@ -2443,21 +2443,26 @@
static inline void
vm_map_entry_charge_object(vm_map_t map, vm_map_entry_t entry)
{
+ vm_object_t object;
VM_MAP_ASSERT_LOCKED(map);
KASSERT((entry->eflags & MAP_ENTRY_IS_SUB_MAP) == 0,
("map entry %p is a submap", entry));
- if (entry->object.vm_object == NULL && !vm_map_is_system(map) &&
+ object = entry->object.vm_object;
+ if (object == NULL && !vm_map_is_system(map) &&
(entry->eflags & MAP_ENTRY_GUARD) == 0)
vm_map_entry_back(entry);
- else if (entry->object.vm_object != NULL &&
+ else if (object != NULL &&
((entry->eflags & MAP_ENTRY_NEEDS_COPY) == 0) &&
entry->cred != NULL) {
- VM_OBJECT_WLOCK(entry->object.vm_object);
- KASSERT(entry->object.vm_object->cred == NULL,
+ VM_OBJECT_WLOCK(object);
+ KASSERT(object->cred == NULL,
("OVERCOMMIT: %s: both cred e %p", __func__, entry));
- entry->object.vm_object->cred = entry->cred;
- entry->object.vm_object->charge = entry->end - entry->start;
+ object->cred = entry->cred;
+ if (entry->end - entry->start < ptoa(object->size)) {
+ swap_reserve_force_by_cred(ptoa(object->size) -
+ entry->end + entry->start, object->cred);
+ }
VM_OBJECT_WUNLOCK(entry->object.vm_object);
entry->cred = NULL;
}
@@ -2956,7 +2961,7 @@
* we cannot distinguish between non-charged and
* charged clipped mapping of the same object later.
*/
- KASSERT(obj->charge == 0,
+ KASSERT(obj->cred == NULL,
("vm_map_protect: object %p overcharged (entry %p)",
obj, entry));
if (!swap_reserve(ptoa(obj->size))) {
@@ -2968,7 +2973,6 @@
crhold(cred);
obj->cred = cred;
- obj->charge = ptoa(obj->size);
VM_OBJECT_WUNLOCK(obj);
}
@@ -3942,7 +3946,7 @@
vm_map_entry_delete(vm_map_t map, vm_map_entry_t entry)
{
vm_object_t object;
- vm_pindex_t offidxstart, offidxend, size1;
+ vm_pindex_t offidxstart, offidxend, oldsize;
vm_size_t size;
vm_map_entry_unlink(map, entry, UNLINK_MERGE_NONE);
@@ -3989,15 +3993,11 @@
OBJPR_NOTMAPPED);
if (offidxend >= object->size &&
offidxstart < object->size) {
- size1 = object->size;
+ oldsize = object->size;
object->size = offidxstart;
if (object->cred != NULL) {
- size1 -= object->size;
- KASSERT(object->charge >= ptoa(size1),
- ("object %p charge < 0", object));
- swap_release_by_cred(ptoa(size1),
- object->cred);
- object->charge -= ptoa(size1);
+					swap_release_by_cred(ptoa(oldsize -
+					    object->size), object->cred);
}
}
}
@@ -4198,7 +4198,7 @@
("OVERCOMMIT: vm_map_copy_anon_entry: cred %p",
src_object));
src_object->cred = src_entry->cred;
- src_object->charge = size;
+ *fork_charge += ptoa(src_object->size) - size;
}
dst_entry->object.vm_object = src_object;
if (charged) {
@@ -4455,7 +4455,7 @@
KASSERT(object->cred == NULL,
("vmspace_fork both cred"));
object->cred = old_entry->cred;
- object->charge = old_entry->end -
+ *fork_charge += old_entry->end -
old_entry->start;
old_entry->cred = NULL;
}
@@ -4957,6 +4957,13 @@
if (newvmspace == NULL)
return (ENOMEM);
if (!swap_reserve_by_cred(fork_charge, p->p_ucred)) {
+ /*
+ * The swap reservation failed. The accounting from
+ * the entries of the copied newvmspace will be
+ * subtracted in vmspace_free(), so force the
+ * reservation there.
+ */
+ swap_reserve_force_by_cred(fork_charge, p->p_ucred);
vmspace_free(newvmspace);
return (ENOMEM);
}
@@ -5138,7 +5145,7 @@
if (vm_map_lock_upgrade(map))
goto RetryLookup;
entry->object.vm_object = vm_object_allocate_anon(atop(size),
- NULL, entry->cred, size);
+ NULL, entry->cred);
entry->offset = 0;
entry->cred = NULL;
vm_map_lock_downgrade(map);
@@ -5396,9 +5403,8 @@
(void *)entry->object.vm_object,
(uintmax_t)entry->offset);
if (entry->object.vm_object && entry->object.vm_object->cred)
- db_printf(", obj ruid %d charge %jx",
- entry->object.vm_object->cred->cr_ruid,
- (uintmax_t)entry->object.vm_object->charge);
+		db_printf(", obj ruid %d",
+ entry->object.vm_object->cred->cr_ruid);
if (entry->eflags & MAP_ENTRY_COW)
db_printf(", copy (%s)",
(entry->eflags & MAP_ENTRY_NEEDS_COPY) ? "needed" : "done");
diff --git a/sys/vm/vm_object.h b/sys/vm/vm_object.h
--- a/sys/vm/vm_object.h
+++ b/sys/vm/vm_object.h
@@ -175,7 +175,6 @@
} phys;
} un_pager;
struct ucred *cred;
- vm_ooffset_t charge;
void *umtx_data;
};
@@ -356,8 +355,7 @@
extern int umtx_shm_vnobj_persistent;
vm_object_t vm_object_allocate (objtype_t, vm_pindex_t);
-vm_object_t vm_object_allocate_anon(vm_pindex_t, vm_object_t, struct ucred *,
- vm_size_t);
+vm_object_t vm_object_allocate_anon(vm_pindex_t, vm_object_t, struct ucred *);
vm_object_t vm_object_allocate_dyn(objtype_t, vm_pindex_t, u_short);
boolean_t vm_object_coalesce(vm_object_t, vm_ooffset_t, vm_size_t, vm_size_t,
int);
diff --git a/sys/vm/vm_object.c b/sys/vm/vm_object.c
--- a/sys/vm/vm_object.c
+++ b/sys/vm/vm_object.c
@@ -196,9 +196,9 @@
KASSERT(object->type == OBJT_DEAD,
("object %p has non-dead type %d",
object, object->type));
- KASSERT(object->charge == 0 && object->cred == NULL,
- ("object %p has non-zero charge %ju (%p)",
- object, (uintmax_t)object->charge, object->cred));
+ KASSERT(object->cred == NULL,
+	    ("object %p has non-NULL cred %p",
+ object, object->cred));
}
#endif
@@ -254,7 +254,6 @@
refcount_init(&object->ref_count, 1);
object->memattr = VM_MEMATTR_DEFAULT;
object->cred = NULL;
- object->charge = 0;
object->handle = handle;
object->backing_object = NULL;
object->backing_object_offset = (vm_ooffset_t) 0;
@@ -452,7 +451,7 @@
*/
vm_object_t
vm_object_allocate_anon(vm_pindex_t size, vm_object_t backing_object,
- struct ucred *cred, vm_size_t charge)
+ struct ucred *cred)
{
vm_object_t handle, object;
@@ -466,7 +465,6 @@
_vm_object_allocate(OBJT_SWAP, size,
OBJ_ANON | OBJ_ONEMAPPING | OBJ_SWAP, object, handle);
object->cred = cred;
- object->charge = cred != NULL ? charge : 0;
return (object);
}
@@ -1448,7 +1446,7 @@
/*
* Allocate a new object with the given length.
*/
- result = vm_object_allocate_anon(atop(length), source, cred, length);
+ result = vm_object_allocate_anon(atop(length), source, cred);
/*
* Store the offset into the source object, and fix up the offset into
@@ -1511,6 +1509,7 @@
struct pctrie_iter pages;
vm_page_t m;
vm_object_t orig_object, new_object, backing_object;
+ struct ucred *cred;
vm_pindex_t offidxstart;
vm_size_t size;
@@ -1525,9 +1524,26 @@
offidxstart = OFF_TO_IDX(entry->offset);
size = atop(entry->end - entry->start);
+ if (orig_object->cred != NULL) {
+ /*
+ * vm_object_split() is currently called from
+ * vmspace_fork(), and it might be tempting to add the
+ * charge for the split object to fork_charge. But
+ * fork_charge is discharged on error when the copied
+ * vmspace is destroyed. Since the split object is
+ * inserted into the shadow hierarchy serving the
+ * source vm_map, it is kept even after the
+ * unsuccessful fork, meaning that we have to force
+ * its swap usage.
+ */
+ cred = curthread->td_ucred;
+ crhold(cred);
+ swap_reserve_force_by_cred(ptoa(size), cred);
+ } else {
+ cred = NULL;
+ }
- new_object = vm_object_allocate_anon(size, orig_object,
- orig_object->cred, ptoa(size));
+ new_object = vm_object_allocate_anon(size, orig_object, cred);
/*
* We must wait for the orig_object to complete any in-progress
@@ -1550,12 +1566,6 @@
new_object->backing_object_offset =
orig_object->backing_object_offset + entry->offset;
}
- if (orig_object->cred != NULL) {
- crhold(orig_object->cred);
- KASSERT(orig_object->charge >= ptoa(size),
- ("orig_object->charge < 0"));
- orig_object->charge -= ptoa(size);
- }
/*
* Mark the split operation so that swap_pager_getpages() knows
@@ -2233,7 +2243,6 @@
swap_release_by_cred(ptoa(prev_object->size -
next_pindex), prev_object->cred);
}
- prev_object->charge += charge;
} else if ((cflags & OBJCO_CHARGED) != 0) {
/*
* The caller charged, but the object has
@@ -2786,9 +2795,8 @@
db_iprintf("Object %p: type=%d, size=0x%jx, res=%d, ref=%d, flags=0x%x",
object, (int)object->type, (uintmax_t)object->size,
object->resident_page_count, object->ref_count, object->flags);
- db_iprintf(" ruid %d charge %jx\n",
- object->cred ? object->cred->cr_ruid : -1,
- (uintmax_t)object->charge);
+ db_iprintf(" ruid %d\n",
+ object->cred ? object->cred->cr_ruid : -1);
db_iprintf(" sref=%d, backing_object(%d)=(%p)+0x%jx\n",
atomic_load_int(&object->shadow_count),
object->backing_object ? object->backing_object->ref_count : 0,

File Metadata

Mime Type
text/plain
Expires
Sun, Jan 18, 1:19 PM (8 h, 45 m)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
27702847
Default Alt Text
D54572.id169499.diff (14 KB)

Event Timeline