D54572: Fix charge accounting for objects
D54572.id169499.diff (14 KB)
diff --git a/share/man/man7/tuning.7 b/share/man/man7/tuning.7
--- a/share/man/man7/tuning.7
+++ b/share/man/man7/tuning.7
@@ -222,6 +222,20 @@
.Va vm.stats.vm.v_wire_count
sysctls, respectively).
.Pp
+Due to the architecture of the
+.Fx
+virtual memory subsystem, the use of copy-on-write (CoW) anonymous
+memory, e.g. on
+.Xr fork 2 ,
+causes swap to be reserved for all three regions (VM objects)
+involved, that is, for the original pre-fork mapping and for its
+copies in the parent and the child, instead of only two.
+Eventually the subsystem tries to optimize the internal layout of
+the CoW tracking and often removes (collapses) backing objects that
+are no longer needed, re-assigning their pages and swap reservations
+to the copies.
+This frees the swap reserve, but it is not guaranteed to happen.
+.Pp
The
.Va kern.ipc.maxpipekva
loader tunable is used to set a hard limit on the
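
The behavior documented in the tuning.7 paragraph above can be observed from userspace. Below is a minimal sketch, assuming a FreeBSD system that exports the vm.swap_reserved sysctl; the 64 MB size is arbitrary, and the program is an illustration of the documented behavior, not part of this change:

#include <sys/types.h>
#include <sys/mman.h>

#include <err.h>
#include <string.h>
#include <unistd.h>

int
main(void)
{
	size_t len = 64 * 1024 * 1024;	/* arbitrary 64 MB */
	char *p;
	pid_t pid;

	/* First reservation: the anonymous object backing the mapping. */
	p = mmap(NULL, len, PROT_READ | PROT_WRITE,
	    MAP_ANON | MAP_PRIVATE, -1, 0);
	if (p == MAP_FAILED)
		err(1, "mmap");
	memset(p, 1, len);

	/*
	 * fork() marks the mapping copy-on-write; per the text above,
	 * swap is now reserved for the original object and for the
	 * prospective copies in both the parent and the child.
	 */
	pid = fork();
	if (pid == -1)
		err(1, "fork");
	if (pid == 0) {
		p[0] = 2;	/* CoW fault: the child gets its own copy */
		pause();	/* keep the child's reservation alive */
		_exit(0);
	}
	pause();		/* inspect vm.swap_reserved here */
	return (0);
}

Comparing sysctl vm.swap_reserved before the fork and while both processes pause should show roughly three reservations of the mapping size attributable to this program; as the man page text notes, a collapse of the backing object may return one of them earlier, but that is not guaranteed.
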
diff --git a/sys/dev/md/md.c b/sys/dev/md/md.c
--- a/sys/dev/md/md.c
+++ b/sys/dev/md/md.c
@@ -1617,7 +1617,6 @@
0, 0);
swap_release_by_cred(IDX_TO_OFF(oldpages -
newpages), sc->cred);
- sc->s_swap.object->charge = IDX_TO_OFF(newpages);
sc->s_swap.object->size = newpages;
VM_OBJECT_WUNLOCK(sc->s_swap.object);
} else if (newpages > oldpages) {
@@ -1637,7 +1636,6 @@
}
}
VM_OBJECT_WLOCK(sc->s_swap.object);
- sc->s_swap.object->charge = IDX_TO_OFF(newpages);
sc->s_swap.object->size = newpages;
VM_OBJECT_WUNLOCK(sc->s_swap.object);
}
diff --git a/sys/kern/uipc_shm.c b/sys/kern/uipc_shm.c
--- a/sys/kern/uipc_shm.c
+++ b/sys/kern/uipc_shm.c
@@ -759,7 +759,6 @@
/* Free the swap accounted for shm */
swap_release_by_cred(delta, object->cred);
- object->charge -= delta;
} else {
if ((shmfd->shm_seals & F_SEAL_GROW) != 0)
return (EPERM);
@@ -768,7 +767,6 @@
delta = IDX_TO_OFF(nobjsize - object->size);
if (!swap_reserve_by_cred(delta, object->cred))
return (ENOMEM);
- object->charge += delta;
}
shmfd->shm_size = length;
mtx_lock(&shm_timestamp_lock);
diff --git a/sys/vm/swap_pager.c b/sys/vm/swap_pager.c
--- a/sys/vm/swap_pager.c
+++ b/sys/vm/swap_pager.c
@@ -239,7 +239,8 @@
#ifdef INVARIANTS
prev = atomic_fetchadd_long(&uip->ui_vmsize, -pdecr);
KASSERT(prev >= pdecr,
- ("negative vmsize for uid %d\n", uip->ui_uid));
+ ("negative vmsize for uid %d, prev %#jx decr %#jx\n",
+ uip->ui_uid, (uintmax_t)prev, (uintmax_t)pdecr));
#else
atomic_subtract_long(&uip->ui_vmsize, pdecr);
#endif
@@ -329,7 +330,7 @@
}
void
-swap_reserve_force(vm_ooffset_t incr)
+swap_reserve_force_by_cred(vm_ooffset_t incr, struct ucred *cred)
{
u_long pincr;
@@ -345,7 +346,13 @@
#endif
pincr = atop(incr);
atomic_add_long(&swap_reserved, pincr);
- swap_reserve_force_rlimit(pincr, curthread->td_ucred);
+ swap_reserve_force_rlimit(pincr, cred);
+}
+
+void
+swap_reserve_force(vm_ooffset_t incr)
+{
+ swap_reserve_force_by_cred(incr, curthread->td_ucred);
}
void
@@ -373,7 +380,8 @@
pdecr = atop(decr);
#ifdef INVARIANTS
prev = atomic_fetchadd_long(&swap_reserved, -pdecr);
- KASSERT(prev >= pdecr, ("swap_reserved < decr"));
+ KASSERT(prev >= pdecr, ("swap_reserved %#jx < decr %#jx",
+ (uintmax_t)prev, (uintmax_t)pdecr));
#else
atomic_subtract_long(&swap_reserved, pdecr);
#endif
@@ -776,10 +784,7 @@
object->un_pager.swp.writemappings = 0;
object->handle = handle;
- if (cred != NULL) {
- object->cred = cred;
- object->charge = size;
- }
+ object->cred = cred;
return (true);
}
@@ -892,8 +897,7 @@
* Release the allocation charge.
*/
if (object->cred != NULL) {
- swap_release_by_cred(object->charge, object->cred);
- object->charge = 0;
+ swap_release_by_cred(ptoa(object->size), object->cred);
crfree(object->cred);
object->cred = NULL;
}
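
The swap_pager.c hunks above split swap_reserve_force() so that the forced variant can charge an arbitrary credential rather than only curthread->td_ucred. A minimal kernel-context sketch of the pairing the rest of this diff depends on follows; the example_* helpers are hypothetical and object locking is elided:

/*
 * Hypothetical helpers (not part of this diff) sketching the
 * reserve/release pairing: whatever is force-reserved against a
 * credential must later be released against the same credential,
 * and with the charge field gone, ptoa(object->size) is the single
 * source of truth for the amount.
 */
static void
example_charge_object(vm_object_t object, struct ucred *cred)
{
	crhold(cred);
	object->cred = cred;
	/* Cannot fail; pushes past the rlimit instead of returning error. */
	swap_reserve_force_by_cred(ptoa(object->size), cred);
}

static void
example_discharge_object(vm_object_t object)
{
	/* Mirrors the swap_pager.c teardown hunk above. */
	if (object->cred != NULL) {
		swap_release_by_cred(ptoa(object->size), object->cred);
		crfree(object->cred);
		object->cred = NULL;
	}
}
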
diff --git a/sys/vm/vm.h b/sys/vm/vm.h
--- a/sys/vm/vm.h
+++ b/sys/vm/vm.h
@@ -168,6 +168,7 @@
bool swap_reserve(vm_ooffset_t incr);
bool swap_reserve_by_cred(vm_ooffset_t incr, struct ucred *cred);
void swap_reserve_force(vm_ooffset_t incr);
+void swap_reserve_force_by_cred(vm_ooffset_t incr, struct ucred *cred);
void swap_release(vm_ooffset_t decr);
void swap_release_by_cred(vm_ooffset_t decr, struct ucred *cred);
diff --git a/sys/vm/vm_fault.c b/sys/vm/vm_fault.c
--- a/sys/vm/vm_fault.c
+++ b/sys/vm/vm_fault.c
@@ -2310,13 +2310,12 @@
* directly.
*/
dst_object = vm_object_allocate_anon(atop(dst_entry->end -
- dst_entry->start), NULL, NULL, 0);
+ dst_entry->start), NULL, NULL);
#if VM_NRESERVLEVEL > 0
dst_object->flags |= OBJ_COLORED;
dst_object->pg_color = atop(dst_entry->start);
#endif
dst_object->domain = src_object->domain;
- dst_object->charge = dst_entry->end - dst_entry->start;
dst_entry->object.vm_object = dst_object;
dst_entry->offset = 0;
@@ -2329,7 +2328,7 @@
("vm_fault_copy_entry: leaked swp charge"));
dst_object->cred = curthread->td_ucred;
crhold(dst_object->cred);
- *fork_charge += dst_object->charge;
+ *fork_charge += ptoa(dst_object->size);
} else if ((dst_object->flags & OBJ_SWAP) != 0 &&
dst_object->cred == NULL) {
KASSERT(dst_entry->cred != NULL, ("no cred for entry %p",
diff --git a/sys/vm/vm_map.c b/sys/vm/vm_map.c
--- a/sys/vm/vm_map.c
+++ b/sys/vm/vm_map.c
@@ -2428,7 +2428,7 @@
KASSERT((entry->eflags & MAP_ENTRY_IS_SUB_MAP) == 0,
("map entry %p is a submap", entry));
object = vm_object_allocate_anon(atop(entry->end - entry->start), NULL,
- entry->cred, entry->end - entry->start);
+ entry->cred);
entry->object.vm_object = object;
entry->offset = 0;
entry->cred = NULL;
@@ -2443,21 +2443,26 @@
static inline void
vm_map_entry_charge_object(vm_map_t map, vm_map_entry_t entry)
{
+ vm_object_t object;
VM_MAP_ASSERT_LOCKED(map);
KASSERT((entry->eflags & MAP_ENTRY_IS_SUB_MAP) == 0,
("map entry %p is a submap", entry));
- if (entry->object.vm_object == NULL && !vm_map_is_system(map) &&
+ object = entry->object.vm_object;
+ if (object == NULL && !vm_map_is_system(map) &&
(entry->eflags & MAP_ENTRY_GUARD) == 0)
vm_map_entry_back(entry);
- else if (entry->object.vm_object != NULL &&
+ else if (object != NULL &&
((entry->eflags & MAP_ENTRY_NEEDS_COPY) == 0) &&
entry->cred != NULL) {
- VM_OBJECT_WLOCK(entry->object.vm_object);
- KASSERT(entry->object.vm_object->cred == NULL,
+ VM_OBJECT_WLOCK(object);
+ KASSERT(object->cred == NULL,
("OVERCOMMIT: %s: both cred e %p", __func__, entry));
- entry->object.vm_object->cred = entry->cred;
- entry->object.vm_object->charge = entry->end - entry->start;
+ object->cred = entry->cred;
+ if (entry->end - entry->start < ptoa(object->size)) {
+ swap_reserve_force_by_cred(ptoa(object->size) -
+ entry->end + entry->start, object->cred);
+ }
VM_OBJECT_WUNLOCK(entry->object.vm_object);
entry->cred = NULL;
}
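
With the per-object charge field gone, the amount a charged object accounts for is always ptoa(object->size), which can exceed the span of the map entry that carried the deferred charge, e.g. after clipping. A hypothetical worked example of the branch above:

/*
 * Hypothetical numbers for the branch above: the entry was clipped
 * to 10 pages but still backs a 16-page object.
 *
 *   entry->end - entry->start = ptoa(10)
 *   ptoa(object->size)        = ptoa(16)
 *
 * The entry's deferred reservation covers only ptoa(10), so the
 * remaining ptoa(6) is force-reserved here; object destruction will
 * later release ptoa(16) in one piece via swap_release_by_cred().
 */
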
@@ -2956,7 +2961,7 @@
* we cannot distinguish between non-charged and
* charged clipped mapping of the same object later.
*/
- KASSERT(obj->charge == 0,
+ KASSERT(obj->cred == NULL,
("vm_map_protect: object %p overcharged (entry %p)",
obj, entry));
if (!swap_reserve(ptoa(obj->size))) {
@@ -2968,7 +2973,6 @@
crhold(cred);
obj->cred = cred;
- obj->charge = ptoa(obj->size);
VM_OBJECT_WUNLOCK(obj);
}
@@ -3942,7 +3946,7 @@
vm_map_entry_delete(vm_map_t map, vm_map_entry_t entry)
{
vm_object_t object;
- vm_pindex_t offidxstart, offidxend, size1;
+ vm_pindex_t offidxstart, offidxend, oldsize;
vm_size_t size;
vm_map_entry_unlink(map, entry, UNLINK_MERGE_NONE);
@@ -3989,15 +3993,11 @@
OBJPR_NOTMAPPED);
if (offidxend >= object->size &&
offidxstart < object->size) {
- size1 = object->size;
+ oldsize = object->size;
object->size = offidxstart;
if (object->cred != NULL) {
- size1 -= object->size;
- KASSERT(object->charge >= ptoa(size1),
- ("object %p charge < 0", object));
- swap_release_by_cred(ptoa(size1),
- object->cred);
- object->charge -= ptoa(size1);
+					swap_release_by_cred(ptoa(oldsize -
+					    object->size), object->cred);
}
}
}
@@ -4198,7 +4198,7 @@
("OVERCOMMIT: vm_map_copy_anon_entry: cred %p",
src_object));
src_object->cred = src_entry->cred;
- src_object->charge = size;
+ *fork_charge += ptoa(src_object->size) - size;
}
dst_entry->object.vm_object = src_object;
if (charged) {
@@ -4455,7 +4455,7 @@
KASSERT(object->cred == NULL,
("vmspace_fork both cred"));
object->cred = old_entry->cred;
- object->charge = old_entry->end -
+ *fork_charge += old_entry->end -
old_entry->start;
old_entry->cred = NULL;
}
@@ -4957,6 +4957,13 @@
if (newvmspace == NULL)
return (ENOMEM);
if (!swap_reserve_by_cred(fork_charge, p->p_ucred)) {
+		/*
+		 * The swap reservation failed.  The accounting
+		 * for the entries of the copied newvmspace will
+		 * be subtracted in vmspace_free(), so force the
+		 * reservation now to keep the totals balanced.
+		 */
+ swap_reserve_force_by_cred(fork_charge, p->p_ucred);
vmspace_free(newvmspace);
return (ENOMEM);
}
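
A hedged accounting trace, with a made-up fork_charge, of why forcing the reservation on this failure path balances the books:

/*
 * Hypothetical trace for the error path above, fork_charge = ptoa(N):
 *
 *   swap_reserve_by_cred(ptoa(N))        fails, reserves nothing
 *   swap_reserve_force_by_cred(ptoa(N))  reserves ptoa(N) anyway
 *   vmspace_free(newvmspace)             releases ptoa(N) along with
 *                                        the copied entries and objects
 *
 * Net change: zero.  Without the forced reservation, vmspace_free()
 * would release ptoa(N) that was never reserved and trip the
 * "swap_reserved %#jx < decr %#jx" KASSERT strengthened earlier in
 * this diff.
 */
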
@@ -5138,7 +5145,7 @@
if (vm_map_lock_upgrade(map))
goto RetryLookup;
entry->object.vm_object = vm_object_allocate_anon(atop(size),
- NULL, entry->cred, size);
+ NULL, entry->cred);
entry->offset = 0;
entry->cred = NULL;
vm_map_lock_downgrade(map);
@@ -5396,9 +5403,8 @@
(void *)entry->object.vm_object,
(uintmax_t)entry->offset);
if (entry->object.vm_object && entry->object.vm_object->cred)
- db_printf(", obj ruid %d charge %jx",
- entry->object.vm_object->cred->cr_ruid,
- (uintmax_t)entry->object.vm_object->charge);
+			db_printf(", obj ruid %d",
+ entry->object.vm_object->cred->cr_ruid);
if (entry->eflags & MAP_ENTRY_COW)
db_printf(", copy (%s)",
(entry->eflags & MAP_ENTRY_NEEDS_COPY) ? "needed" : "done");
diff --git a/sys/vm/vm_object.h b/sys/vm/vm_object.h
--- a/sys/vm/vm_object.h
+++ b/sys/vm/vm_object.h
@@ -175,7 +175,6 @@
} phys;
} un_pager;
struct ucred *cred;
- vm_ooffset_t charge;
void *umtx_data;
};
@@ -356,8 +355,7 @@
extern int umtx_shm_vnobj_persistent;
vm_object_t vm_object_allocate (objtype_t, vm_pindex_t);
-vm_object_t vm_object_allocate_anon(vm_pindex_t, vm_object_t, struct ucred *,
- vm_size_t);
+vm_object_t vm_object_allocate_anon(vm_pindex_t, vm_object_t, struct ucred *);
vm_object_t vm_object_allocate_dyn(objtype_t, vm_pindex_t, u_short);
boolean_t vm_object_coalesce(vm_object_t, vm_ooffset_t, vm_size_t, vm_size_t,
int);
diff --git a/sys/vm/vm_object.c b/sys/vm/vm_object.c
--- a/sys/vm/vm_object.c
+++ b/sys/vm/vm_object.c
@@ -196,9 +196,9 @@
KASSERT(object->type == OBJT_DEAD,
("object %p has non-dead type %d",
object, object->type));
- KASSERT(object->charge == 0 && object->cred == NULL,
- ("object %p has non-zero charge %ju (%p)",
- object, (uintmax_t)object->charge, object->cred));
+ KASSERT(object->cred == NULL,
+	    ("object %p has non-NULL cred %p",
+ object, object->cred));
}
#endif
@@ -254,7 +254,6 @@
refcount_init(&object->ref_count, 1);
object->memattr = VM_MEMATTR_DEFAULT;
object->cred = NULL;
- object->charge = 0;
object->handle = handle;
object->backing_object = NULL;
object->backing_object_offset = (vm_ooffset_t) 0;
@@ -452,7 +451,7 @@
*/
vm_object_t
vm_object_allocate_anon(vm_pindex_t size, vm_object_t backing_object,
- struct ucred *cred, vm_size_t charge)
+ struct ucred *cred)
{
vm_object_t handle, object;
@@ -466,7 +465,6 @@
_vm_object_allocate(OBJT_SWAP, size,
OBJ_ANON | OBJ_ONEMAPPING | OBJ_SWAP, object, handle);
object->cred = cred;
- object->charge = cred != NULL ? charge : 0;
return (object);
}
@@ -1448,7 +1446,7 @@
/*
* Allocate a new object with the given length.
*/
- result = vm_object_allocate_anon(atop(length), source, cred, length);
+ result = vm_object_allocate_anon(atop(length), source, cred);
/*
* Store the offset into the source object, and fix up the offset into
@@ -1511,6 +1509,7 @@
struct pctrie_iter pages;
vm_page_t m;
vm_object_t orig_object, new_object, backing_object;
+ struct ucred *cred;
vm_pindex_t offidxstart;
vm_size_t size;
@@ -1525,9 +1524,26 @@
offidxstart = OFF_TO_IDX(entry->offset);
size = atop(entry->end - entry->start);
+ if (orig_object->cred != NULL) {
+ /*
+ * vm_object_split() is currently called from
+ * vmspace_fork(), and it might be tempting to add the
+ * charge for the split object to fork_charge. But
+ * fork_charge is discharged on error when the copied
+ * vmspace is destroyed. Since the split object is
+ * inserted into the shadow hierarchy serving the
+ * source vm_map, it is kept even after the
+ * unsuccessful fork, meaning that we have to force
+ * its swap usage.
+		 * its swap reservation.
+ cred = curthread->td_ucred;
+ crhold(cred);
+ swap_reserve_force_by_cred(ptoa(size), cred);
+ } else {
+ cred = NULL;
+ }
- new_object = vm_object_allocate_anon(size, orig_object,
- orig_object->cred, ptoa(size));
+ new_object = vm_object_allocate_anon(size, orig_object, cred);
/*
* We must wait for the orig_object to complete any in-progress
@@ -1550,12 +1566,6 @@
new_object->backing_object_offset =
orig_object->backing_object_offset + entry->offset;
}
- if (orig_object->cred != NULL) {
- crhold(orig_object->cred);
- KASSERT(orig_object->charge >= ptoa(size),
- ("orig_object->charge < 0"));
- orig_object->charge -= ptoa(size);
- }
/*
* Mark the split operation so that swap_pager_getpages() knows
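
The comment in the hunk above is the crux of the vm_object_split() change; a hypothetical trace of what would go wrong if fork_charge were used instead:

/*
 * Hypothetical failure trace if fork_charge were used instead:
 *
 *   vm_object_split() adds ptoa(size) to fork_charge
 *   the fork subsequently fails and the copied vmspace is
 *       destroyed, discharging fork_charge including ptoa(size)
 *   the split object, however, stays in the shadow hierarchy of
 *       the *source* vm_map, and releases ptoa(size) again when
 *       it is eventually destroyed
 *
 * That second release would underflow the reservation, so a
 * separate forced reservation against curthread->td_ucred is taken
 * here regardless of the fork's outcome.
 */
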
@@ -2233,7 +2243,6 @@
swap_release_by_cred(ptoa(prev_object->size -
next_pindex), prev_object->cred);
}
- prev_object->charge += charge;
} else if ((cflags & OBJCO_CHARGED) != 0) {
/*
* The caller charged, but the object has
@@ -2786,9 +2795,8 @@
db_iprintf("Object %p: type=%d, size=0x%jx, res=%d, ref=%d, flags=0x%x",
object, (int)object->type, (uintmax_t)object->size,
object->resident_page_count, object->ref_count, object->flags);
- db_iprintf(" ruid %d charge %jx\n",
- object->cred ? object->cred->cr_ruid : -1,
- (uintmax_t)object->charge);
+ db_iprintf(" ruid %d\n",
+ object->cred ? object->cred->cr_ruid : -1);
db_iprintf(" sref=%d, backing_object(%d)=(%p)+0x%jx\n",
atomic_load_int(&object->shadow_count),
object->backing_object ? object->backing_object->ref_count : 0,