Index: lib/libc/sys/mlock.2
===================================================================
--- lib/libc/sys/mlock.2
+++ lib/libc/sys/mlock.2
@@ -28,7 +28,7 @@
 .\" @(#)mlock.2	8.2 (Berkeley) 12/11/93
 .\" $FreeBSD$
 .\"
-.Dd March 20, 2018
+.Dd April 8, 2019
 .Dt MLOCK 2
 .Os
 .Sh NAME
@@ -97,13 +97,13 @@
 system-wide
 .Dq wired pages
 limit
-.Va vm.max_wired .
-.Va vm.max_wired
+.Va vm.max_user_wired .
+.Va vm.max_user_wired
 applies to the system as a whole, so the amount available to a single
 process at any given time is the difference between
-.Va vm.max_wired
+.Va vm.max_user_wired
 and
-.Va vm.stats.vm.v_wire_count .
+.Va vm.stats.vm.v_user_wire_count .
 .Pp
 If
 .Va security.bsd.unprivileged_mlock
@@ -124,13 +124,11 @@
 is set to 0 and the caller is not the super-user.
 .It Bq Er EINVAL
 The address range given wraps around zero.
-.It Bq Er EAGAIN
-Locking the indicated range would exceed the system limit for locked memory.
 .It Bq Er ENOMEM
 Some portion of the indicated address range is not allocated.
 There was an error faulting/mapping a page.
-Locking the indicated range would exceed the per-process limit for locked
-memory.
+Locking the indicated range would exceed the per-process or system-wide limits
+for locked memory.
 .El
 The
 .Fn munlock
@@ -171,11 +169,11 @@
 Allocating too much wired memory can lead to a memory-allocation deadlock
 which requires a reboot to recover from.
 .Pp
-The per-process resource limit is a limit on the amount of virtual
-memory locked, while the system-wide limit is for the number of locked
-physical pages.
-Hence a process with two distinct locked mappings of the same physical page
-counts as 2 pages against the per-process limit and as only a single page
-in the system limit.
+The per-process and system-wide resource limits on locked memory apply
+to the amount of virtual memory locked, not the number of locked physical
+pages.
+Hence two distinct locked mappings of the same physical page count as
+2 pages against the system limit, and also against the per-process limit
+if both mappings belong to the same physical map.
 .Pp
 The per-process resource limit is not currently supported.
Index: lib/libc/sys/mlockall.2
===================================================================
--- lib/libc/sys/mlockall.2
+++ lib/libc/sys/mlockall.2
@@ -30,7 +30,7 @@
 .\"
 .\" $FreeBSD$
 .\"
-.Dd December 25, 2012
+.Dd April 8, 2019
 .Dt MLOCKALL 2
 .Os
 .Sh NAME
@@ -69,7 +69,7 @@ A single process can lock the minimum of a
 system-wide
 .Dq wired pages
 limit
-.Va vm.max_wired
+.Va vm.max_user_wired
 and the per-process
 .Dv RLIMIT_MEMLOCK
 resource limit.
@@ -138,9 +138,9 @@
 functions first appeared in
 .Fx 5.1 .
 .Sh BUGS
-The per-process resource limit is a limit on the amount of virtual
-memory locked, while the system-wide limit is for the number of locked
-physical pages.
-Hence a process with two distinct locked mappings of the same physical page
-counts as 2 pages against the per-process limit and as only a single page
-in the system limit.
+The per-process and system-wide resource limits on locked memory apply
+to the amount of virtual memory locked, not the number of locked physical
+pages.
+Hence two distinct locked mappings of the same physical page count as
+2 pages against the system limit, and also against the per-process limit
+if both mappings belong to the same physical map.
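
Note (illustrative commentary, not part of the patch): per the updated mlock.2 text
above, the wiring headroom visible to a user process is the difference between
vm.max_user_wired and vm.stats.vm.v_user_wire_count. A minimal userland sketch
reading the two sysctls this patch introduces; the types match the SYSCTL_INT and
SYSCTL_ULONG definitions added in vm_pageout.c and vm_meter.c below:

	/*
	 * Illustrative only.  Report how many more pages user wirings
	 * could consume under the new system-wide limit.
	 */
	#include <sys/types.h>
	#include <sys/sysctl.h>

	#include <err.h>
	#include <stdio.h>

	int
	main(void)
	{
		int max_user_wired;
		u_long user_wire_count;
		size_t len;

		len = sizeof(max_user_wired);
		if (sysctlbyname("vm.max_user_wired", &max_user_wired, &len,
		    NULL, 0) != 0)
			err(1, "vm.max_user_wired");
		len = sizeof(user_wire_count);
		if (sysctlbyname("vm.stats.vm.v_user_wire_count",
		    &user_wire_count, &len, NULL, 0) != 0)
			err(1, "vm.stats.vm.v_user_wire_count");
		/* Cast both to long: the count may exceed the limit. */
		printf("user-wireable page headroom: %ld\n",
		    (long)max_user_wired - (long)user_wire_count);
		return (0);
	}

Since the limit is only enforced at wiring time, the computed difference can be
transiently negative if vm.max_user_wired is lowered below the current count.
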
Index: lib/libc/tests/sys/mlock_helper.c
===================================================================
--- lib/libc/tests/sys/mlock_helper.c
+++ lib/libc/tests/sys/mlock_helper.c
@@ -39,7 +39,7 @@
 #include
 #include

-#define	VM_MAX_WIRED "vm.max_wired"
+#define	VM_MAX_WIRED "vm.max_user_wired"

 static void
 vm_max_wired_sysctl(int *old_value, int *new_value)
Index: sys/sys/vmmeter.h
===================================================================
--- sys/sys/vmmeter.h
+++ sys/sys/vmmeter.h
@@ -73,6 +73,7 @@
 /*
  * System wide statistics counters.
  * Locking:
+ *	a - atomic
  *	c - constant after initialization
  *	p - uses counter(9)
  */
@@ -125,6 +126,10 @@
 	counter_u64_t v_wire_count;	/* (p) pages wired down */
 #define	VM_METER_NCOUNTERS	\
 	(offsetof(struct vmmeter, v_page_size) / sizeof(counter_u64_t))
+	/*
+	 * Page counters.
+	 */
+	u_long v_user_wire_count;	/* (a) user-wired virtual memory */
 	/*
 	 * Distribution of page usages.
 	 */
Index: sys/vm/vm_glue.c
===================================================================
--- sys/vm/vm_glue.c
+++ sys/vm/vm_glue.c
@@ -181,21 +181,8 @@
 	if (last < (vm_offset_t)addr || end < (vm_offset_t)addr)
 		return (EINVAL);
 	npages = atop(end - start);
-	if (npages > vm_page_max_wired)
+	if (npages > vm_page_max_user_wired)
 		return (ENOMEM);
-#if 0
-	/*
-	 * XXX - not yet
-	 *
-	 * The limit for transient usage of wired pages should be
-	 * larger than for "permanent" wired pages (mlock()).
-	 *
-	 * Also, the sysctl code, which is the only present user
-	 * of vslock(), does a hard loop on EAGAIN.
-	 */
-	if (npages + vm_wire_count() > vm_page_max_wired)
-		return (EAGAIN);
-#endif
 	error = vm_map_wire(&curproc->p_vmspace->vm_map, start, end,
 	    VM_MAP_WIRE_SYSTEM | VM_MAP_WIRE_NOHOLES);
 	if (error == KERN_SUCCESS) {
Index: sys/vm/vm_map.c
===================================================================
--- sys/vm/vm_map.c
+++ sys/vm/vm_map.c
@@ -90,6 +90,7 @@
 #include
 #include
 #include
+#include <sys/vmmeter.h>
 #include
 #include
 #include
@@ -150,7 +151,7 @@
     vm_size_t max_ssize, vm_size_t growsize, vm_prot_t prot, vm_prot_t max,
     int cow);
 static void vm_map_wire_entry_failure(vm_map_t map, vm_map_entry_t entry,
-    vm_offset_t failed_addr);
+    vm_offset_t failed_addr, bool user_wire);

 #define	ENTRY_CHARGED(e) ((e)->cred != NULL || \
	((e)->object.vm_object != NULL && (e)->object.vm_object->cred != NULL && \
@@ -2680,12 +2681,12 @@
 		if (rv == KERN_SUCCESS && (!user_unwire ||
 		    (entry->eflags & MAP_ENTRY_USER_WIRED))) {
-			if (user_unwire)
-				entry->eflags &= ~MAP_ENTRY_USER_WIRED;
 			if (entry->wired_count == 1)
 				vm_map_entry_unwire(map, entry);
 			else
 				entry->wired_count--;
+			if (user_unwire)
+				entry->eflags &= ~MAP_ENTRY_USER_WIRED;
 		}
 		KASSERT((entry->eflags & MAP_ENTRY_IN_TRANSITION) != 0,
 		    ("vm_map_unwire: in-transition flag missing %p", entry));
@@ -2705,6 +2706,13 @@
 	return (rv);
 }

+static void
+vm_user_wire_count_sub(u_long n)
+{
+
+	atomic_subtract_long(&vm_cnt.v_user_wire_count, n);
+}
+
 /*
  * vm_map_wire_entry_failure:
  *
@@ -2714,8 +2722,9 @@
  */
 static void
 vm_map_wire_entry_failure(vm_map_t map, vm_map_entry_t entry,
-    vm_offset_t failed_addr)
+    vm_offset_t failed_addr, bool user_wire)
 {
+	vm_size_t size;

 	VM_MAP_ASSERT_LOCKED(map);
 	KASSERT((entry->eflags & MAP_ENTRY_IN_TRANSITION) != 0 &&
@@ -2729,9 +2738,12 @@
 	 * then unwire them.
 	 */
 	if (failed_addr > entry->start) {
+		size = failed_addr - entry->start;
+		if (user_wire)
+			vm_user_wire_count_sub(atop(size));
 		pmap_unwire(map->pmap, entry->start, failed_addr);
-		vm_object_unwire(entry->object.vm_object, entry->offset,
-		    failed_addr - entry->start, PQ_ACTIVE);
+		vm_object_unwire(entry->object.vm_object, entry->offset, size,
+		    PQ_ACTIVE);
 	}

 	/*
@@ -2752,7 +2764,8 @@
 {
 	vm_map_entry_t entry, first_entry, tmp_entry;
 	vm_offset_t faddr, saved_end, saved_start;
-	unsigned int last_timestamp;
+	u_long size, wired;
+	u_int last_timestamp;
 	int rv;
 	boolean_t need_wakeup, result, user_wire;
 	vm_prot_t prot;
@@ -2848,6 +2861,21 @@
 			saved_start = entry->start;
 			saved_end = entry->end;

+			if (user_wire) {
+				size = atop(saved_end - saved_start);
+				wired = vm_cnt.v_user_wire_count;
+				do {
+					if (size + wired >
+					    vm_page_max_user_wired) {
+						end = saved_start;
+						rv = KERN_RESOURCE_SHORTAGE;
+						goto done;
+					}
+				} while (!atomic_fcmpset_long(
+				    &vm_cnt.v_user_wire_count, &wired,
+				    size + wired));
+			}
+
 			/*
 			 * Release the map lock, relying on the in-transition
 			 * mark.  Mark the map busy for fork.
@@ -2892,13 +2920,14 @@
 				if (rv != KERN_SUCCESS &&
 				    faddr < entry->end)
 					vm_map_wire_entry_failure(map,
-					    entry, faddr);
+					    entry, faddr, user_wire);
 				entry = entry->next;
 			}
 		}
 		last_timestamp = map->timestamp;
 		if (rv != KERN_SUCCESS) {
-			vm_map_wire_entry_failure(map, entry, faddr);
+			vm_map_wire_entry_failure(map, entry, faddr,
+			    user_wire);
 			end = entry->end;
 			goto done;
 		}
@@ -2964,9 +2993,12 @@
 			 * Undo the wiring.  Wiring succeeded on this entry
 			 * but failed on a later entry.
 			 */
-			if (entry->wired_count == 1)
+			if (entry->wired_count == 1) {
 				vm_map_entry_unwire(map, entry);
-			else
+				if (user_wire)
+					vm_user_wire_count_sub(atop(entry->end -
+					    entry->start));
+			} else
 				entry->wired_count--;
 		}
 	next_entry_done:
@@ -3101,13 +3133,18 @@
 static void
 vm_map_entry_unwire(vm_map_t map, vm_map_entry_t entry)
 {
+	vm_size_t size;

 	VM_MAP_ASSERT_LOCKED(map);
 	KASSERT(entry->wired_count > 0,
 	    ("vm_map_entry_unwire: entry %p isn't wired", entry));
+
+	size = entry->end - entry->start;
+	if ((entry->eflags & MAP_ENTRY_USER_WIRED) != 0)
+		vm_user_wire_count_sub(atop(size));
 	pmap_unwire(map->pmap, entry->start, entry->end);
-	vm_object_unwire(entry->object.vm_object, entry->offset, entry->end -
-	    entry->start, PQ_ACTIVE);
+	vm_object_unwire(entry->object.vm_object, entry->offset, size,
+	    PQ_ACTIVE);
 	entry->wired_count = 0;
 }

@@ -4076,7 +4113,7 @@
 	 */
 	if (rv == KERN_SUCCESS && (map->flags & MAP_WIREFUTURE) != 0) {
 		vm_map_unlock(map);
-		vm_map_wire(map, grow_start, grow_start + grow_amount,
+		(void)vm_map_wire(map, grow_start, grow_start + grow_amount,
 		    VM_MAP_WIRE_USER | VM_MAP_WIRE_NOHOLES);
 		vm_map_lock_read(map);
 	} else
Index: sys/vm/vm_meter.c
===================================================================
--- sys/vm/vm_meter.c
+++ sys/vm/vm_meter.c
@@ -394,6 +394,8 @@

 #define	VM_STATS_UINT(var, descr) \
     SYSCTL_UINT(_vm_stats_vm, OID_AUTO, var, CTLFLAG_RD, &vm_cnt.var, 0, descr)
+#define	VM_STATS_ULONG(var, descr) \
+    SYSCTL_ULONG(_vm_stats_vm, OID_AUTO, var, CTLFLAG_RD, &vm_cnt.var, 0, descr)

 VM_STATS_UINT(v_page_size, "Page size in bytes");
 VM_STATS_UINT(v_page_count, "Total number of pages in system");
@@ -402,6 +404,7 @@
 VM_STATS_UINT(v_free_min, "Minimum low-free-pages threshold");
 VM_STATS_PROC(v_free_count, "Free pages", vm_free_count);
 VM_STATS_PROC(v_wire_count, "Wired pages", vm_wire_count);
+VM_STATS_ULONG(v_user_wire_count, "User-wired pages");
 VM_STATS_PROC(v_active_count, "Active pages", vm_active_count);
 VM_STATS_UINT(v_inactive_target, "Desired inactive pages");
 VM_STATS_PROC(v_inactive_count, "Inactive pages", vm_inactive_count);
Index: sys/vm/vm_mmap.c
===================================================================
--- sys/vm/vm_mmap.c
+++ sys/vm/vm_mmap.c
@@ -1003,7 +1003,7 @@
 	if (last < addr || end < addr)
 		return (EINVAL);
 	npages = atop(end - start);
-	if (npages > vm_page_max_wired)
+	if (npages > vm_page_max_user_wired)
 		return (ENOMEM);
 	map = &proc->p_vmspace->vm_map;
 	PROC_LOCK(proc);
@@ -1013,8 +1013,6 @@
 		return (ENOMEM);
 	}
 	PROC_UNLOCK(proc);
-	if (npages + vm_wire_count() > vm_page_max_wired)
-		return (EAGAIN);
 #ifdef RACCT
 	if (racct_enable) {
 		PROC_LOCK(proc);
Index: sys/vm/vm_pageout.h
===================================================================
--- sys/vm/vm_pageout.h
+++ sys/vm/vm_pageout.h
@@ -75,7 +75,7 @@
  * Exported data structures.
  */

-extern int vm_page_max_wired;
+extern int vm_page_max_user_wired;
 extern int vm_pageout_page_count;

 #define	VM_OOM_MEM	1
Index: sys/vm/vm_pageout.c
===================================================================
--- sys/vm/vm_pageout.c
+++ sys/vm/vm_pageout.c
@@ -194,9 +194,10 @@

 int vm_pageout_page_count = 32;

-int vm_page_max_wired;		/* XXX max # of wired pages system-wide */
-SYSCTL_INT(_vm, OID_AUTO, max_wired,
-    CTLFLAG_RW, &vm_page_max_wired, 0, "System-wide limit to wired page count");
+int vm_page_max_user_wired;
+SYSCTL_INT(_vm, OID_AUTO, max_user_wired, CTLFLAG_RW,
+    &vm_page_max_user_wired, 0,
+    "System-wide limit to user-wired page count");

 static u_int isqrt(u_int num);
 static int vm_pageout_launder(struct vm_domain *vmd, int launder,
@@ -2031,8 +2032,8 @@
 	if (vm_pageout_update_period == 0)
 		vm_pageout_update_period = 600;

-	if (vm_page_max_wired == 0)
-		vm_page_max_wired = freecount / 3;
+	if (vm_page_max_user_wired == 0)
+		vm_page_max_user_wired = freecount / 3;
 }

 /*
Index: usr.bin/vmstat/vmstat.c
===================================================================
--- usr.bin/vmstat/vmstat.c
+++ usr.bin/vmstat/vmstat.c
@@ -156,6 +156,7 @@
 	u_int v_free_min;
 	u_int v_free_count;
 	u_int v_wire_count;
+	u_long v_user_wire_count;
 	u_int v_active_count;
 	u_int v_inactive_target;
 	u_int v_inactive_count;
@@ -566,6 +567,7 @@
 	GET_VM_STATS(vm, v_free_min);
 	GET_VM_STATS(vm, v_free_count);
 	GET_VM_STATS(vm, v_wire_count);
+	GET_VM_STATS(vm, v_user_wire_count);
 	GET_VM_STATS(vm, v_active_count);
 	GET_VM_STATS(vm, v_inactive_target);
 	GET_VM_STATS(vm, v_inactive_count);
@@ -1057,6 +1059,8 @@
 	xo_emit("{:wired-pages/%9u} {N:pages wired down}\n",
 	    sum.v_wire_count);
+	xo_emit("{:virtual-user-wired-pages/%9lu} {N:virtual user pages wired "
+	    "down}\n", sum.v_user_wire_count);
 	xo_emit("{:free-pages/%9u} {N:pages free}\n",
 	    sum.v_free_count);
 	xo_emit("{:bytes-per-page/%9u} {N:bytes per page}\n",
 	    sum.v_page_size);
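
Note (illustrative commentary, not part of the patch): the heart of the change is
the reservation loop added to vm_map_wire() above. The requested page count is
charged against vm_cnt.v_user_wire_count with an atomic_fcmpset_long() loop before
any page is wired, and rolled back through vm_user_wire_count_sub() on failure or
unwire. A self-contained userland analogue of that check-then-reserve pattern,
using C11 atomics; every name in it is made up for the example:

	#include <stdatomic.h>
	#include <stdbool.h>

	static _Atomic unsigned long user_wire_count;
	static unsigned long max_user_wired = 8192;	/* arbitrary limit */

	/*
	 * Try to reserve "npages" against the global limit before doing
	 * any wiring work; mirrors the loop added to vm_map_wire().
	 */
	static bool
	user_wire_reserve(unsigned long npages)
	{
		unsigned long wired;

		wired = atomic_load(&user_wire_count);
		do {
			if (npages + wired > max_user_wired)
				return (false);	/* KERN_RESOURCE_SHORTAGE */
		} while (!atomic_compare_exchange_weak(&user_wire_count,
		    &wired, npages + wired));
		return (true);
	}

	/*
	 * Roll a reservation back, as vm_user_wire_count_sub() does on
	 * wiring failure or unwire.
	 */
	static void
	user_wire_release(unsigned long npages)
	{

		atomic_fetch_sub(&user_wire_count, npages);
	}

	int
	main(void)
	{

		if (user_wire_reserve(100)) {
			/* ... wire pages; on failure or unwire: */
			user_wire_release(100);
		}
		return (0);
	}

atomic_compare_exchange_weak() reloads `wired` on failure, so the limit check is
re-evaluated against the latest counter on every iteration, just as
atomic_fcmpset_long() updates its `&wired` argument. Charging up front means
concurrent callers can never push the counter past the limit; the cost is a
possible spurious shortage error while a losing caller's reservation is still
being rolled back.
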