Index: sys/sys/vmmeter.h
===================================================================
--- sys/sys/vmmeter.h
+++ sys/sys/vmmeter.h
@@ -73,6 +73,7 @@
 /*
  * System wide statistics counters.
  * Locking:
+ *	a - atomic
  *	c - constant after initialization
  *	p - uses counter(9)
  */
@@ -124,7 +125,11 @@
 	counter_u64_t v_kthreadpages;	/* (p) ... and by kernel fork() */
 	counter_u64_t v_wire_count;	/* (p) pages wired down */
 #define	VM_METER_NCOUNTERS	\
-	(offsetof(struct vmmeter, v_page_size) / sizeof(counter_u64_t))
+	(offsetof(struct vmmeter, v_user_wire_count) / sizeof(counter_u64_t))
+	/*
+	 * Non-counter(9) fields.
+	 */
+	u_int v_user_wire_count;	/* (a) pages wired into user mappings */
 	/*
 	 * Distribution of page usages.
 	 */
Index: sys/vm/vm_fault.c
===================================================================
--- sys/vm/vm_fault.c
+++ sys/vm/vm_fault.c
@@ -199,7 +199,7 @@
 	VM_OBJECT_ASSERT_LOCKED(m->object);
 
 	need_dirty = ((fault_type & VM_PROT_WRITE) != 0 &&
-	    (fault_flags & VM_FAULT_WIRE) == 0) ||
+	    (fault_flags & VM_FAULT_WIRE_MASK) == 0) ||
 	    (fault_flags & VM_FAULT_DIRTY) != 0;
 
 	if (set_wd)
@@ -499,10 +499,13 @@
 	m_mtx = NULL;
 	for (i = 0; i < npages; i++) {
 		vm_page_change_lock(&m[i], &m_mtx);
-		if ((fault_flags & VM_FAULT_WIRE) != 0)
+		if ((fault_flags & VM_FAULT_WIRE_MASK) != 0) {
 			vm_page_wire(&m[i]);
-		else
+			if ((fault_flags & VM_FAULT_USER_WIRE) != 0)
+				vm_page_wire_user(&m[i]);
+		} else {
 			vm_page_activate(&m[i]);
+		}
 		if (m_hold != NULL && m[i].pindex == fs->first_pindex) {
 			*m_hold = &m[i];
 			vm_page_wire(&m[i]);
@@ -620,8 +623,8 @@
 	if (wired)
 		fault_type = prot | (fault_type & VM_PROT_COPY);
 	else
-		KASSERT((fault_flags & VM_FAULT_WIRE) == 0,
-		    ("!wired && VM_FAULT_WIRE"));
+		KASSERT((fault_flags & VM_FAULT_WIRE_MASK) == 0,
+		    ("!wired && VM_FAULT_WIRE_MASK"));
 
 	/*
 	 * Try to avoid lock contention on the top-level object through
@@ -633,7 +636,7 @@
 	 * run in parallel on the same top-level object.
 	 */
 	if (fs.vp == NULL /* avoid locked vnode leak */ &&
-	    (fault_flags & (VM_FAULT_WIRE | VM_FAULT_DIRTY)) == 0 &&
+	    (fault_flags & (VM_FAULT_WIRE_MASK | VM_FAULT_DIRTY)) == 0 &&
 	    /* avoid calling vm_object_set_writeable_dirty() */
 	    ((prot & VM_PROT_WRITE) == 0 ||
 	    (fs.first_object->type != OBJT_VNODE &&
@@ -1175,14 +1178,25 @@
 				pmap_copy_page(fs.m, fs.first_m);
 				fs.first_m->valid = VM_PAGE_BITS_ALL;
 				if (wired && (fault_flags &
-				    VM_FAULT_WIRE) == 0) {
-					vm_page_lock(fs.first_m);
-					vm_page_wire(fs.first_m);
-					vm_page_unlock(fs.first_m);
-
+				    VM_FAULT_WIRE_MASK) == 0) {
+					bool user_wired;
+					/*
+					 * XXXMJ can we assert
+					 * (fault_type & VM_PROT_COPY) != 0?
+					 */
 					vm_page_lock(fs.m);
+					user_wired =
+					    (fs.m->flags & PG_USER_WIRED) != 0;
+					if (user_wired)
+						vm_page_unwire_user(fs.m);
 					vm_page_unwire(fs.m, PQ_INACTIVE);
 					vm_page_unlock(fs.m);
+
+					vm_page_lock(fs.first_m);
+					vm_page_wire(fs.first_m);
+					if (user_wired)
+						vm_page_wire_user(fs.first_m);
+					vm_page_unlock(fs.first_m);
 				}
 				/*
 				 * We no longer need the old page or object.
@@ -1276,8 +1290,9 @@
 			}
 
 			/* Reassert because wired may have changed. */
-			KASSERT(wired || (fault_flags & VM_FAULT_WIRE) == 0,
-			    ("!wired && VM_FAULT_WIRE"));
+			KASSERT(wired ||
+			    (fault_flags & VM_FAULT_WIRE_MASK) == 0,
+			    ("!wired && VM_FAULT_WIRE_MASK"));
 		}
 	}
@@ -1309,7 +1324,7 @@
 	 */
 	pmap_enter(fs.map->pmap, vaddr, fs.m, prot,
 	    fault_type | (wired ? PMAP_ENTER_WIRED : 0), 0);
-	if (faultcount != 1 && (fault_flags & VM_FAULT_WIRE) == 0 &&
+	if (faultcount != 1 && (fault_flags & VM_FAULT_WIRE_MASK) == 0 &&
 	    wired == 0)
 		vm_fault_prefault(&fs, vaddr, faultcount > 0 ?
 		    behind : PFBAK,
@@ -1321,10 +1336,13 @@
 	 * If the page is not wired down, then put it where the pageout daemon
 	 * can find it.
 	 */
-	if ((fault_flags & VM_FAULT_WIRE) != 0)
+	if ((fault_flags & VM_FAULT_WIRE_MASK) != 0) {
 		vm_page_wire(fs.m);
-	else
+		if ((fault_flags & VM_FAULT_USER_WIRE) != 0)
+			vm_page_wire_user(fs.m);
+	} else {
 		vm_page_activate(fs.m);
+	}
 	if (m_hold != NULL) {
 		*m_hold = fs.m;
 		vm_page_wire(fs.m);
@@ -1666,6 +1684,8 @@
 	VM_OBJECT_WLOCK(dst_object);
 	KASSERT(upgrade || dst_entry->object.vm_object == NULL,
 	    ("vm_fault_copy_entry: vm_object not NULL"));
+	KASSERT(!upgrade || (src_entry->eflags & MAP_ENTRY_USER_WIRED) != 0,
+	    ("vm_fault_copy_entry: entry %p is not user-wired", src_entry));
 	if (src_object != dst_object) {
 		dst_entry->object.vm_object = dst_object;
 		dst_entry->offset = 0;
@@ -1794,10 +1814,12 @@
 		if (upgrade) {
 			if (src_m != dst_m) {
 				vm_page_lock(src_m);
+				vm_page_unwire_user(src_m);
 				vm_page_unwire(src_m, PQ_INACTIVE);
 				vm_page_unlock(src_m);
 				vm_page_lock(dst_m);
 				vm_page_wire(dst_m);
+				vm_page_wire_user(dst_m);
 				vm_page_unlock(dst_m);
 			} else {
 				KASSERT(dst_m->wire_count > 0,
Index: sys/vm/vm_glue.c
===================================================================
--- sys/vm/vm_glue.c
+++ sys/vm/vm_glue.c
@@ -172,7 +172,6 @@
 vslock(void *addr, size_t len)
 {
 	vm_offset_t end, last, start;
-	vm_size_t npages;
 	int error;
 
 	last = (vm_offset_t)addr + len;
@@ -180,22 +179,13 @@
 	end = round_page(last);
 	if (last < (vm_offset_t)addr || end < (vm_offset_t)addr)
 		return (EINVAL);
-	npages = atop(end - start);
-	if (npages > vm_page_max_wired)
-		return (ENOMEM);
-#if 0
+
 	/*
-	 * XXX - not yet
-	 *
-	 * The limit for transient usage of wired pages should be
-	 * larger than for "permanent" wired pages (mlock()).
-	 *
-	 * Also, the sysctl code, which is the only present user
-	 * of vslock(), does a hard loop on EAGAIN.
+	 * We don't want to enforce the system limit on user-wired pages here,
+	 * but as a safety belt, ensure that this mapping isn't too big.
 	 */
-	if (npages + vm_wire_count() > vm_page_max_wired)
-		return (EAGAIN);
-#endif
+	if (atop(end - start) > vm_page_max_user_wired)
+		return (ENOMEM);
 	error = vm_map_wire(&curproc->p_vmspace->vm_map, start, end,
 	    VM_MAP_WIRE_SYSTEM | VM_MAP_WIRE_NOHOLES);
 	if (error == KERN_SUCCESS) {
Index: sys/vm/vm_map.h
===================================================================
--- sys/vm/vm_map.h
+++ sys/vm/vm_map.h
@@ -357,9 +357,12 @@
 /*
  * vm_fault option flags
  */
-#define VM_FAULT_NORMAL	0	/* Nothing special */
-#define VM_FAULT_WIRE	1	/* Wire the mapped page */
-#define VM_FAULT_DIRTY	2	/* Dirty the page; use w/VM_PROT_COPY */
+#define	VM_FAULT_NORMAL		0x00	/* Nothing special */
+#define	VM_FAULT_WIRE		0x01	/* Wire the mapped page */
+#define	VM_FAULT_DIRTY		0x02	/* Dirty the page; use w/VM_PROT_COPY */
+#define	VM_FAULT_USER_WIRE	0x04	/* Wire the page for a user mapping */
+
+#define	VM_FAULT_WIRE_MASK	(VM_FAULT_WIRE | VM_FAULT_USER_WIRE)
 
 /*
  * Initially, mappings are slightly sequential.  The maximum window size must
Index: sys/vm/vm_map.c
===================================================================
--- sys/vm/vm_map.c
+++ sys/vm/vm_map.c
@@ -90,6 +90,7 @@
 #include
 #include
 #include
+#include
 #include
 #include
 #include
@@ -2861,6 +2862,7 @@
 			 * it into the physical map.
 			 */
 			if ((rv = vm_fault(map, faddr, VM_PROT_NONE,
+			    user_wire ? VM_FAULT_USER_WIRE :
 			    VM_FAULT_WIRE)) != KERN_SUCCESS)
 				break;
 		} while ((faddr += PAGE_SIZE) < saved_end);
@@ -3974,7 +3976,9 @@
 #endif
 
 	if (!old_mlock && (map->flags & MAP_WIREFUTURE) != 0) {
-		if (ptoa(pmap_wired_count(map->pmap)) + grow_amount > lmemlim) {
+		if (ptoa(pmap_wired_count(map->pmap)) + grow_amount > lmemlim ||
+		    atop(grow_amount) + vm_cnt.v_user_wire_count >
+		    vm_page_max_user_wired) {
 			rv = KERN_NO_SPACE;
 			goto out;
 		}
Index: sys/vm/vm_meter.c
===================================================================
--- sys/vm/vm_meter.c
+++ sys/vm/vm_meter.c
@@ -401,6 +401,7 @@
 VM_STATS_UINT(v_free_target, "Pages desired free");
 VM_STATS_UINT(v_free_min, "Minimum low-free-pages threshold");
 VM_STATS_PROC(v_free_count, "Free pages", vm_free_count);
+VM_STATS_UINT(v_user_wire_count, "User-wired pages");
 VM_STATS_PROC(v_wire_count, "Wired pages", vm_wire_count);
 VM_STATS_PROC(v_active_count, "Active pages", vm_active_count);
 VM_STATS_UINT(v_inactive_target, "Desired inactive pages");
Index: sys/vm/vm_mmap.c
===================================================================
--- sys/vm/vm_mmap.c
+++ sys/vm/vm_mmap.c
@@ -1003,7 +1003,7 @@
 	if (last < addr || end < addr)
 		return (EINVAL);
 	npages = atop(end - start);
-	if (npages > vm_page_max_wired)
+	if (npages > vm_page_max_user_wired)
 		return (ENOMEM);
 	map = &proc->p_vmspace->vm_map;
 	PROC_LOCK(proc);
@@ -1013,7 +1013,7 @@
 		return (ENOMEM);
 	}
 	PROC_UNLOCK(proc);
-	if (npages + vm_wire_count() > vm_page_max_wired)
+	if (npages + vm_cnt.v_user_wire_count > vm_page_max_user_wired)
 		return (EAGAIN);
 #ifdef RACCT
 	if (racct_enable) {
@@ -1061,12 +1061,14 @@
 	 * If wiring all pages in the process would cause it to exceed
 	 * a hard resource limit, return ENOMEM.
 	 */
-	if (!old_mlock && uap->how & MCL_CURRENT) {
-		if (map->size > lim_cur(td, RLIMIT_MEMLOCK))
+	if (!old_mlock && (uap->how & MCL_CURRENT) != 0) {
+		if (map->size > lim_cur(td, RLIMIT_MEMLOCK) ||
+		    vm_cnt.v_user_wire_count + atop(map->size) >
+		    vm_page_max_user_wired)
 			return (ENOMEM);
 	}
 #ifdef RACCT
-	if (racct_enable) {
+	if (racct_enable && (uap->how & MCL_CURRENT) != 0) {
 		PROC_LOCK(td->td_proc);
 		error = racct_set(td->td_proc, RACCT_MEMLOCK, map->size);
 		PROC_UNLOCK(td->td_proc);
@@ -1457,7 +1459,12 @@
 			RACCT_PROC_UNLOCK(td->td_proc);
 			return (ENOMEM);
 		}
-		if (!old_mlock && map->flags & MAP_WIREFUTURE) {
+		if (!old_mlock && (map->flags & MAP_WIREFUTURE) != 0) {
+			if (atop(size) + vm_cnt.v_user_wire_count >
+			    vm_page_max_user_wired) {
+				RACCT_PROC_UNLOCK(td->td_proc);
+				return (ENOMEM);
+			}
 			if (ptoa(pmap_wired_count(map->pmap)) + size >
 			    lim_cur(td, RLIMIT_MEMLOCK)) {
 				racct_set_force(td->td_proc, RACCT_VMEM,
Index: sys/vm/vm_object.c
===================================================================
--- sys/vm/vm_object.c
+++ sys/vm/vm_object.c
@@ -2293,6 +2293,8 @@
 			vm_page_busy_sleep(tm, "unwbo", true);
 			goto again;
 		}
+		if (vm_page_user_wired(tm))
+			vm_page_unwire_user(tm);
 		vm_page_unwire(tm, queue);
 		vm_page_unlock(tm);
next_page:
Index: sys/vm/vm_page.h
===================================================================
--- sys/vm/vm_page.h
+++ sys/vm/vm_page.h
@@ -378,6 +378,7 @@
  * Page flags.  If changed at any other time than page allocation or
  * freeing, the modification must be protected by the vm_page lock.
  */
+#define	PG_USER_WIRED	0x0001		/* page is wired by a user mapping */
 #define	PG_FICTITIOUS	0x0004		/* physical page doesn't exist */
 #define	PG_ZERO		0x0008		/* page is zeroed */
 #define	PG_MARKER	0x0010		/* special queue marker page */
@@ -575,8 +576,10 @@
 void vm_page_unswappable(vm_page_t m);
 bool vm_page_unwire(vm_page_t m, uint8_t queue);
 bool vm_page_unwire_noq(vm_page_t m);
+void vm_page_unwire_user(vm_page_t m);
 void vm_page_updatefake(vm_page_t m, vm_paddr_t paddr, vm_memattr_t memattr);
-void vm_page_wire (vm_page_t);
+void vm_page_wire(vm_page_t m);
+void vm_page_wire_user(vm_page_t m);
 void vm_page_xunbusy_hard(vm_page_t m);
 void vm_page_xunbusy_maybelocked(vm_page_t m);
 void vm_page_set_validclean (vm_page_t, int, int);
@@ -806,6 +809,13 @@
 	return (queue == PQ_LAUNDRY || queue == PQ_UNSWAPPABLE);
 }
 
+static inline bool
+vm_page_user_wired(vm_page_t m)
+{
+
+	return ((m->flags & PG_USER_WIRED) != 0);
+}
+
 /*
  *	vm_page_held:
 *
Index: sys/vm/vm_page.c
===================================================================
--- sys/vm/vm_page.c
+++ sys/vm/vm_page.c
@@ -3581,6 +3581,30 @@
 	KASSERT(m->wire_count != 0,
 	    ("vm_page_wire: wire_count overflow m=%p", m));
 }
 
+/*
+ *	vm_page_wire_user:
+ *
+ *	Mark the page as being wired by a user mapping.  The flag is unset once the
+ *	last managed, wired mapping of the page is removed.
+ *
+ *	The page must be locked.
+ */
+void
+vm_page_wire_user(vm_page_t m)
+{
+
+	vm_page_assert_locked(m);
+	KASSERT(m->wire_count > 0,
+	    ("vm_page_wire_user: page %p has wire_count 0", m));
+
+	if ((m->flags & PG_FICTITIOUS) != 0 || vm_page_user_wired(m) ||
+	    (m->oflags & VPO_UNMANAGED) != 0)
+		return;
+
+	m->flags |= PG_USER_WIRED;
+	atomic_add_int(&vm_cnt.v_user_wire_count, 1);
+}
+
 /*
  *	vm_page_unwire:
  *
@@ -3659,6 +3683,34 @@
 	return (false);
 }
 
+/*
+ *	vm_page_unwire_user:
+ *
+ *	Test whether the page has any wired mappings remaining and update state
+ *	accordingly if not.
+ *
+ *	The page must be locked.
+ */
+void
+vm_page_unwire_user(vm_page_t m)
+{
+
+	vm_page_assert_locked(m);
+	KASSERT(m->wire_count > 0,
+	    ("vm_page_unwire_user: page %p has wire count 0", m));
+
+	if ((m->flags & PG_FICTITIOUS) != 0 || (m->oflags & VPO_UNMANAGED) != 0)
+		return;
+
+	KASSERT(vm_page_user_wired(m),
+	    ("vm_page_unwire_user: page %p is not user-wired", m));
+
+	if (pmap_page_wired_mappings(m) == 0) {
+		atomic_add_int(&vm_cnt.v_user_wire_count, -1);
+		m->flags &= ~PG_USER_WIRED;
+	}
+}
+
 /*
  * Move the specified page to the tail of the inactive queue, or requeue
  * the page if it is already in the inactive queue.
@@ -4456,6 +4508,7 @@
 	db_printf("vm_cnt.v_active_count: %d\n", vm_active_count());
 	db_printf("vm_cnt.v_laundry_count: %d\n", vm_laundry_count());
 	db_printf("vm_cnt.v_wire_count: %d\n", vm_wire_count());
+	db_printf("vm_cnt.v_user_wire_count: %d\n", vm_cnt.v_user_wire_count);
 	db_printf("vm_cnt.v_free_reserved: %d\n", vm_cnt.v_free_reserved);
 	db_printf("vm_cnt.v_free_min: %d\n", vm_cnt.v_free_min);
 	db_printf("vm_cnt.v_free_target: %d\n", vm_cnt.v_free_target);
Index: sys/vm/vm_pageout.h
===================================================================
--- sys/vm/vm_pageout.h
+++ sys/vm/vm_pageout.h
@@ -75,7 +75,7 @@
 *	Exported data structures.
 */
 
-extern int vm_page_max_wired;
+extern int vm_page_max_user_wired;
 extern int vm_pageout_page_count;
 
 #define VM_OOM_MEM	1
Index: sys/vm/vm_pageout.c
===================================================================
--- sys/vm/vm_pageout.c
+++ sys/vm/vm_pageout.c
@@ -194,9 +194,10 @@
 
 int vm_pageout_page_count = 32;
 
-int vm_page_max_wired;		/* XXX max # of wired pages system-wide */
-SYSCTL_INT(_vm, OID_AUTO, max_wired,
-	CTLFLAG_RW, &vm_page_max_wired, 0, "System-wide limit to wired page count");
+int vm_page_max_user_wired;
+SYSCTL_INT(_vm, OID_AUTO, max_user_wired,
+    CTLFLAG_RW, &vm_page_max_user_wired, 0,
+    "System-wide limit to user-wired page count");
 
 static u_int isqrt(u_int num);
 static int vm_pageout_launder(struct vm_domain *vmd, int launder,
@@ -2031,8 +2032,8 @@
 	if (vm_pageout_update_period == 0)
 		vm_pageout_update_period = 600;
 
-	if (vm_page_max_wired == 0)
-		vm_page_max_wired = freecount / 3;
+	if (vm_page_max_user_wired == 0)
+		vm_page_max_user_wired = freecount / 3;
 }
 
 /*
Index: sys/vm/vm_unix.c
===================================================================
--- sys/vm/vm_unix.c
+++ sys/vm/vm_unix.c
@@ -55,6 +55,7 @@
 #include
 #include
 #include
+#include
 #if defined(__amd64__) || defined(__i386__) /* for i386_read_exec */
 #include
 #endif
@@ -63,6 +64,7 @@
 #include
 #include
 #include
+#include
 
 #ifndef _SYS_SYSPROTO_H_
 struct break_args {
@@ -135,7 +137,8 @@
 	if (new > old) {
 		if (!old_mlock && map->flags & MAP_WIREFUTURE) {
 			if (ptoa(pmap_wired_count(map->pmap)) +
-			    (new - old) > lmemlim) {
+			    (new - old) > lmemlim || atop(new - old) +
+			    vm_cnt.v_user_wire_count > vm_page_max_user_wired) {
 				error = ENOMEM;
 				goto done;
 			}
Index: usr.bin/vmstat/vmstat.c
===================================================================
--- usr.bin/vmstat/vmstat.c
+++ usr.bin/vmstat/vmstat.c
@@ -156,6 +156,7 @@
 	u_int v_free_min;
 	u_int v_free_count;
 	u_int v_wire_count;
+	u_int v_user_wire_count;
 	u_int v_active_count;
 	u_int v_inactive_target;
 	u_int v_inactive_count;
@@ -566,6 +567,7 @@
 	GET_VM_STATS(vm, v_free_min);
 	GET_VM_STATS(vm, v_free_count);
 	GET_VM_STATS(vm, v_wire_count);
+	GET_VM_STATS(vm, v_user_wire_count);
 	GET_VM_STATS(vm, v_active_count);
 	GET_VM_STATS(vm, v_inactive_target);
 	GET_VM_STATS(vm, v_inactive_count);
@@ -1057,6 +1059,8 @@
 	    sum.v_laundry_count);
 	xo_emit("{:wired-pages/%9u} {N:pages wired down}\n",
 	    sum.v_wire_count);
+	xo_emit("{:user-wired-pages/%9u} {N:pages wired down by user mappings}\n",
+	    sum.v_user_wire_count);
 	xo_emit("{:free-pages/%9u} {N:pages free}\n",
 	    sum.v_free_count);
 	xo_emit("{:bytes-per-page/%9u} {N:bytes per page}\n", sum.v_page_size);
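
Illustrative usage sketch (not part of the patch): the hunks above export the new
counter as vm.stats.vm.v_user_wire_count (via VM_STATS_UINT() in vm_meter.c) and the
limit as vm.max_user_wired (via SYSCTL_INT() in vm_pageout.c). The sysctl names below
are inferred from those declarations and should be verified against a kernel with the
patch applied. The snippet reads both with sysctlbyname(3) and prints the remaining
headroom before the checks added in vm_mmap.c start rejecting mlock(2)/mlockall(2)
requests.

/* user_wire_headroom.c -- sketch only; sysctl names inferred from the diff above. */
#include <sys/types.h>
#include <sys/sysctl.h>

#include <err.h>
#include <stdint.h>
#include <stdio.h>

int
main(void)
{
	int max_user_wired;	/* vm.max_user_wired is declared as an int sysctl */
	u_int user_wire_count;	/* vm.stats.vm.v_user_wire_count is a u_int stat */
	size_t len;

	len = sizeof(max_user_wired);
	if (sysctlbyname("vm.max_user_wired", &max_user_wired, &len, NULL, 0) != 0)
		err(1, "sysctlbyname(vm.max_user_wired)");

	len = sizeof(user_wire_count);
	if (sysctlbyname("vm.stats.vm.v_user_wire_count", &user_wire_count, &len,
	    NULL, 0) != 0)
		err(1, "sysctlbyname(vm.stats.vm.v_user_wire_count)");

	printf("user-wired pages:  %u\n", user_wire_count);
	printf("system-wide limit: %d\n", max_user_wired);
	printf("headroom:          %jd pages\n",
	    (intmax_t)max_user_wired - (intmax_t)user_wire_count);
	return (0);
}

With the patch in place, mlock(2) returns EAGAIN once npages + vm_cnt.v_user_wire_count
exceeds vm_page_max_user_wired, and mlockall(2), MAP_WIREFUTURE stack growth, and brk(2)
growth fail with ENOMEM under the analogous checks, so the printed headroom is roughly
what a further wiring request could still consume.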