Index: sys/kern/kern_resource.c
===================================================================
--- sys/kern/kern_resource.c
+++ sys/kern/kern_resource.c
@@ -1276,7 +1276,6 @@
 	racct_create(&new_uip->ui_racct);
 	refcount_init(&new_uip->ui_ref, 1);
 	new_uip->ui_uid = uid;
-	mtx_init(&new_uip->ui_vmsize_mtx, "ui_vmsize", NULL, MTX_DEF);
 
 	rw_wlock(&uihashtbl_lock);
 	/*
@@ -1291,7 +1290,6 @@
 	} else {
 		rw_wunlock(&uihashtbl_lock);
 		racct_destroy(&new_uip->ui_racct);
-		mtx_destroy(&new_uip->ui_vmsize_mtx);
 		free(new_uip, M_UIDINFO);
 	}
 	return (uip);
@@ -1352,7 +1350,6 @@
 
 	if (uip->ui_vmsize != 0)
 		printf("freeing uidinfo: uid = %d, swapuse = %lld\n",
 		    uip->ui_uid, (unsigned long long)uip->ui_vmsize);
-	mtx_destroy(&uip->ui_vmsize_mtx);
 	free(uip, M_UIDINFO);
 }
Index: sys/sys/resourcevar.h
===================================================================
--- sys/sys/resourcevar.h
+++ sys/sys/resourcevar.h
@@ -93,12 +93,10 @@
  * (a) Constant from inception
  * (b) Lockless, updated using atomics
  * (c) Locked by global uihashtbl_lock
- * (d) Locked by the ui_vmsize_mtx
  */
 struct uidinfo {
 	LIST_ENTRY(uidinfo) ui_hash;	/* (c) hash chain of uidinfos */
-	struct mtx ui_vmsize_mtx;
-	vm_ooffset_t ui_vmsize;	/* (d) swap reservation by uid */
+	unsigned long ui_vmsize;	/* (b) pages of swap reservation by uid */
 	long	ui_sbsize;		/* (b) socket buffer space consumed */
 	long	ui_proccnt;		/* (b) number of processes */
 	long	ui_ptscnt;		/* (b) number of pseudo-terminals */
Index: sys/vm/swap_pager.c
===================================================================
--- sys/vm/swap_pager.c
+++ sys/vm/swap_pager.c
@@ -151,12 +151,17 @@
 int swap_pager_avail;
 static struct sx swdev_syscall_lock;	/* serialize swap(on|off) */
 
-static vm_ooffset_t swap_total;
-SYSCTL_QUAD(_vm, OID_AUTO, swap_total, CTLFLAG_RD, &swap_total, 0,
-    "Total amount of available swap storage.");
-static vm_ooffset_t swap_reserved;
-SYSCTL_QUAD(_vm, OID_AUTO, swap_reserved, CTLFLAG_RD, &swap_reserved, 0,
+static unsigned long swap_reserved;
+static unsigned long swap_total;
+static int sysctl_swap_reserved(SYSCTL_HANDLER_ARGS);
+SYSCTL_PROC(_vm, OID_AUTO, swap_reserved, CTLTYPE_U64 | CTLFLAG_RD | CTLFLAG_MPSAFE,
+    NULL, 0, sysctl_swap_reserved, "A",
     "Amount of swap storage needed to back all allocated anonymous memory.");
+static int sysctl_swap_total(SYSCTL_HANDLER_ARGS);
+SYSCTL_PROC(_vm, OID_AUTO, swap_total, CTLTYPE_U64 | CTLFLAG_RD | CTLFLAG_MPSAFE,
+    NULL, 0, sysctl_swap_total, "A",
+    "Total amount of available swap storage.");
+
 static int overcommit = 0;
 SYSCTL_INT(_vm, VM_OVERCOMMIT, overcommit, CTLFLAG_RW, &overcommit, 0,
     "Configure virtual memory overcommit behavior. See tuning(7) "
@@ -173,6 +178,24 @@
 #define SWAP_RESERVE_RLIMIT_ON		(1 << 1)
 #define SWAP_RESERVE_ALLOW_NONWIRED	(1 << 2)
 
+static int
+sysctl_swap_total(SYSCTL_HANDLER_ARGS)
+{
+	uint64_t val;
+
+	val = swap_total << PAGE_SHIFT;
+	return (sysctl_handle_64(oidp, &val, 0, req));
+}
+
+static int
+sysctl_swap_reserved(SYSCTL_HANDLER_ARGS)
+{
+	uint64_t val;
+
+	val = swap_reserved << PAGE_SHIFT;
+	return (sysctl_handle_64(oidp, &val, 0, req));
+}
+
 int
 swap_reserve(vm_ooffset_t incr)
 {
@@ -184,6 +207,7 @@
 swap_reserve_by_cred(vm_ooffset_t incr, struct ucred *cred)
 {
 	vm_ooffset_t r, s;
+	unsigned long prev;
 	int res, error;
 	static int curfail;
 	static struct timeval lastfail;
@@ -191,9 +215,8 @@
 
 	uip = cred->cr_ruidinfo;
 
-	if (incr & PAGE_MASK)
-		panic("swap_reserve: & PAGE_MASK");
-
+	KASSERT((incr & PAGE_MASK) == 0, ("swap_reserve: incr: %ju & PAGE_MASK", (uintmax_t)incr));
+
 #ifdef RACCT
 	if (racct_enable) {
 		PROC_LOCK(curproc);
@@ -204,9 +227,10 @@
 	}
 #endif
 
+	incr >>= PAGE_SHIFT;
 	res = 0;
-	mtx_lock(&sw_dev_mtx);
-	r = swap_reserved + incr;
+	prev = atomic_fetchadd_long(&swap_reserved, incr);
+	r = prev + incr;
 	if (overcommit & SWAP_RESERVE_ALLOW_NONWIRED) {
 		s = vm_cnt.v_page_count - vm_cnt.v_free_reserved -
 		    vm_wire_count();
@@ -217,34 +241,27 @@
 	if ((overcommit & SWAP_RESERVE_FORCE_ON) == 0 || r <= s ||
 	    (error = priv_check(curthread, PRIV_VM_SWAP_NOQUOTA)) == 0) {
 		res = 1;
-		swap_reserved = r;
-	}
-	mtx_unlock(&sw_dev_mtx);
+	} else
+		atomic_subtract_long(&swap_reserved, incr);
 
 	if (res) {
-		UIDINFO_VMSIZE_LOCK(uip);
+		prev = atomic_fetchadd_long(&uip->ui_vmsize, incr);
 		if ((overcommit & SWAP_RESERVE_RLIMIT_ON) != 0 &&
-		    uip->ui_vmsize + incr > lim_cur(curthread, RLIMIT_SWAP) &&
-		    priv_check(curthread, PRIV_VM_SWAP_NORLIMIT))
+		    prev + incr > lim_cur(curthread, RLIMIT_SWAP) &&
+		    priv_check(curthread, PRIV_VM_SWAP_NORLIMIT)) {
 			res = 0;
-		else
-			uip->ui_vmsize += incr;
-		UIDINFO_VMSIZE_UNLOCK(uip);
-		if (!res) {
-			mtx_lock(&sw_dev_mtx);
-			swap_reserved -= incr;
-			mtx_unlock(&sw_dev_mtx);
+			atomic_subtract_long(&uip->ui_vmsize, incr);
 		}
 	}
 	if (!res && ppsratecheck(&lastfail, &curfail, 1)) {
 		printf("uid %d, pid %d: swap reservation for %jd bytes failed\n",
-		    uip->ui_uid, curproc->p_pid, incr);
+		    uip->ui_uid, curproc->p_pid, (intmax_t)incr << PAGE_SHIFT);
 	}
 
 #ifdef RACCT
-	if (!res) {
+	if (racct_enable && !res) {
 		PROC_LOCK(curproc);
-		racct_sub(curproc, RACCT_SWAP, incr);
+		racct_sub(curproc, RACCT_SWAP, incr << PAGE_SHIFT);
 		PROC_UNLOCK(curproc);
 	}
 #endif
@@ -257,21 +274,20 @@
 {
 	struct uidinfo *uip;
 
-	mtx_lock(&sw_dev_mtx);
-	swap_reserved += incr;
-	mtx_unlock(&sw_dev_mtx);
+	KASSERT((incr & PAGE_MASK) == 0, ("swap_reserve_force: incr: %ju & PAGE_MASK", (uintmax_t)incr));
 
 #ifdef RACCT
-	PROC_LOCK(curproc);
-	racct_add_force(curproc, RACCT_SWAP, incr);
-	PROC_UNLOCK(curproc);
+	if (racct_enable) {
+		PROC_LOCK(curproc);
+		racct_add_force(curproc, RACCT_SWAP, incr);
+		PROC_UNLOCK(curproc);
+	}
 #endif
-
-	uip = curthread->td_ucred->cr_ruidinfo;
+	incr >>= PAGE_SHIFT;
+	atomic_add_long(&swap_reserved, incr);
 
 	PROC_LOCK(curproc);
-	UIDINFO_VMSIZE_LOCK(uip);
-	uip->ui_vmsize += incr;
-	UIDINFO_VMSIZE_UNLOCK(uip);
+	uip = curproc->p_ucred->cr_ruidinfo;
+	atomic_add_long(&uip->ui_vmsize, incr);
 	PROC_UNLOCK(curproc);
 }
@@ -281,7 +297,7 @@
 {
 	struct ucred *cred;
 	PROC_LOCK(curproc);
-	cred = curthread->td_ucred;
+	cred = curproc->p_ucred;
 	swap_release_by_cred(decr, cred);
 	PROC_UNLOCK(curproc);
 }
@@ -289,26 +305,23 @@
 void
 swap_release_by_cred(vm_ooffset_t decr, struct ucred *cred)
 {
+	unsigned long prev;
 	struct uidinfo *uip;
 
 	uip = cred->cr_ruidinfo;
 
-	if (decr & PAGE_MASK)
-		panic("swap_release: & PAGE_MASK");
+	KASSERT((decr & PAGE_MASK) == 0, ("swap_release: decr: %ju & PAGE_MASK", (uintmax_t)decr));
 
-	mtx_lock(&sw_dev_mtx);
-	if (swap_reserved < decr)
+	decr >>= PAGE_SHIFT;
+	prev = atomic_fetchadd_long(&swap_reserved, -decr);
+	if (prev < decr)
 		panic("swap_reserved < decr");
-	swap_reserved -= decr;
-	mtx_unlock(&sw_dev_mtx);
 
-	UIDINFO_VMSIZE_LOCK(uip);
-	if (uip->ui_vmsize < decr)
+	prev = atomic_fetchadd_long(&uip->ui_vmsize, -decr);
+	if (prev < decr)
 		printf("negative vmsize for uid = %d\n", uip->ui_uid);
-	uip->ui_vmsize -= decr;
-	UIDINFO_VMSIZE_UNLOCK(uip);
-
-	racct_sub_cred(cred, RACCT_SWAP, decr);
+	if (racct_enable)
+		racct_sub_cred(cred, RACCT_SWAP, decr << PAGE_SHIFT);
 }
 
 #define SWM_POP	0x01	/* pop out */
@@ -2176,7 +2189,7 @@
 {
 	unsigned long maxpages, npages;
 
-	npages = swap_total / PAGE_SIZE;
+	npages = swap_total;
 
 	/* absolute maximum we can handle assuming 100% efficiency */
 	maxpages = uma_zone_get_max(swblk_zone) * SWAP_META_PAGES;
@@ -2254,7 +2267,7 @@
 	TAILQ_INSERT_TAIL(&swtailq, sp, sw_list);
 	nswapdev++;
 	swap_pager_avail += nblks - 2;
-	swap_total += (vm_ooffset_t)nblks * PAGE_SIZE;
+	swap_total += nblks;
 	swapon_check_swzone();
 	swp_sizecheck();
 	mtx_unlock(&sw_dev_mtx);
@@ -2351,7 +2364,7 @@
 	mtx_lock(&sw_dev_mtx);
 	sp->sw_flags |= SW_CLOSING;
 	swap_pager_avail -= blist_fill(sp->sw_blist, 0, nblks);
-	swap_total -= (vm_ooffset_t)nblks * PAGE_SIZE;
+	swap_total -= nblks;
 	mtx_unlock(&sw_dev_mtx);
 
 	/*