Index: head/sys/kern/kern_resource.c
===================================================================
--- head/sys/kern/kern_resource.c
+++ head/sys/kern/kern_resource.c
@@ -1276,7 +1276,6 @@
 	racct_create(&new_uip->ui_racct);
 	refcount_init(&new_uip->ui_ref, 1);
 	new_uip->ui_uid = uid;
-	mtx_init(&new_uip->ui_vmsize_mtx, "ui_vmsize", NULL, MTX_DEF);
 
 	rw_wlock(&uihashtbl_lock);
 	/*
@@ -1291,7 +1290,6 @@
 	} else {
 		rw_wunlock(&uihashtbl_lock);
 		racct_destroy(&new_uip->ui_racct);
-		mtx_destroy(&new_uip->ui_vmsize_mtx);
 		free(new_uip, M_UIDINFO);
 	}
 	return (uip);
@@ -1352,7 +1350,6 @@
 	if (uip->ui_vmsize != 0)
 		printf("freeing uidinfo: uid = %d, swapuse = %lld\n",
 		    uip->ui_uid, (unsigned long long)uip->ui_vmsize);
-	mtx_destroy(&uip->ui_vmsize_mtx);
 	free(uip, M_UIDINFO);
 }
 
Index: head/sys/sys/resourcevar.h
===================================================================
--- head/sys/sys/resourcevar.h
+++ head/sys/sys/resourcevar.h
@@ -93,12 +93,10 @@
 *	(a)	Constant from inception
 *	(b)	Lockless, updated using atomics
 *	(c)	Locked by global uihashtbl_lock
- *	(d)	Locked by the ui_vmsize_mtx
 */
 struct uidinfo {
 	LIST_ENTRY(uidinfo) ui_hash;	/* (c) hash chain of uidinfos */
-	struct mtx ui_vmsize_mtx;
-	vm_ooffset_t ui_vmsize;		/* (d) swap reservation by uid */
+	u_long	ui_vmsize;		/* (b) pages of swap reservation by uid */
 	long	ui_sbsize;		/* (b) socket buffer space consumed */
 	long	ui_proccnt;		/* (b) number of processes */
 	long	ui_ptscnt;		/* (b) number of pseudo-terminals */
Index: head/sys/vm/swap_pager.c
===================================================================
--- head/sys/vm/swap_pager.c
+++ head/sys/vm/swap_pager.c
@@ -151,12 +151,16 @@
 int swap_pager_avail;
 static struct sx swdev_syscall_lock;	/* serialize swap(on|off) */
 
-static vm_ooffset_t swap_total;
-SYSCTL_QUAD(_vm, OID_AUTO, swap_total, CTLFLAG_RD, &swap_total, 0,
-    "Total amount of available swap storage.");
-static vm_ooffset_t swap_reserved;
-SYSCTL_QUAD(_vm, OID_AUTO, swap_reserved, CTLFLAG_RD, &swap_reserved, 0,
+static u_long swap_reserved;
+static u_long swap_total;
+static int sysctl_page_shift(SYSCTL_HANDLER_ARGS);
+SYSCTL_PROC(_vm, OID_AUTO, swap_reserved, CTLTYPE_U64 | CTLFLAG_RD | CTLFLAG_MPSAFE,
+    &swap_reserved, 0, sysctl_page_shift, "A",
     "Amount of swap storage needed to back all allocated anonymous memory.");
+SYSCTL_PROC(_vm, OID_AUTO, swap_total, CTLTYPE_U64 | CTLFLAG_RD | CTLFLAG_MPSAFE,
+    &swap_total, 0, sysctl_page_shift, "A",
+    "Total amount of available swap storage.");
+
 static int overcommit = 0;
 SYSCTL_INT(_vm, VM_OVERCOMMIT, overcommit, CTLFLAG_RW, &overcommit, 0,
     "Configure virtual memory overcommit behavior. See tuning(7) "
@@ -173,6 +177,16 @@
 #define	SWAP_RESERVE_RLIMIT_ON		(1 << 1)
 #define	SWAP_RESERVE_ALLOW_NONWIRED	(1 << 2)
 
+static int
+sysctl_page_shift(SYSCTL_HANDLER_ARGS)
+{
+	uint64_t newval;
+	u_long value = *(u_long *)arg1;
+
+	newval = ((uint64_t)value) << PAGE_SHIFT;
+	return (sysctl_handle_64(oidp, &newval, 0, req));
+}
+
 int
 swap_reserve(vm_ooffset_t incr)
 {
@@ -183,7 +197,7 @@
 int
 swap_reserve_by_cred(vm_ooffset_t incr, struct ucred *cred)
 {
-	vm_ooffset_t r, s;
+	u_long r, s, prev, pincr;
 	int res, error;
 	static int curfail;
 	static struct timeval lastfail;
@@ -191,8 +205,8 @@
 
 	uip = cred->cr_ruidinfo;
 
-	if (incr & PAGE_MASK)
-		panic("swap_reserve: & PAGE_MASK");
+	KASSERT((incr & PAGE_MASK) == 0, ("%s: incr: %ju & PAGE_MASK", __func__,
+	    (uintmax_t)incr));
 
 #ifdef RACCT
 	if (racct_enable) {
@@ -204,36 +218,33 @@
 	}
 #endif
 
+	pincr = atop(incr);
 	res = 0;
-	mtx_lock(&sw_dev_mtx);
-	r = swap_reserved + incr;
+	prev = atomic_fetchadd_long(&swap_reserved, pincr);
+	r = prev + pincr;
 	if (overcommit & SWAP_RESERVE_ALLOW_NONWIRED) {
 		s = vm_cnt.v_page_count - vm_cnt.v_free_reserved -
 		    vm_wire_count();
-		s *= PAGE_SIZE;
 	} else
 		s = 0;
 	s += swap_total;
 	if ((overcommit & SWAP_RESERVE_FORCE_ON) == 0 || r <= s ||
 	    (error = priv_check(curthread, PRIV_VM_SWAP_NOQUOTA)) == 0) {
 		res = 1;
-		swap_reserved = r;
+	} else {
+		prev = atomic_fetchadd_long(&swap_reserved, -pincr);
+		if (prev < pincr)
+			panic("swap_reserved < incr on overcommit fail");
 	}
-	mtx_unlock(&sw_dev_mtx);
-
 	if (res) {
-		UIDINFO_VMSIZE_LOCK(uip);
+		prev = atomic_fetchadd_long(&uip->ui_vmsize, pincr);
 		if ((overcommit & SWAP_RESERVE_RLIMIT_ON) != 0 &&
-		    uip->ui_vmsize + incr > lim_cur(curthread, RLIMIT_SWAP) &&
-		    priv_check(curthread, PRIV_VM_SWAP_NORLIMIT))
+		    prev + pincr > lim_cur(curthread, RLIMIT_SWAP) &&
+		    priv_check(curthread, PRIV_VM_SWAP_NORLIMIT)) {
 			res = 0;
-		else
-			uip->ui_vmsize += incr;
-		UIDINFO_VMSIZE_UNLOCK(uip);
-		if (!res) {
-			mtx_lock(&sw_dev_mtx);
-			swap_reserved -= incr;
-			mtx_unlock(&sw_dev_mtx);
+			prev = atomic_fetchadd_long(&uip->ui_vmsize, -pincr);
+			if (prev < pincr)
+				panic("uip->ui_vmsize < incr on overcommit fail");
 		}
 	}
 	if (!res && ppsratecheck(&lastfail, &curfail, 1)) {
@@ -242,7 +253,7 @@
 	}
 
 #ifdef RACCT
-	if (!res) {
+	if (racct_enable && !res) {
 		PROC_LOCK(curproc);
 		racct_sub(curproc, RACCT_SWAP, incr);
 		PROC_UNLOCK(curproc);
@@ -256,22 +267,20 @@
 swap_reserve_force(vm_ooffset_t incr)
 {
 	struct uidinfo *uip;
+	u_long pincr;
 
-	mtx_lock(&sw_dev_mtx);
-	swap_reserved += incr;
-	mtx_unlock(&sw_dev_mtx);
+	KASSERT((incr & PAGE_MASK) == 0, ("%s: incr: %ju & PAGE_MASK", __func__,
+	    (uintmax_t)incr));
 
-#ifdef RACCT
 	PROC_LOCK(curproc);
-	racct_add_force(curproc, RACCT_SWAP, incr);
-	PROC_UNLOCK(curproc);
+#ifdef RACCT
+	if (racct_enable)
+		racct_add_force(curproc, RACCT_SWAP, incr);
 #endif
-
-	uip = curthread->td_ucred->cr_ruidinfo;
-	PROC_LOCK(curproc);
-	UIDINFO_VMSIZE_LOCK(uip);
-	uip->ui_vmsize += incr;
-	UIDINFO_VMSIZE_UNLOCK(uip);
+	pincr = atop(incr);
+	atomic_add_long(&swap_reserved, pincr);
+	uip = curproc->p_ucred->cr_ruidinfo;
+	atomic_add_long(&uip->ui_vmsize, pincr);
 	PROC_UNLOCK(curproc);
 }
@@ -281,7 +290,7 @@
 	struct ucred *cred;
 
 	PROC_LOCK(curproc);
-	cred = curthread->td_ucred;
+	cred = curproc->p_ucred;
 	swap_release_by_cred(decr, cred);
 	PROC_UNLOCK(curproc);
 }
@@ -289,26 +298,26 @@
 void
 swap_release_by_cred(vm_ooffset_t decr, struct ucred *cred)
 {
+	u_long prev, pdecr;
 	struct uidinfo *uip;
 
 	uip = cred->cr_ruidinfo;
 
-	if (decr & PAGE_MASK)
-		panic("swap_release: & PAGE_MASK");
+	KASSERT((decr & PAGE_MASK) == 0, ("%s: decr: %ju & PAGE_MASK", __func__,
+	    (uintmax_t)decr));
 
-	mtx_lock(&sw_dev_mtx);
-	if (swap_reserved < decr)
+	pdecr = atop(decr);
+	prev = atomic_fetchadd_long(&swap_reserved, -pdecr);
+	if (prev < pdecr)
 		panic("swap_reserved < decr");
-	swap_reserved -= decr;
-	mtx_unlock(&sw_dev_mtx);
 
-	UIDINFO_VMSIZE_LOCK(uip);
-	if (uip->ui_vmsize < decr)
+	prev = atomic_fetchadd_long(&uip->ui_vmsize, -pdecr);
+	if (prev < pdecr)
 		printf("negative vmsize for uid = %d\n", uip->ui_uid);
-	uip->ui_vmsize -= decr;
-	UIDINFO_VMSIZE_UNLOCK(uip);
-
-	racct_sub_cred(cred, RACCT_SWAP, decr);
+#ifdef RACCT
+	if (racct_enable)
+		racct_sub_cred(cred, RACCT_SWAP, decr);
+#endif
 }
 
 #define	SWM_POP		0x01	/* pop out */
 
@@ -2176,7 +2185,7 @@
 {
 	unsigned long maxpages, npages;
 
-	npages = swap_total / PAGE_SIZE;
+	npages = swap_total;
 	/* absolute maximum we can handle assuming 100% efficiency */
 	maxpages = uma_zone_get_max(swblk_zone) * SWAP_META_PAGES;
@@ -2254,7 +2263,7 @@
 	TAILQ_INSERT_TAIL(&swtailq, sp, sw_list);
 	nswapdev++;
 	swap_pager_avail += nblks - 2;
-	swap_total += (vm_ooffset_t)nblks * PAGE_SIZE;
+	swap_total += nblks;
 	swapon_check_swzone();
 	swp_sizecheck();
 	mtx_unlock(&sw_dev_mtx);
@@ -2351,7 +2360,7 @@
 	mtx_lock(&sw_dev_mtx);
 	sp->sw_flags |= SW_CLOSING;
 	swap_pager_avail -= blist_fill(sp->sw_blist, 0, nblks);
-	swap_total -= (vm_ooffset_t)nblks * PAGE_SIZE;
+	swap_total -= nblks;
 	mtx_unlock(&sw_dev_mtx);
 
 	/*