diff --git a/contrib/jemalloc/src/pages.c b/contrib/jemalloc/src/pages.c --- a/contrib/jemalloc/src/pages.c +++ b/contrib/jemalloc/src/pages.c @@ -12,6 +12,7 @@ #include #ifdef __FreeBSD__ #include +#include #endif #endif @@ -437,9 +438,14 @@ #ifdef JEMALLOC_SYSCTL_VM_OVERCOMMIT static bool os_overcommits_sysctl(void) { - int vm_overcommit; + int vm_overcommit, bsdflags; size_t sz; +#ifdef ELF_BSDF_VMNOOVERCOMMIT + if (_elf_aux_info(AT_BSDFLAGS, &bsdflags, sizeof(bsdflags)) == 0) + return ((bsdflags & ELF_BSDF_VMNOOVERCOMMIT) == 0); +#endif + sz = sizeof(vm_overcommit); #if defined(__FreeBSD__) && defined(VM_OVERCOMMIT) int mib[2]; diff --git a/lib/libc/gen/auxv.c b/lib/libc/gen/auxv.c --- a/lib/libc/gen/auxv.c +++ b/lib/libc/gen/auxv.c @@ -73,6 +73,7 @@ static void *ps_strings, *timekeep; static u_long hwcap, hwcap2; static void *fxrng_seed_version; +static u_long usrstackbase, usrstacklim; /* stay 0 unless the auxv carries AT_USRSTACKBASE / AT_USRSTACKLIM */ #ifdef __powerpc__ static int powerpc_new_auxv_format = 0; @@ -144,6 +145,14 @@ case AT_FXRNG: fxrng_seed_version = aux->a_un.a_ptr; break; + + case AT_USRSTACKBASE: + usrstackbase = aux->a_un.a_val; + break; + + case AT_USRSTACKLIM: + usrstacklim = aux->a_un.a_val; + break; #ifdef __powerpc__ /* * Since AT_STACKPROT is always set, and the common @@ -370,6 +379,26 @@ } else res = EINVAL; break; + case AT_USRSTACKBASE: + if (buflen == sizeof(u_long)) { + if (usrstackbase != 0) { + *(u_long *)buf = usrstackbase; + res = 0; + } else + res = ENOENT; /* entry absent from auxv: let callers use their sysctl fallback */ + } else + res = EINVAL; + break; + case AT_USRSTACKLIM: + if (buflen == sizeof(u_long)) { + if (usrstacklim != 0) { + *(u_long *)buf = usrstacklim; + res = 0; + } else + res = ENOENT; /* entry absent from auxv: let callers use their getrlimit fallback */ + } else + res = EINVAL; + break; default: res = ENOENT; break; diff --git a/lib/libc/gen/elf_utils.c b/lib/libc/gen/elf_utils.c --- a/lib/libc/gen/elf_utils.c +++ b/lib/libc/gen/elf_utils.c @@ -28,7 +28,8 @@ * $FreeBSD$ */ -#include +#include +#include #include #include #include @@ -77,19 +78,23 @@ { int mib[2]; struct rlimit rlim; - u_long usrstack; + u_long usrstack, stacksz; size_t len; - mib[0] = CTL_KERN; - mib[1] = KERN_USRSTACK; - len = 
sizeof(usrstack); - if (sysctl(mib, sizeof(mib) / sizeof(mib[0]), &usrstack, &len, NULL, 0) - == -1) - return; - if (getrlimit(RLIMIT_STACK, &rlim) == -1) - return; - mprotect((void *)(uintptr_t)(usrstack - rlim.rlim_cur), - rlim.rlim_cur, _rtld_get_stack_prot()); + if (_elf_aux_info(AT_USRSTACKBASE, &usrstack, sizeof(usrstack)) != 0) { + mib[0] = CTL_KERN; + mib[1] = KERN_USRSTACK; + len = sizeof(usrstack); + if (sysctl(mib, nitems(mib), &usrstack, &len, NULL, 0) == -1) + return; + } + if (_elf_aux_info(AT_USRSTACKLIM, &stacksz, sizeof(stacksz)) != 0) { + if (getrlimit(RLIMIT_STACK, &rlim) == -1) + return; + stacksz = rlim.rlim_cur; + } + mprotect((void *)(uintptr_t)(usrstack - stacksz), stacksz, + _rtld_get_stack_prot()); } #pragma weak __pthread_map_stacks_exec diff --git a/lib/libthr/thread/thr_stack.c b/lib/libthr/thread/thr_stack.c --- a/lib/libthr/thread/thr_stack.c +++ b/lib/libthr/thread/thr_stack.c @@ -30,7 +30,8 @@ #include __FBSDID("$FreeBSD$"); -#include +#include +#include #include #include #include @@ -149,19 +150,24 @@ { int mib[2]; struct rlimit rlim; - u_long usrstack; + u_long usrstack, stacksz; size_t len; - mib[0] = CTL_KERN; - mib[1] = KERN_USRSTACK; - len = sizeof(usrstack); - if (sysctl(mib, sizeof(mib) / sizeof(mib[0]), &usrstack, &len, NULL, 0) - == -1) - return; - if (getrlimit(RLIMIT_STACK, &rlim) == -1) - return; - mprotect((void *)(uintptr_t)(usrstack - rlim.rlim_cur), - rlim.rlim_cur, _rtld_get_stack_prot()); + /* Take the stack top and size from the aux vector; fall back to sysctl/getrlimit when an entry is unavailable. */ + if (elf_aux_info(AT_USRSTACKBASE, &usrstack, sizeof(usrstack)) != 0) { + mib[0] = CTL_KERN; + mib[1] = KERN_USRSTACK; + len = sizeof(usrstack); + if (sysctl(mib, nitems(mib), &usrstack, &len, NULL, 0) == -1) + return; + } + if (elf_aux_info(AT_USRSTACKLIM, &stacksz, sizeof(stacksz)) != 0) { + if (getrlimit(RLIMIT_STACK, &rlim) == -1) + return; + stacksz = rlim.rlim_cur; + } + mprotect((void *)(uintptr_t)(usrstack - stacksz), 
stacksz, + _rtld_get_stack_prot()); } void diff --git a/sys/kern/imgact_elf.c b/sys/kern/imgact_elf.c --- a/sys/kern/imgact_elf.c +++ b/sys/kern/imgact_elf.c @@ -1448,7 +1448,8 @@ Elf_Auxargs *args = (Elf_Auxargs *)imgp->auxargs; Elf_Auxinfo *argarray, *pos; struct vmspace *vmspace; - int error; + rlim_t stacksz; + int error, bsdflags, oc; argarray = pos = malloc(AT_COUNT * sizeof(*pos), M_TEMP, M_WAITOK | M_ZERO); @@ -1489,8 +1490,12 @@ AUXARGS_ENTRY(pos, AT_HWCAP, *imgp->sysent->sv_hwcap); if (imgp->sysent->sv_hwcap2 != NULL) AUXARGS_ENTRY(pos, AT_HWCAP2, *imgp->sysent->sv_hwcap2); - AUXARGS_ENTRY(pos, AT_BSDFLAGS, __elfN(sigfastblock) ? - ELF_BSDF_SIGFASTBLK : 0); + bsdflags = 0; + bsdflags |= __elfN(sigfastblock) ? ELF_BSDF_SIGFASTBLK : 0; + oc = atomic_load_int(&vm_overcommit); + bsdflags |= (oc & (SWAP_RESERVE_FORCE_ON | SWAP_RESERVE_RLIMIT_ON)) != + 0 ? ELF_BSDF_VMNOOVERCOMMIT : 0; + AUXARGS_ENTRY(pos, AT_BSDFLAGS, bsdflags); AUXARGS_ENTRY(pos, AT_ARGC, imgp->args->argc); AUXARGS_ENTRY_PTR(pos, AT_ARGV, imgp->argv); AUXARGS_ENTRY(pos, AT_ENVC, imgp->args->envc); @@ -1506,6 +1511,11 @@ AUXARGS_ENTRY(pos, AT_KPRELOAD, vmspace->vm_shp_base + imgp->sysent->sv_vdso_offset); } + AUXARGS_ENTRY(pos, AT_USRSTACKBASE, round_page(vmspace->vm_stacktop)); + PROC_LOCK(imgp->proc); + stacksz = lim_cur_proc(imgp->proc, RLIMIT_STACK); + PROC_UNLOCK(imgp->proc); + AUXARGS_ENTRY(pos, AT_USRSTACKLIM, stacksz); AUXARGS_ENTRY(pos, AT_NULL, 0); free(imgp->auxargs, M_TEMP); diff --git a/sys/sys/elf_common.h b/sys/sys/elf_common.h --- a/sys/sys/elf_common.h +++ b/sys/sys/elf_common.h @@ -986,8 +986,10 @@ #define AT_PS_STRINGS 32 /* struct ps_strings */ #define AT_FXRNG 33 /* Pointer to root RNG seed version. */ #define AT_KPRELOAD 34 /* Base of vdso, preloaded by rtld */ +#define AT_USRSTACKBASE 35 /* Page-rounded top of user stack */ +#define AT_USRSTACKLIM 36 /* Current RLIMIT_STACK value of the process */ -#define AT_COUNT 35 /* Count of defined aux entry types. */ +#define AT_COUNT 37 /* Count of defined aux entry types. */ /* * Relocation types. 
@@ -1501,5 +1503,6 @@ #define R_X86_64_REX_GOTPCRELX 42 #define ELF_BSDF_SIGFASTBLK 0x0001 /* Kernel supports fast sigblock */ +#define ELF_BSDF_VMNOOVERCOMMIT 0x0002 /* Set when vm_overcommit has SWAP_RESERVE_FORCE_ON or SWAP_RESERVE_RLIMIT_ON */ #endif /* !_SYS_ELF_COMMON_H_ */ diff --git a/sys/vm/swap_pager.c b/sys/vm/swap_pager.c --- a/sys/vm/swap_pager.c +++ b/sys/vm/swap_pager.c @@ -169,8 +169,8 @@ &swap_total, 0, sysctl_page_shift, "QU", "Total amount of available swap storage."); -static int overcommit = 0; -SYSCTL_INT(_vm, VM_OVERCOMMIT, overcommit, CTLFLAG_RW, &overcommit, 0, +int vm_overcommit = 0; +SYSCTL_INT(_vm, VM_OVERCOMMIT, overcommit, CTLFLAG_RW, &vm_overcommit, 0, "Configure virtual memory overcommit behavior. See tuning(7) " "for details."); static unsigned long swzone; @@ -190,11 +190,6 @@ CTLFLAG_RD, &swap_free_completed, "Number of deferred frees completed"); -/* bits from overcommit */ -#define SWAP_RESERVE_FORCE_ON (1 << 0) -#define SWAP_RESERVE_RLIMIT_ON (1 << 1) -#define SWAP_RESERVE_ALLOW_NONWIRED (1 << 2) - static int sysctl_page_shift(SYSCTL_HANDLER_ARGS) { @@ -286,7 +281,7 @@ prev = atomic_fetchadd_long(&swap_reserved, pincr); r = prev + pincr; s = swap_total; - oc = atomic_load_int(&overcommit); + oc = atomic_load_int(&vm_overcommit); if (r > s && (oc & SWAP_RESERVE_ALLOW_NONWIRED) != 0) { s += vm_cnt.v_page_count - vm_cnt.v_free_reserved - vm_wire_count(); diff --git a/sys/vm/vm.h b/sys/vm/vm.h --- a/sys/vm/vm.h +++ b/sys/vm/vm.h @@ -165,6 +165,12 @@ extern int vm_ndomains; +/* Bit flags held in vm_overcommit (exported as the vm.overcommit sysctl) */ +#define SWAP_RESERVE_FORCE_ON (1 << 0) +#define SWAP_RESERVE_RLIMIT_ON (1 << 1) +#define SWAP_RESERVE_ALLOW_NONWIRED (1 << 2) +extern int vm_overcommit; /* defined in swap_pager.c */ + #ifdef _KERNEL struct ucred; bool swap_reserve(vm_ooffset_t incr);