Index: lib/libc/gen/auxv.c =================================================================== --- lib/libc/gen/auxv.c +++ lib/libc/gen/auxv.c @@ -67,7 +67,7 @@ } static pthread_once_t aux_once = PTHREAD_ONCE_INIT; -static int pagesize, osreldate, canary_len, ncpus, pagesizes_len; +static int pagesize, osreldate, canary_len, ncpus, pagesizes_len, bsdflags; static int hwcap_present, hwcap2_present; static char *canary, *pagesizes, *execpath; static void *timekeep; @@ -80,6 +80,10 @@ for (aux = __elf_aux_vector; aux->a_type != AT_NULL; aux++) { switch (aux->a_type) { + case AT_BSDFLAGS: + bsdflags = aux->a_un.a_val; + break; + case AT_CANARY: canary = (char *)(aux->a_un.a_ptr); break; @@ -224,6 +228,13 @@ } else res = EINVAL; break; + case AT_BSDFLAGS: + if (buflen == sizeof(int)) { + *(int *)buf = bsdflags; + res = 0; + } else + res = EINVAL; + break; default: res = ENOENT; break; Index: lib/libc/sys/Makefile.inc =================================================================== --- lib/libc/sys/Makefile.inc +++ lib/libc/sys/Makefile.inc @@ -186,6 +186,7 @@ execve.2 \ _exit.2 \ extattr_get_file.2 \ + fast_sigblock.2 \ fcntl.2 \ ffclock.2 \ fhlink.2 \ Index: lib/libc/sys/Symbol.map =================================================================== --- lib/libc/sys/Symbol.map +++ lib/libc/sys/Symbol.map @@ -567,6 +567,7 @@ __sys_extattr_set_link; _extattrctl; __sys_extattrctl; + __sys_fast_sigblock; _fchdir; __sys_fchdir; _fchflags; Index: lib/libc/sys/fast_sigblock.2 =================================================================== --- /dev/null +++ lib/libc/sys/fast_sigblock.2 @@ -0,0 +1,166 @@ +.\" Copyright (c) 2016 The FreeBSD Foundation, Inc. +.\" +.\" This documentation was written by +.\" Konstantin Belousov under sponsorship +.\" from the FreeBSD Foundation. +.\" +.\" Redistribution and use in source and binary forms, with or without +.\" modification, are permitted provided that the following conditions +.\" are met: +.\" 1. 
Redistributions of source code must retain the above copyright +.\" notice, this list of conditions and the following disclaimer. +.\" 2. Redistributions in binary form must reproduce the above copyright +.\" notice, this list of conditions and the following disclaimer in the +.\" documentation and/or other materials provided with the distribution. +.\" +.\" THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND +.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +.\" ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE +.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF +.\" SUCH DAMAGE. +.\" +.\" $FreeBSD$ +.\" +.Dd December 13, 2019 +.Dt FAST_SIGBLOCK 2 +.Os +.Sh NAME +.Nm fast_sigblock +.Nd controls signals blocking with a simple memory write +.Sh LIBRARY +.Lb libc +.Sh SYNOPSIS +.In sys/signalvar.h +.Ft int +.Fn fast_sigblock "int cmd" "void *ptr" +.Sh DESCRIPTION +.Bf -symbolic +This function is not intended for a direct usage by applications. +The functionality is provided for implementing some optimizations in +.Xr ld-elf.so.1 8 +and +.Lb libthr . +.Ef +.Pp +The function configures the kernel facility that allows a thread to +block asynchronous signals delivery with a single write to userspace +memory, avoiding overhead of system calls like +.Xr sigprocmask 2 +for establishing critical sections. +The C runtime uses it to optimize implementation of async-signal-safe +functionality. 
+.Pp +A thread might register a +.Dv sigblock +variable of type +.Vt int +as a location which is consulted by the kernel when calculating the +blocked signal mask for delivery of asynchronous signals. +If the variable contains a non-zero count of blocks (see below), +then the kernel effectively operates as if a mask containing all +blockable signals was supplied to +.Xr sigprocmask 2 . +.Pp +The variable is supposed to be modified only from the owning thread; +there is no way to guarantee visibility of an update from another thread +to the kernel when signals are delivered. +.Pp +Lower bits of the sigblock variable are reserved as flags, +which might be set or cleared by the kernel at arbitrary moments. +Userspace code should use +.Xr atomic 9 +operations of incrementing and decrementing by +.Dv FAST_SIGBLOCK_INC +quantity to recursively block or unblock signals delivery. +.Pp +If a signal would be delivered when unmasked, the kernel might set the +.Dv FAST_SIGBLOCK_PEND +.Dq pending signal +flag in the sigblock variable. +Userspace should perform +.Dv FAST_SIGBLOCK_UNBLOCK +operation when clearing the variable if it notes the pending signal +bit is set, which would deliver the pending signals immediately. +Otherwise, signals delivery might be postponed. +.Pp +The +.Fa cmd +argument specifies one of the following operations: +.Bl -tag -width FAST_SIGBLOCK_UNSETPTR +.It Dv FAST_SIGBLOCK_SETPTR +Register the variable of type +.Vt int +at location pointed to by the +.Fa ptr +argument as sigblock variable for the calling thread. +.It Dv FAST_SIGBLOCK_UNSETPTR +Unregister the currently registered sigblock location. +The kernel stops inferring the blocked mask from non-zero value of its +blocked count. +New location can be registered after previous one is deregistered. +.It Dv FAST_SIGBLOCK_UNBLOCK +If there are pending signals which should be delivered to the calling +thread, they are delivered before returning from the call.
+The sigblock variable should have zero blocking count, and indicate +that the pending signal exists. +Effectively this means that the variable should have the value +.Dv FAST_SIGBLOCK_PEND . +.El +.Sh RETURN VALUES +.Rv -std +.Sh ERRORS +The operation may fail with the following errors: +.Bl -tag -width Er +.It Bq Er EBUSY +The +.Dv FAST_SIGBLOCK_SETPTR +operation was attempted while the sigblock address was already registered. +The +.Dv FAST_SIGBLOCK_UNBLOCK +operation was called while the sigblock variable value was not equal to +.Dv FAST_SIGBLOCK_PEND . +.It Bq Er EINVAL +The variable address passed to +.Dv FAST_SIGBLOCK_SETPTR +is not aligned naturally. +The +.Dv FAST_SIGBLOCK_UNSETPTR +operation was attempted without a prior successful call to +.Dv FAST_SIGBLOCK_SETPTR . +.It Bq Er EFAULT +Attempt to read or write to the sigblock variable failed. +Note that the kernel generates the +.Dv SIGSEGV +signal if an attempt to read from the sigblock variable faulted +during implicit accesses from syscall entry. +.El +.Sh SEE ALSO +.Xr kill 2 , +.Xr signal 2 , +.Xr sigprocmask 2 , +.Xr libthr 3 , +.Xr ld-elf.so.1 8 +.Sh STANDARDS +The +.Nm +function is non-standard, although a similar functionality is a common +optimization provided by several other systems. +.Sh HISTORY +The +.Nm +function was introduced in +.Fx 13.0 . +.Sh BUGS +The +.Nm +symbol is currently not exported by libc, on purpose. +Consumers should either use the +.Dv __sys_fast_sigblock +symbol from the private libc namespace, or utilize +.Xr syscall 2 .
Index: lib/libthr/thread/thr_create.c =================================================================== --- lib/libthr/thread/thr_create.c +++ lib/libthr/thread/thr_create.c @@ -257,6 +257,7 @@ if (curthread->attr.suspend == THR_CREATE_SUSPENDED) set = curthread->sigmask; + _thr_signal_block_setup(curthread); /* * This is used as a serialization point to allow parent Index: lib/libthr/thread/thr_private.h =================================================================== --- lib/libthr/thread/thr_private.h +++ lib/libthr/thread/thr_private.h @@ -396,6 +396,9 @@ /* Signal blocked counter. */ int sigblock; + /* Fast sigblock var. */ + uint32_t fsigblock; + /* Queue entry for list of all threads. */ TAILQ_ENTRY(pthread) tle; /* link for all threads in process */ @@ -813,6 +816,8 @@ void _thr_testcancel(struct pthread *) __hidden; void _thr_signal_block(struct pthread *) __hidden; void _thr_signal_unblock(struct pthread *) __hidden; +void _thr_signal_block_check_fast(void) __hidden; +void _thr_signal_block_setup(struct pthread *) __hidden; void _thr_signal_init(int) __hidden; void _thr_signal_deinit(void) __hidden; int _thr_send_sig(struct pthread *, int sig) __hidden; Index: lib/libthr/thread/thr_rtld.c =================================================================== --- lib/libthr/thread/thr_rtld.c +++ lib/libthr/thread/thr_rtld.c @@ -236,6 +236,8 @@ _thr_signal_block(curthread); _rtld_thread_init(&li); _thr_signal_unblock(curthread); + _thr_signal_block_check_fast(); + _thr_signal_block_setup(curthread); uc_len = __getcontextx_size(); uc = alloca(uc_len); Index: lib/libthr/thread/thr_sig.c =================================================================== --- lib/libthr/thread/thr_sig.c +++ lib/libthr/thread/thr_sig.c @@ -31,7 +31,8 @@ #include "namespace.h" #include -#include +#include +#include #include #include #include @@ -92,10 +93,9 @@ 0xffffffff, 0xffffffff}}; -void -_thr_signal_block(struct pthread *curthread) +static void 
+thr_signal_block_slow(struct pthread *curthread) { - if (curthread->sigblock > 0) { curthread->sigblock++; return; @@ -104,13 +104,68 @@ curthread->sigblock++; } -void -_thr_signal_unblock(struct pthread *curthread) +static void +thr_signal_unblock_slow(struct pthread *curthread) { if (--curthread->sigblock == 0) __sys_sigprocmask(SIG_SETMASK, &curthread->sigmask, NULL); } +static void +thr_signal_block_fast(struct pthread *curthread) +{ + atomic_add_32(&curthread->fsigblock, FAST_SIGBLOCK_INC); +} + +static void +thr_signal_unblock_fast(struct pthread *curthread) +{ + uint32_t oldval; + + oldval = atomic_fetchadd_32(&curthread->fsigblock, -FAST_SIGBLOCK_INC); + if (oldval == (FAST_SIGBLOCK_PEND | FAST_SIGBLOCK_INC)) + __sys_fast_sigblock(FAST_SIGBLOCK_UNBLOCK, NULL); +} + +static bool fast_sigblock; + +void +_thr_signal_block(struct pthread *curthread) +{ + if (fast_sigblock) + thr_signal_block_fast(curthread); + else + thr_signal_block_slow(curthread); +} + +void +_thr_signal_unblock(struct pthread *curthread) +{ + if (fast_sigblock) + thr_signal_unblock_fast(curthread); + else + thr_signal_unblock_slow(curthread); +} + +void +_thr_signal_block_check_fast(void) +{ + int bsdflags, error; + + error = elf_aux_info(AT_BSDFLAGS, &bsdflags, sizeof(bsdflags)); + if (error != 0) + return; + fast_sigblock = (bsdflags & ELF_BSDF_FASTSIGBLK) != 0; +} + +void +_thr_signal_block_setup(struct pthread *curthread) +{ + if (!fast_sigblock) + return; + __sys_fast_sigblock(FAST_SIGBLOCK_SETPTR, &curthread->fsigblock); +} + int _thr_send_sig(struct pthread *thread, int sig) { Index: libexec/rtld-elf/rtld-libc/Makefile.inc =================================================================== --- libexec/rtld-elf/rtld-libc/Makefile.inc +++ libexec/rtld-elf/rtld-libc/Makefile.inc @@ -45,8 +45,9 @@ strncpy strrchr strsep strspn strstr strtok # Also use all the syscall .o files from libc_nossp_pic: _libc_other_objects= sigsetjmp lstat stat fstat fstatat fstatfs syscall \ - cerror geteuid 
getegid munmap mprotect sysarch __sysctl issetugid __getcwd \ - utrace thr_self thr_kill pread mmap lseek _exit _fstat _fstatat _fstatfs \ + cerror geteuid getegid fast_sigblock munmap mprotect \ + sysarch __sysctl issetugid __getcwd utrace \ + thr_self thr_kill pread mmap lseek _exit _fstat _fstatat _fstatfs \ getdirentries _getdirentries _close _fcntl _open _openat _read \ _sigprocmask _write readlink _setjmp setjmp setjmperr Index: libexec/rtld-elf/rtld.h =================================================================== --- libexec/rtld-elf/rtld.h +++ libexec/rtld-elf/rtld.h @@ -365,6 +365,7 @@ extern Elf_Addr _GLOBAL_OFFSET_TABLE_[]; extern Elf_Sym sym_zero; /* For resolving undefined weak refs. */ extern bool ld_bind_not; +extern bool ld_fast_sigblock; void dump_relocations(Obj_Entry *); void dump_obj_relocations(Obj_Entry *); Index: libexec/rtld-elf/rtld.c =================================================================== --- libexec/rtld-elf/rtld.c +++ libexec/rtld-elf/rtld.c @@ -286,6 +286,7 @@ int tls_max_index = 1; /* Largest module index allocated */ static bool ld_library_path_rpath = false; +bool ld_fast_sigblock = false; /* * Globals for path names, and such @@ -444,6 +445,10 @@ main_argc = argc; main_argv = argv; + if (aux_info[AT_BSDFLAGS] != NULL && + (aux_info[AT_BSDFLAGS]->a_un.a_val & ELF_BSDF_FASTSIGBLK) != 0) + ld_fast_sigblock = true; + trust = !issetugid(); md_abi_variant_hook(aux_info); Index: libexec/rtld-elf/rtld_lock.c =================================================================== --- libexec/rtld-elf/rtld_lock.c +++ libexec/rtld-elf/rtld_lock.c @@ -45,6 +45,7 @@ */ #include +#include #include #include #include @@ -68,6 +69,7 @@ static sigset_t fullsigmask, oldsigmask; static int thread_flag, wnested; +static uint32_t fsigblock; static void * def_lock_create(void) @@ -117,6 +119,17 @@ ; /* Spin */ } +static void +sig_fastunblock(void) +{ + uint32_t oldval; + + assert((fsigblock & ~FAST_SIGBLOCK_FLAGS) >= FAST_SIGBLOCK_INC); + 
oldval = atomic_fetchadd_32(&fsigblock, -FAST_SIGBLOCK_INC); + if (oldval == (FAST_SIGBLOCK_PEND | FAST_SIGBLOCK_INC)) + __sys_fast_sigblock(FAST_SIGBLOCK_UNBLOCK, NULL); +} + static void def_wlock_acquire(void *lock) { @@ -124,14 +137,23 @@ sigset_t tmp_oldsigmask; l = (Lock *)lock; - for (;;) { - sigprocmask(SIG_BLOCK, &fullsigmask, &tmp_oldsigmask); - if (atomic_cmpset_acq_int(&l->lock, 0, WAFLAG)) - break; - sigprocmask(SIG_SETMASK, &tmp_oldsigmask, NULL); + if (ld_fast_sigblock) { + for (;;) { + atomic_add_32(&fsigblock, FAST_SIGBLOCK_INC); + if (atomic_cmpset_acq_int(&l->lock, 0, WAFLAG)) + break; + sig_fastunblock(); + } + } else { + for (;;) { + sigprocmask(SIG_BLOCK, &fullsigmask, &tmp_oldsigmask); + if (atomic_cmpset_acq_int(&l->lock, 0, WAFLAG)) + break; + sigprocmask(SIG_SETMASK, &tmp_oldsigmask, NULL); + } + if (atomic_fetchadd_int(&wnested, 1) == 0) + oldsigmask = tmp_oldsigmask; } - if (atomic_fetchadd_int(&wnested, 1) == 0) - oldsigmask = tmp_oldsigmask; } static void @@ -143,9 +165,10 @@ if ((l->lock & WAFLAG) == 0) atomic_add_rel_int(&l->lock, -RC_INCR); else { - assert(wnested > 0); atomic_add_rel_int(&l->lock, -WAFLAG); - if (atomic_fetchadd_int(&wnested, -1) == 1) + if (ld_fast_sigblock) + sig_fastunblock(); + else if (atomic_fetchadd_int(&wnested, -1) == 1) sigprocmask(SIG_SETMASK, &oldsigmask, NULL); } } @@ -279,38 +302,36 @@ void lockdflt_init(void) { - int i; - - deflockinfo.rtli_version = RTLI_VERSION; - deflockinfo.lock_create = def_lock_create; - deflockinfo.lock_destroy = def_lock_destroy; - deflockinfo.rlock_acquire = def_rlock_acquire; - deflockinfo.wlock_acquire = def_wlock_acquire; - deflockinfo.lock_release = def_lock_release; - deflockinfo.thread_set_flag = def_thread_set_flag; - deflockinfo.thread_clr_flag = def_thread_clr_flag; - deflockinfo.at_fork = NULL; - - for (i = 0; i < RTLD_LOCK_CNT; i++) { - rtld_locks[i].mask = (1 << i); - rtld_locks[i].handle = NULL; - } + int i; + + deflockinfo.rtli_version = RTLI_VERSION; + 
deflockinfo.lock_create = def_lock_create; + deflockinfo.lock_destroy = def_lock_destroy; + deflockinfo.rlock_acquire = def_rlock_acquire; + deflockinfo.wlock_acquire = def_wlock_acquire; + deflockinfo.lock_release = def_lock_release; + deflockinfo.thread_set_flag = def_thread_set_flag; + deflockinfo.thread_clr_flag = def_thread_clr_flag; + deflockinfo.at_fork = NULL; - memcpy(&lockinfo, &deflockinfo, sizeof(lockinfo)); - _rtld_thread_init(NULL); - /* - * Construct a mask to block all signals except traps which might - * conceivably be generated within the dynamic linker itself. - */ - sigfillset(&fullsigmask); - sigdelset(&fullsigmask, SIGILL); - sigdelset(&fullsigmask, SIGTRAP); - sigdelset(&fullsigmask, SIGABRT); - sigdelset(&fullsigmask, SIGEMT); - sigdelset(&fullsigmask, SIGFPE); - sigdelset(&fullsigmask, SIGBUS); - sigdelset(&fullsigmask, SIGSEGV); - sigdelset(&fullsigmask, SIGSYS); + for (i = 0; i < RTLD_LOCK_CNT; i++) { + rtld_locks[i].mask = (1 << i); + rtld_locks[i].handle = NULL; + } + + memcpy(&lockinfo, &deflockinfo, sizeof(lockinfo)); + _rtld_thread_init(NULL); + if (ld_fast_sigblock) { + __sys_fast_sigblock(FAST_SIGBLOCK_SETPTR, &fsigblock); + } else { + /* + * Construct a mask to block all signals. Note that + * blocked traps mean that the process is terminated + * if trap occurs while we are in locked section, with + * the default settings for kern.forcesigexit. 
+ */ + sigfillset(&fullsigmask); + } } /* @@ -331,7 +352,10 @@ if (pli == NULL) pli = &deflockinfo; - + else if (ld_fast_sigblock) { + fsigblock = 0; + __sys_fast_sigblock(FAST_SIGBLOCK_UNSETPTR, NULL); + } for (i = 0; i < RTLD_LOCK_CNT; i++) if ((locks[i] = pli->lock_create()) == NULL) Index: sys/compat/freebsd32/syscalls.master =================================================================== --- sys/compat/freebsd32/syscalls.master +++ sys/compat/freebsd32/syscalls.master @@ -1159,5 +1159,6 @@ int shmflags, const char *name); } 572 AUE_SHMRENAME NOPROTO { int shm_rename(const char *path_from, \ const char *path_to, int flags); } +573 AUE_NULL NOPROTO { int fast_sigblock(int cmd, uint32_t *ptr); } ; vim: syntax=off Index: sys/kern/capabilities.conf =================================================================== --- sys/kern/capabilities.conf +++ sys/kern/capabilities.conf @@ -664,7 +664,9 @@ ## ## Allow signal control on current process. +## fast_sigblock is same as sigprocmask. ## +fast_sigblock sigaction sigaltstack sigblock Index: sys/kern/imgact_elf.c =================================================================== --- sys/kern/imgact_elf.c +++ sys/kern/imgact_elf.c @@ -183,6 +183,11 @@ __XSTRING(__CONCAT(ELF, __ELF_WORD_SIZE)) ": maximum percentage of main stack to waste on a random gap"); +static int __elfN(fast_sigblock) = 1; +SYSCTL_INT(__CONCAT(_kern_elf, __ELF_WORD_SIZE), OID_AUTO, fast_sigblock, + CTLFLAG_RWTUN, &__elfN(fast_sigblock), 0, + "report fast sigblock support"); + static Elf_Brandinfo *elf_brand_list[MAX_BRANDS]; #define aligned(a, t) (rounddown2((u_long)(a), sizeof(t)) == (u_long)(a)) @@ -1367,6 +1372,8 @@ AUXARGS_ENTRY(pos, AT_HWCAP, *imgp->sysent->sv_hwcap); if (imgp->sysent->sv_hwcap2 != NULL) AUXARGS_ENTRY(pos, AT_HWCAP2, *imgp->sysent->sv_hwcap2); + AUXARGS_ENTRY(pos, AT_BSDFLAGS, __elfN(fast_sigblock) ? 
+ ELF_BSDF_FASTSIGBLK : 0); AUXARGS_ENTRY(pos, AT_NULL, 0); free(imgp->auxargs, M_TEMP); Index: sys/kern/kern_exec.c =================================================================== --- sys/kern/kern_exec.c +++ sys/kern/kern_exec.c @@ -1025,6 +1025,7 @@ int error; struct proc *p = imgp->proc; struct vmspace *vmspace = p->p_vmspace; + struct thread *td = curthread; vm_object_t obj; struct rlimit rlim_stack; vm_offset_t sv_minuser, stack_addr; @@ -1034,6 +1035,10 @@ imgp->vmspace_destroyed = 1; imgp->sysent = sv; + td->td_pflags &= ~TDP_FAST_SIGBLOCK; + td->td_sigblock_ptr = NULL; + td->td_sigblock_val = 0; + /* May be called with Giant held */ EVENTHANDLER_DIRECT_INVOKE(process_exec, p, imgp); Index: sys/kern/kern_fork.c =================================================================== --- sys/kern/kern_fork.c +++ sys/kern/kern_fork.c @@ -563,7 +563,8 @@ * been preserved. */ p2->p_flag |= p1->p_flag & P_SUGID; - td2->td_pflags |= (td->td_pflags & TDP_ALTSTACK) | TDP_FORKING; + td2->td_pflags |= (td->td_pflags & (TDP_ALTSTACK | + TDP_FAST_SIGBLOCK)) | TDP_FORKING; SESS_LOCK(p1->p_session); if (p1->p_session->s_ttyvp != NULL && p1->p_flag & P_CONTROLT) p2->p_flag |= P_CONTROLT; Index: sys/kern/kern_proc.c =================================================================== --- sys/kern/kern_proc.c +++ sys/kern/kern_proc.c @@ -2967,6 +2967,77 @@ return (error); } +static int +sysctl_kern_proc_fastsigblk(SYSCTL_HANDLER_ARGS) +{ + int *name = (int *)arg1; + u_int namelen = arg2; + pid_t pid; + struct proc *p; + struct thread *td1; + uintptr_t addr; +#ifdef COMPAT_FREEBSD32 + uint32_t addr32; +#endif + int error; + + if (namelen != 1 || req->newptr != NULL) + return (EINVAL); + + pid = (pid_t)name[0]; + error = pget(pid, PGET_HOLD | PGET_NOTWEXIT | PGET_CANDEBUG, &p); + if (error != 0) + return (error); + + PROC_LOCK(p); +#ifdef COMPAT_FREEBSD32 + if (SV_CURPROC_FLAG(SV_ILP32)) { + if (!SV_PROC_FLAG(p, SV_ILP32)) { + error = EINVAL; + goto errlocked; + } + } +#endif + 
if (pid <= PID_MAX) { + td1 = FIRST_THREAD_IN_PROC(p); + } else { + FOREACH_THREAD_IN_PROC(p, td1) { + if (td1->td_tid == pid) + break; + } + } + if (td1 == NULL) { + error = ESRCH; + goto errlocked; + } + /* + * The access to the private thread flags. It is fine as far + * as no out-of-thin-air values are read from td_pflags, and + * usermode read of the td_sigblock_ptr is racy inherently, + * since target process might have already changed it + * meantime. + */ + if ((td1->td_pflags & TDP_FAST_SIGBLOCK) != 0) + addr = (uintptr_t)td1->td_sigblock_ptr; + else + error = ENOTTY; + +errlocked: + _PRELE(p); + PROC_UNLOCK(p); + if (error != 0) + return (error); + +#ifdef COMPAT_FREEBSD32 + if (SV_CURPROC_FLAG(SV_ILP32)) { + addr32 = addr; + error = SYSCTL_OUT(req, &addr32, sizeof(addr32)); + } else +#endif + error = SYSCTL_OUT(req, &addr, sizeof(addr)); + return (error); +} + SYSCTL_NODE(_kern, KERN_PROC, proc, CTLFLAG_RD, 0, "Process table"); SYSCTL_PROC(_kern_proc, KERN_PROC_ALL, all, CTLFLAG_RD|CTLTYPE_STRUCT| @@ -3080,6 +3151,10 @@ CTLFLAG_MPSAFE, sysctl_kern_proc_sigtramp, "Process signal trampoline location"); +static SYSCTL_NODE(_kern_proc, KERN_PROC_FASTSIGBLK, fastsigblk, CTLFLAG_RD | + CTLFLAG_ANYBODY | CTLFLAG_MPSAFE, sysctl_kern_proc_fastsigblk, + "Thread fast_sigblock address"); + int allproc_gen; /* Index: sys/kern/kern_sig.c =================================================================== --- sys/kern/kern_sig.c +++ sys/kern/kern_sig.c @@ -239,6 +239,7 @@ }; static void reschedule_signals(struct proc *p, sigset_t block, int flags); +static sigset_t fastblock_mask; static void sigqueue_start(void) @@ -249,6 +250,8 @@ p31b_setcfg(CTL_P1003_1B_REALTIME_SIGNALS, _POSIX_REALTIME_SIGNALS); p31b_setcfg(CTL_P1003_1B_RTSIG_MAX, SIGRTMAX - SIGRTMIN + 1); p31b_setcfg(CTL_P1003_1B_SIGQUEUE_MAX, max_pending_per_proc); + SIGFILLSET(fastblock_mask); + SIG_CANTMASK(fastblock_mask); } ksiginfo_t * @@ -1998,8 +2001,8 @@ { struct sigacts *ps; struct proc *p; - int sig; - 
int code; + sigset_t sigmask; + int code, sig; p = td->td_proc; sig = ksi->ksi_signo; @@ -2009,8 +2012,11 @@ PROC_LOCK(p); ps = p->p_sigacts; mtx_lock(&ps->ps_mtx); + sigmask = td->td_sigmask; + if (td->td_sigblock_val != 0) + SIGSETOR(sigmask, fastblock_mask); if ((p->p_flag & P_TRACED) == 0 && SIGISMEMBER(ps->ps_sigcatch, sig) && - !SIGISMEMBER(td->td_sigmask, sig)) { + !SIGISMEMBER(sigmask, sig)) { #ifdef KTRACE if (KTRPOINT(curthread, KTR_PSIG)) ktrpsig(sig, ps->ps_sigact[_SIG_IDX(sig)], @@ -2026,13 +2032,13 @@ * masking the signal or process is ignoring the * signal. */ - if (kern_forcesigexit && - (SIGISMEMBER(td->td_sigmask, sig) || - ps->ps_sigact[_SIG_IDX(sig)] == SIG_IGN)) { + if (kern_forcesigexit && (SIGISMEMBER(sigmask, sig) || + ps->ps_sigact[_SIG_IDX(sig)] == SIG_IGN)) { SIGDELSET(td->td_sigmask, sig); SIGDELSET(ps->ps_sigcatch, sig); SIGDELSET(ps->ps_sigignore, sig); ps->ps_sigact[_SIG_IDX(sig)] = SIG_DFL; + td->td_sigblock_val = 0; } mtx_unlock(&ps->ps_mtx); p->p_sig = sig; /* XXX to verify code */ @@ -2847,6 +2853,24 @@ SIG_STOPSIGMASK(sigpending); if (SIGISEMPTY(sigpending)) /* no signal to send */ return (0); + + /* + * Do fast sigblock if requested by usermode. Since + * we do know that there was a signal pending at this + * point, set the FAST_SIGBLOCK_PEND as indicator for + * usermode to perform a dummy call to + * FAST_SIGBLOCK_UNBLOCK, which causes immediate + * delivery of postponed pending signal. 
+ */ + if ((td->td_pflags & TDP_FAST_SIGBLOCK) != 0) { + if (td->td_sigblock_val != 0) + SIGSETNAND(sigpending, fastblock_mask); + if (SIGISEMPTY(sigpending)) { + td->td_pflags |= TDP_FAST_SIGPENDING; + return (0); + } + } + if ((p->p_flag & (P_TRACED | P_PPTRACE)) == P_TRACED && (p->p_flag2 & P2_PTRACE_FSTP) != 0 && SIGISMEMBER(sigpending, SIGSTOP)) { @@ -3915,3 +3939,118 @@ sigqueue_delete_proc(p, sig); } } + +int +sys_fast_sigblock(struct thread *td, struct fast_sigblock_args *uap) +{ + struct proc *p; + int error, res; + uint32_t oldval; + + error = 0; + switch (uap->cmd) { + case FAST_SIGBLOCK_SETPTR: + if ((td->td_pflags & TDP_FAST_SIGBLOCK) != 0) { + error = EBUSY; + break; + } + if (((uintptr_t)(uap->ptr) & (sizeof(uint32_t) - 1)) != 0) { + error = EINVAL; + break; + } + td->td_pflags |= TDP_FAST_SIGBLOCK; + td->td_sigblock_ptr = uap->ptr; + break; + + case FAST_SIGBLOCK_UNBLOCK: /* needs a registered sigblock word */ + if ((td->td_pflags & TDP_FAST_SIGBLOCK) == 0) { + error = EINVAL; + break; + } +again: + res = casueword32(td->td_sigblock_ptr, FAST_SIGBLOCK_PEND, + &oldval, 0); + if (res == -1) { + error = EFAULT; + break; + } + if (res == 1) { + if (oldval != FAST_SIGBLOCK_PEND) { + error = EBUSY; + break; + } + error = thread_check_susp(td, false); + if (error != 0) + break; + goto again; + } + td->td_sigblock_val = 0; + + /* + * Rely on normal ast mechanism to deliver pending + * signals to current thread. But notify others about
+ */ + p = td->td_proc; + if (error == 0 && p->p_numthreads != 1) { + PROC_LOCK(p); + reschedule_signals(p, td->td_sigmask, 0); + PROC_UNLOCK(p); + } + break; + + case FAST_SIGBLOCK_UNSETPTR: + if ((td->td_pflags & TDP_FAST_SIGBLOCK) == 0) { + error = EINVAL; + break; + } + res = fueword32(td->td_sigblock_ptr, &oldval); + if (res == -1) { + error = EFAULT; + break; + } + if (oldval != 0 && oldval != FAST_SIGBLOCK_PEND) { + error = EBUSY; + break; + } + td->td_pflags &= ~TDP_FAST_SIGBLOCK; + td->td_sigblock_val = 0; + break; + + default: + error = EINVAL; + break; + } + return (error); +} + +void +fetch_fast_sigblock(struct thread *td) +{ + + if ((td->td_pflags & TDP_FAST_SIGBLOCK) == 0) + return; + if (fueword32(td->td_sigblock_ptr, &td->td_sigblock_val) == -1) { + fetch_fast_sigblock_failed(td, false); + return; + } + td->td_sigblock_val &= ~FAST_SIGBLOCK_FLAGS; +} + +void +fetch_fast_sigblock_failed(struct thread *td, bool write) +{ + ksiginfo_t ksi; + + /* + * Prevent further fetches and SIGSEGVs, allowing thread to + * issue syscalls despite corruption. + */ + td->td_pflags &= ~TDP_FAST_SIGBLOCK; + + ksiginfo_init_trap(&ksi); + ksi.ksi_signo = SIGSEGV; + ksi.ksi_code = write ? 
SEGV_ACCERR : SEGV_MAPERR; + ksi.ksi_addr = td->td_sigblock_ptr; + trapsignal(td, &ksi); +} Index: sys/kern/kern_thread.c =================================================================== --- sys/kern/kern_thread.c +++ sys/kern/kern_thread.c @@ -82,9 +82,9 @@ "struct thread KBI td_flags"); _Static_assert(offsetof(struct thread, td_pflags) == 0x104, "struct thread KBI td_pflags"); -_Static_assert(offsetof(struct thread, td_frame) == 0x480, +_Static_assert(offsetof(struct thread, td_frame) == 0x490, "struct thread KBI td_frame"); -_Static_assert(offsetof(struct thread, td_emuldata) == 0x690, +_Static_assert(offsetof(struct thread, td_emuldata) == 0x6a0, "struct thread KBI td_emuldata"); _Static_assert(offsetof(struct proc, p_flag) == 0xb0, "struct proc KBI p_flag"); @@ -102,9 +102,9 @@ "struct thread KBI td_flags"); _Static_assert(offsetof(struct thread, td_pflags) == 0xa0, "struct thread KBI td_pflags"); -_Static_assert(offsetof(struct thread, td_frame) == 0x2f0, +_Static_assert(offsetof(struct thread, td_frame) == 0x2f8, "struct thread KBI td_frame"); -_Static_assert(offsetof(struct thread, td_emuldata) == 0x338, +_Static_assert(offsetof(struct thread, td_emuldata) == 0x340, "struct thread KBI td_emuldata"); _Static_assert(offsetof(struct proc, p_flag) == 0x68, "struct proc KBI p_flag"); Index: sys/kern/subr_syscall.c =================================================================== --- sys/kern/subr_syscall.c +++ sys/kern/subr_syscall.c @@ -140,6 +140,13 @@ /* Let system calls set td_errno directly. */ td->td_pflags &= ~TDP_NERRNO; + /* + * Fetch fast sigblock value at the time of syscall + * entry because sleepqueue primitives might call + * cursig(). 
+ */ + fetch_fast_sigblock(td); + AUDIT_SYSCALL_ENTER(sa->code, td); error = (sa->callp->sy_call)(td, sa->args); AUDIT_SYSCALL_EXIT(error, td); Index: sys/kern/subr_trap.c =================================================================== --- sys/kern/subr_trap.c +++ sys/kern/subr_trap.c @@ -218,8 +218,8 @@ { struct thread *td; struct proc *p; - int flags; - int sig; + uint32_t oldval; + int flags, sig, res; td = curthread; p = td->td_proc; @@ -317,6 +317,7 @@ */ if (flags & TDF_NEEDSIGCHK || p->p_pendingcnt > 0 || !SIGISEMPTY(p->p_siglist)) { + fetch_fast_sigblock(td); PROC_LOCK(p); mtx_lock(&p->p_sigacts->ps_mtx); while ((sig = cursig(td)) != 0) { @@ -326,6 +327,38 @@ mtx_unlock(&p->p_sigacts->ps_mtx); PROC_UNLOCK(p); } + + /* + * Handle deferred update of the fast sigblock value, after + * the postsig() loop was performed. + */ + if (td->td_pflags & TDP_FAST_SIGPENDING) { + td->td_pflags &= ~TDP_FAST_SIGPENDING; + res = fueword32(td->td_sigblock_ptr, &oldval); + if (res == -1) { + fetch_fast_sigblock_failed(td, false); + } else { + for (;;) { + /* Expect the value just read; only the new value gains PEND. */ + res = casueword32(td->td_sigblock_ptr, oldval, + &oldval, oldval | FAST_SIGBLOCK_PEND); + if (res == -1) { + fetch_fast_sigblock_failed(td, true); + break; + } + if (res == 0) { + td->td_sigblock_val = oldval & + ~FAST_SIGBLOCK_FLAGS; + break; + } + MPASS(res == 1); + res = thread_check_susp(td, false); + if (res != 0) + break; + } + } + } + /* * We need to check to see if we have to exit or wait due to a * single threading requirement or some other STOP condition.
Index: sys/kern/syscalls.master =================================================================== --- sys/kern/syscalls.master +++ sys/kern/syscalls.master @@ -3212,6 +3212,12 @@ int flags ); } +573 AUE_NULL STD { + int fast_sigblock( + int cmd, + _Inout_opt_ uint32_t *ptr + ); + } ; Please copy any additions and changes to the following compatability tables: ; sys/compat/freebsd32/syscalls.master Index: sys/sys/elf_common.h =================================================================== --- sys/sys/elf_common.h +++ sys/sys/elf_common.h @@ -950,8 +950,9 @@ #define AT_EHDRFLAGS 24 /* e_flags field from elf hdr */ #define AT_HWCAP 25 /* CPU feature flags. */ #define AT_HWCAP2 26 /* CPU feature flags 2. */ +#define AT_BSDFLAGS 27 /* ELF BSD Flags. */ -#define AT_COUNT 27 /* Count of defined aux entry types. */ +#define AT_COUNT 28 /* Count of defined aux entry types. */ /* * Relocation types. @@ -1452,5 +1453,6 @@ #define R_X86_64_TLSDESC 36 #define R_X86_64_IRELATIVE 37 +#define ELF_BSDF_FASTSIGBLK 0x0001 /* Kernel supports fast sigblock */ #endif /* !_SYS_ELF_COMMON_H_ */ Index: sys/sys/proc.h =================================================================== --- sys/sys/proc.h +++ sys/sys/proc.h @@ -322,6 +322,9 @@ uintptr_t td_rb_inact; /* (k) Current in-action mutex loc. */ struct syscall_args td_sa; /* (kx) Syscall parameters. Copied on fork for child tracing. */ + void *td_sigblock_ptr; /* (k) uptr for fast sigblock. */ + uint32_t td_sigblock_val; /* (k) fast sigblock value read at + td_sigblock_ptr on kern entry */ #define td_endcopy td_pcb /* @@ -486,7 +489,7 @@ #define TDP_ALTSTACK 0x00000020 /* Have alternate signal stack. */ #define TDP_DEADLKTREAT 0x00000040 /* Lock acquisition - deadlock treatment. */ #define TDP_NOFAULTING 0x00000080 /* Do not handle page faults. */ -#define TDP_UNUSED9 0x00000100 /* --available-- */ +#define TDP_FAST_SIGBLOCK 0x00000100 /* Fast sigblock active */ #define TDP_OWEUPC 0x00000200 /* Call addupc() at next AST.
*/ #define TDP_ITHREAD 0x00000400 /* Thread is an interrupt thread. */ #define TDP_SYNCIO 0x00000800 /* Local override, disable async i/o. */ @@ -509,6 +512,7 @@ #define TDP_UIOHELD 0x10000000 /* Current uio has pages held in td_ma */ #define TDP_FORKING 0x20000000 /* Thread is being created through fork() */ #define TDP_EXECVMSPC 0x40000000 /* Execve destroyed old vmspace */ +#define TDP_FAST_SIGPENDING 0x80000000 /* NOTE(review): undocumented here; presumably a signal is pending while fast sigblock is active - confirm */ /* * Reasons that the current thread can not be run yet. Index: sys/sys/signalvar.h =================================================================== --- sys/sys/signalvar.h +++ sys/sys/signalvar.h @@ -256,6 +256,21 @@ /* Flags for ksi_flags */ #define SQ_INIT 0x01 +/* + * Fast_sigblock: constants and the userland system call prototype. + */ +#define FAST_SIGBLOCK_SETPTR 1 +#define FAST_SIGBLOCK_UNBLOCK 2 +#define FAST_SIGBLOCK_UNSETPTR 3 /* NOTE(review): SETPTR/UNBLOCK/UNSETPTR look like values for the cmd argument - confirm */ + +#define FAST_SIGBLOCK_PEND 0x1 /* bit 0; lies within FAST_SIGBLOCK_FLAGS */ +#define FAST_SIGBLOCK_FLAGS 0xf /* mask of the low four bits */ +#define FAST_SIGBLOCK_INC 0x10 /* lowest bit above FAST_SIGBLOCK_FLAGS */ + +#ifndef _KERNEL +int __sys_fast_sigblock(int cmd, void *ptr); +#endif + #ifdef _KERNEL /* Return nonzero if process p has an unmasked pending signal.
*/ @@ -365,6 +380,8 @@ int cursig(struct thread *td); void execsigs(struct proc *p); +void fetch_fast_sigblock(struct thread *td); +void fetch_fast_sigblock_failed(struct thread *td, bool write); void gsignal(int pgid, int sig, ksiginfo_t *ksi); void killproc(struct proc *p, char *why); ksiginfo_t * ksiginfo_alloc(int wait); Index: sys/sys/sysctl.h =================================================================== --- sys/sys/sysctl.h +++ sys/sys/sysctl.h @@ -988,6 +988,7 @@ #define KERN_PROC_SIGTRAMP 41 /* signal trampoline location */ #define KERN_PROC_CWD 42 /* process current working directory */ #define KERN_PROC_NFDS 43 /* number of open file descriptors */ +#define KERN_PROC_FASTSIGBLK 44 /* address of fastsigblk magic word */ /* * KERN_IPC identifiers Index: sys/sys/systm.h =================================================================== --- sys/sys/systm.h +++ sys/sys/systm.h @@ -399,6 +399,7 @@ uint32_t newval); int casueword(volatile u_long *p, u_long oldval, u_long *oldvalp, u_long newval); +int casueword_check_susp(struct thread *td, bool sleep); void realitexpire(void *); Index: usr.bin/procstat/procstat.h =================================================================== --- usr.bin/procstat/procstat.h +++ usr.bin/procstat/procstat.h @@ -63,6 +63,7 @@ void procstat_cs(struct procstat *prstat, struct kinfo_proc *kipp); void procstat_env(struct procstat *prstat, struct kinfo_proc *kipp); void procstat_files(struct procstat *prstat, struct kinfo_proc *kipp); +void procstat_fsigblock(struct procstat *procstat, struct kinfo_proc *kipp); void procstat_kstack(struct procstat *prstat, struct kinfo_proc *kipp); void procstat_ptlwpinfo(struct procstat *prstat, struct kinfo_proc *kipp); void procstat_rlimit(struct procstat *prstat, struct kinfo_proc *kipp); Index: usr.bin/procstat/procstat.c =================================================================== --- usr.bin/procstat/procstat.c +++ usr.bin/procstat/procstat.c @@ -82,6 +82,8 @@ PS_CMP_PLURAL |
PS_CMP_SUBSTR }, { "environment", "environment", NULL, &procstat_env, &cmdopt_none, PS_CMP_SUBSTR }, + { "fsigblock", "fsigblock", NULL, &procstat_fsigblock, &cmdopt_none, + PS_CMP_NORMAL }, { "fd", "files", "[-C]", &procstat_files, &cmdopt_files, PS_CMP_PLURAL }, { "file", "files", "[-C]", &procstat_files, &cmdopt_files, Index: usr.bin/procstat/procstat_auxv.c =================================================================== --- usr.bin/procstat/procstat_auxv.c +++ usr.bin/procstat/procstat_auxv.c @@ -196,6 +196,12 @@ xo_emit("{dw:/%s}{Lw:/%-16s/%s}{:AT_HWCAP2/%#lx}\n", prefix, "AT_HWCAP2", (u_long)auxv[i].a_un.a_val); break; +#endif +#ifdef AT_BSDFLAGS + case AT_BSDFLAGS: + xo_emit("{dw:/%s}{Lw:/%-16s/%s}{:AT_BSDFLAGS/%#lx}\n", + prefix, "AT_BSDFLAGS", (u_long)auxv[i].a_un.a_val); + break; #endif default: xo_emit("{dw:/%s}{Lw:/%16ld/%ld}{:UNKNOWN/%#lx}\n", Index: usr.bin/procstat/procstat_sigs.c =================================================================== --- usr.bin/procstat/procstat_sigs.c +++ usr.bin/procstat/procstat_sigs.c @@ -37,6 +37,8 @@ #include #include #include +#include +#include #include #include #include @@ -162,6 +164,7 @@ xo_open_container(threadid); xo_emit("{e:thread_id/%6d/%d}", kipp->ki_tid); xo_open_container("signals"); + for (j = 1; j <= _SIG_MAXSIG; j++) { xo_emit("{dk:process_id/%5d/%d} ", kipp->ki_pid); xo_emit("{d:thread_id/%6d/%d} ", kipp->ki_tid); @@ -180,3 +183,63 @@ xo_close_container("threads"); procstat_freeprocs(procstat, kip); } + +void +procstat_fsigblock(struct procstat *procstat, struct kinfo_proc *kipp) +{ + struct kinfo_proc *kip; + char *threadid; + uintptr_t fastsigblk_addr; + int error, name[4]; + unsigned int count, i; + size_t len; + bool has_fastsigblk_addr; + + if ((procstat_opts & PS_OPT_NOHEADER) == 0) + xo_emit("{T:/%5s %6s %-16s %-16s}\n", "PID", "TID", + "COMM", "FSIGBLK"); + + kip = procstat_getprocs(procstat, KERN_PROC_PID | KERN_PROC_INC_THREAD, + kipp->ki_pid, &count); + if (kip == NULL) + return; + 
xo_emit("{ek:process_id/%5d/%d}", kipp->ki_pid); + xo_emit("{e:command/%-16s/%s}", kipp->ki_comm); + xo_open_container("threads"); + kinfo_proc_sort(kip, count); + for (i = 0; i < count; i++) { + kipp = &kip[i]; + len = sizeof(fastsigblk_addr); + name[0] = CTL_KERN; + name[1] = KERN_PROC; + name[2] = KERN_PROC_FASTSIGBLK; + name[3] = kipp->ki_tid; + error = sysctl(name, 4, &fastsigblk_addr, &len, NULL, 0); + if (error < 0) { + if (errno != ESRCH && errno != ENOTTY) { + warn("sysctl: kern.proc.fastsigblk: %d", + kipp->ki_tid); + } + has_fastsigblk_addr = false; + } else + has_fastsigblk_addr = true; + + asprintf(&threadid, "%d", kipp->ki_tid); + if (threadid == NULL) + xo_errc(1, ENOMEM, "Failed to allocate memory in " + "procstat_threads_sigs()"); + xo_open_container(threadid); + xo_emit("{dk:process_id/%5d/%d} ", kipp->ki_pid); + xo_emit("{d:thread_id/%6d/%d} ", kipp->ki_tid); + xo_emit("{d:command/%-16s/%s} ", kipp->ki_comm); + xo_emit("{e:fsigblock/%#-16jx/%#jx}", has_fastsigblk_addr ? + (uintmax_t)fastsigblk_addr : (uintmax_t)-1); + xo_emit("{d:fsigblock/%#-16jx/%#jx}", has_fastsigblk_addr ? + (uintmax_t)fastsigblk_addr : (uintmax_t)-1); + xo_emit("\n"); + xo_close_container(threadid); + free(threadid); + } + xo_close_container("threads"); + procstat_freeprocs(procstat, kip); +}