Index: lib/libc/gen/auxv.c
===================================================================
--- lib/libc/gen/auxv.c
+++ lib/libc/gen/auxv.c
@@ -67,7 +67,7 @@
 }
 
 static pthread_once_t aux_once = PTHREAD_ONCE_INIT;
-static int pagesize, osreldate, canary_len, ncpus, pagesizes_len;
+static int pagesize, osreldate, canary_len, ncpus, pagesizes_len, bsdflags;
 static int hwcap_present, hwcap2_present;
 static char *canary, *pagesizes, *execpath;
 static void *timekeep;
@@ -125,6 +125,10 @@
 		case AT_TIMEKEEP:
 			timekeep = aux->a_un.a_ptr;
 			break;
+
+		case AT_BSDFLAGS:
+			bsdflags = aux->a_un.a_val;
+			break;
 		}
 	}
 }
@@ -224,6 +228,12 @@
 		} else
 			res = EINVAL;
 		break;
+	case AT_BSDFLAGS:
+		/* Assign res on every path; it may not be pre-initialized. */
+		res = buflen == sizeof(int) ? 0 : EINVAL;
+		if (res == 0)
+			*(int *)buf = bsdflags;
+		break;
 	default:
 		res = ENOENT;
 		break;
Index: lib/libc/sys/Makefile.inc
===================================================================
--- lib/libc/sys/Makefile.inc
+++ lib/libc/sys/Makefile.inc
@@ -186,6 +186,7 @@
 	execve.2 \
 	_exit.2 \
 	extattr_get_file.2 \
+	fast_sigblock.2 \
 	fcntl.2 \
 	ffclock.2 \
 	fhlink.2 \
Index: lib/libc/sys/Symbol.map
===================================================================
--- lib/libc/sys/Symbol.map
+++ lib/libc/sys/Symbol.map
@@ -567,6 +567,7 @@
 	__sys_extattr_set_link;
 	_extattrctl;
 	__sys_extattrctl;
+	__sys_fast_sigblock;
 	_fchdir;
 	__sys_fchdir;
 	_fchflags;
Index: lib/libc/sys/fast_sigblock.2
===================================================================
--- /dev/null
+++ lib/libc/sys/fast_sigblock.2
@@ -0,0 +1,167 @@
+.\" Copyright (c) 2016 The FreeBSD Foundation, Inc.
+.\" All rights reserved.
+.\"
+.\" This documentation was written by
+.\" Konstantin Belousov <kib@FreeBSD.org> under sponsorship
+.\" from the FreeBSD Foundation.
+.\"
+.\" Redistribution and use in source and binary forms, with or without
+.\" modification, are permitted provided that the following conditions
+.\" are met:
+.\" 1. Redistributions of source code must retain the above copyright
+.\"    notice, this list of conditions and the following disclaimer.
+.\" 2. Redistributions in binary form must reproduce the above copyright
+.\"    notice, this list of conditions and the following disclaimer in the
+.\"    documentation and/or other materials provided with the distribution.
+.\"
+.\" THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
+.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+.\" ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
+.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+.\" SUCH DAMAGE.
+.\"
+.\" $FreeBSD$
+.\"
+.Dd December 13, 2019
+.Dt FAST_SIGBLOCK 2
+.Os
+.Sh NAME
+.Nm fast_sigblock
+.Nd control signal blocking with a simple memory write
+.Sh LIBRARY
+.Lb libc
+.Sh SYNOPSIS
+.In sys/signalvar.h
+.Ft int
+.Fn fast_sigblock "int cmd" "void *ptr"
+.Sh DESCRIPTION
+.Bf -symbolic
+This function is not intended for direct use by applications.
+The functionality is provided for implementing some optimizations in
+.Xr ld-elf.so.1 8
+and
+.Lb libthr .
+.Ef
+.Pp
+The function configures the kernel facility that allows a thread to
+block asynchronous signals delivery with a single write to userspace
+memory, avoiding overhead of system calls like
+.Xr sigprocmask 2
+for establishing critical sections.
+The C runtime uses it to optimize implementation of async-signal-safe
+functionality.
+.Pp
+A thread might register a
+.Dv sigblock
+variable of type
+.Vt int
+as a location which is consulted by kernel when calculating the
+blocked signal mask for delivery of asynchronous signals.
+If the variable contains a non-zero count of blocks (see below),
+then the kernel effectively operates as if a mask containing all
+blockable signals had been supplied to
+.Xr sigprocmask 2 .
+.Pp
+The variable is supposed to be modified only by the owning thread;
+there is no way to guarantee that an update made by another thread
+is visible to the kernel when signals are delivered.
+.Pp
+Lower bits of the sigblock variable are reserved as flags,
+which might be set or cleared by kernel at arbitrary moments.
+Userspace code should use
+.Xr atomic 9
+operations of incrementing and decrementing by
+.Dv FAST_SIGBLOCK_INC
+quantity to recursively block or unblock signals delivery.
+.Pp
+If a signal would be delivered when unmasked, kernel might set the
+.Dv FAST_SIGBLOCK_PEND
+.Dq pending signal
+flag in the sigblock variable.
+Userspace should perform
+.Dv FAST_SIGBLOCK_UNBLOCK
+operation when clearing the variable if it notes the pending signal
+bit is set, which would deliver the pending signals immediately.
+Otherwise, signals delivery might be postponed.
+.Pp
+The
+.Fa cmd
+argument specifies one of the following operations:
+.Bl -tag -width FAST_SIGBLOCK_UNSETPTR
+.It Dv FAST_SIGBLOCK_SETPTR
+Register the variable of type
+.Vt int
+at location pointed to by the
+.Fa ptr
+argument as sigblock variable for the calling thread.
+.It Dv FAST_SIGBLOCK_UNSETPTR
+Unregister the currently registered sigblock location.
+Kernel stops inferring the blocked mask from non-zero value of its
+blocked count.
+New location can be registered after previous one is deregistered.
+.It Dv FAST_SIGBLOCK_UNBLOCK
+If there are pending signals which should be delivered to the calling
+thread, they are delivered before returning from the call.
+The sigblock variable should have zero blocking count, and indicate
+that the pending signal exists.
+Effectively this means that the variable should have the value
+.Dv FAST_SIGBLOCK_PEND .
+.El
+.Sh RETURN VALUES
+.Rv -std
+.Sh ERRORS
+The operation may fail with the following errors:
+.Bl -tag -width Er
+.It Bq Er EBUSY
+The
+.Dv FAST_SIGBLOCK_SETPTR
+operation was attempted while the sigblock address was already registered.
+The
+.Dv FAST_SIGBLOCK_UNBLOCK
+operation was attempted while the sigblock variable value was not equal to
+.Dv FAST_SIGBLOCK_PEND .
+.It Bq Er EINVAL
+The variable address passed to
+.Dv FAST_SIGBLOCK_SETPTR
+is not aligned naturally.
+The
+.Dv FAST_SIGBLOCK_UNSETPTR
+operation was attempted without a prior successful call to
+.Dv FAST_SIGBLOCK_SETPTR .
+.It Bq Er EFAULT
+Attempt to read or write to the sigblock variable failed.
+Note that kernel generates the
+.Dv SIGSEGV
+signal if an attempt to read from the sigblock variable faulted
+during implicit accesses from syscall entry.
+.El
+.Sh SEE ALSO
+.Xr kill 2 ,
+.Xr signal 2 ,
+.Xr sigprocmask 2 ,
+.Xr libthr 3 ,
+.Xr ld-elf.so.1 8
+.Sh STANDARDS
+The
+.Nm
+function is non-standard, although a similar functionality is a common
+optimization provided by several other systems.
+.Sh HISTORY
+The
+.Nm
+function was introduced in
+.Fx 13.0 .
+.Sh BUGS
+The
+.Nm
+symbol is currently not exported by libc, on purpose.
+Consumers should either use the
+.Dv __sys_fast_sigblock
+symbol from the private libc namespace, or utilize
+.Xr syscall 2 .
Index: lib/libthr/thread/thr_create.c
===================================================================
--- lib/libthr/thread/thr_create.c
+++ lib/libthr/thread/thr_create.c
@@ -257,6 +257,7 @@
 
 	if (curthread->attr.suspend == THR_CREATE_SUSPENDED)
 		set = curthread->sigmask;
+	_thr_signal_block_setup(curthread);
 
 	/*
 	 * This is used as a serialization point to allow parent
Index: lib/libthr/thread/thr_private.h
===================================================================
--- lib/libthr/thread/thr_private.h
+++ lib/libthr/thread/thr_private.h
@@ -396,6 +396,9 @@
 	/* Signal blocked counter. */
 	int			sigblock;
 
+	/* Fast sigblock var. */
+	uint32_t		fsigblock;
+
 	/* Queue entry for list of all threads. */
 	TAILQ_ENTRY(pthread)	tle;	/* link for all threads in process */
 
@@ -813,6 +816,8 @@
 void	_thr_testcancel(struct pthread *) __hidden;
 void	_thr_signal_block(struct pthread *) __hidden;
 void	_thr_signal_unblock(struct pthread *) __hidden;
+void	_thr_signal_block_check_fast(void) __hidden;
+void	_thr_signal_block_setup(struct pthread *) __hidden;
 void	_thr_signal_init(int) __hidden;
 void	_thr_signal_deinit(void) __hidden;
 int	_thr_send_sig(struct pthread *, int sig) __hidden;
Index: lib/libthr/thread/thr_rtld.c
===================================================================
--- lib/libthr/thread/thr_rtld.c
+++ lib/libthr/thread/thr_rtld.c
@@ -236,6 +236,8 @@
 	_thr_signal_block(curthread);
 	_rtld_thread_init(&li);
 	_thr_signal_unblock(curthread);
+	_thr_signal_block_check_fast();
+	_thr_signal_block_setup(curthread);
 
 	uc_len = __getcontextx_size();
 	uc = alloca(uc_len);
Index: lib/libthr/thread/thr_sig.c
===================================================================
--- lib/libthr/thread/thr_sig.c
+++ lib/libthr/thread/thr_sig.c
@@ -31,7 +31,8 @@
 
 #include "namespace.h"
 #include <sys/param.h>
-#include <sys/types.h>
+#include <sys/auxv.h>
+#include <sys/elf.h>
 #include <sys/signalvar.h>
 #include <sys/syscall.h>
 #include <signal.h>
@@ -92,10 +93,9 @@
 	0xffffffff,
 	0xffffffff}};
 
-void
-_thr_signal_block(struct pthread *curthread)
+static void
+thr_signal_block_slow(struct pthread *curthread)
 {
-	
 	if (curthread->sigblock > 0) {
 		curthread->sigblock++;
 		return;
@@ -104,13 +104,68 @@
 	curthread->sigblock++;
 }
 
-void
-_thr_signal_unblock(struct pthread *curthread)
+static void
+thr_signal_unblock_slow(struct pthread *curthread)
 {
 	if (--curthread->sigblock == 0)
 		__sys_sigprocmask(SIG_SETMASK, &curthread->sigmask, NULL);
 }
 
+static void
+thr_signal_block_fast(struct pthread *curthread)
+{
+	atomic_add_32(&curthread->fsigblock, FAST_SIGBLOCK_INC); /* +1 level */
+}
+
+static void
+thr_signal_unblock_fast(struct pthread *curthread)
+{
+	uint32_t oldval;	/* fsigblock value before the decrement */
+
+	oldval = atomic_fetchadd_32(&curthread->fsigblock, -FAST_SIGBLOCK_INC);
+	if (oldval == (FAST_SIGBLOCK_PEND | FAST_SIGBLOCK_INC))
+		__sys_fast_sigblock(FAST_SIGBLOCK_UNBLOCK, NULL);
+}
+
+static bool fast_sigblock;
+
+void
+_thr_signal_block(struct pthread *curthread)
+{
+	if (fast_sigblock)	/* set by _thr_signal_block_check_fast() */
+		thr_signal_block_fast(curthread);
+	else
+		thr_signal_block_slow(curthread);
+}
+
+void
+_thr_signal_unblock(struct pthread *curthread)
+{
+	if (fast_sigblock)	/* set by _thr_signal_block_check_fast() */
+		thr_signal_unblock_fast(curthread);
+	else
+		thr_signal_unblock_slow(curthread);
+}
+
+void
+_thr_signal_block_check_fast(void)
+{
+	int bsdflags, error;
+
+	error = elf_aux_info(AT_BSDFLAGS, &bsdflags, sizeof(bsdflags));
+	if (error != 0)
+		return;		/* leave fast_sigblock unchanged */
+	fast_sigblock = (bsdflags & ELF_BSDF_FASTSIGBLK) != 0;
+}
+
+void
+_thr_signal_block_setup(struct pthread *curthread)
+{
+	if (!fast_sigblock)
+		return;		/* nothing to register in slow mode */
+	__sys_fast_sigblock(FAST_SIGBLOCK_SETPTR, &curthread->fsigblock);
+}
+
 int
 _thr_send_sig(struct pthread *thread, int sig)
 {
Index: libexec/rtld-elf/rtld-libc/Makefile.inc
===================================================================
--- libexec/rtld-elf/rtld-libc/Makefile.inc
+++ libexec/rtld-elf/rtld-libc/Makefile.inc
@@ -45,8 +45,9 @@
     strncpy strrchr strsep strspn strstr strtok
 # Also use all the syscall .o files from libc_nossp_pic:
 _libc_other_objects= sigsetjmp lstat stat fstat fstatat fstatfs syscall \
-    cerror geteuid getegid munmap mprotect sysarch __sysctl issetugid __getcwd \
-    utrace thr_self thr_kill pread mmap lseek _exit _fstat _fstatat _fstatfs \
+    cerror geteuid getegid fast_sigblock munmap mprotect \
+    sysarch __sysctl issetugid __getcwd utrace \
+    thr_self thr_kill pread mmap lseek _exit _fstat _fstatat _fstatfs \
     getdirentries _getdirentries _close _fcntl _open _openat _read \
     _sigprocmask _write readlink _setjmp setjmp setjmperr
 
Index: libexec/rtld-elf/rtld.h
===================================================================
--- libexec/rtld-elf/rtld.h
+++ libexec/rtld-elf/rtld.h
@@ -369,6 +369,7 @@
 extern Elf_Addr _GLOBAL_OFFSET_TABLE_[];
 extern Elf_Sym sym_zero;	/* For resolving undefined weak refs. */
 extern bool ld_bind_not;
+extern bool ld_fast_sigblock;
 
 void dump_relocations(Obj_Entry *);
 void dump_obj_relocations(Obj_Entry *);
Index: libexec/rtld-elf/rtld.c
===================================================================
--- libexec/rtld-elf/rtld.c
+++ libexec/rtld-elf/rtld.c
@@ -285,6 +285,7 @@
 int tls_max_index = 1;		/* Largest module index allocated */
 
 static bool ld_library_path_rpath = false;
+bool ld_fast_sigblock = false;
 
 /*
  * Globals for path names, and such
@@ -419,6 +420,10 @@
     main_argc = argc;
     main_argv = argv;
 
+    if (aux_info[AT_BSDFLAGS] != NULL &&
+	(aux_info[AT_BSDFLAGS]->a_un.a_val & ELF_BSDF_FASTSIGBLK) != 0)
+	    ld_fast_sigblock = true;
+
     trust = !issetugid();
 
     md_abi_variant_hook(aux_info);
Index: libexec/rtld-elf/rtld_lock.c
===================================================================
--- libexec/rtld-elf/rtld_lock.c
+++ libexec/rtld-elf/rtld_lock.c
@@ -45,6 +45,7 @@
  */
 
 #include <sys/param.h>
+#include <sys/signalvar.h>
 #include <signal.h>
 #include <stdlib.h>
 #include <time.h>
@@ -68,6 +69,7 @@
 
 static sigset_t fullsigmask, oldsigmask;
 static int thread_flag, wnested;
+static uint32_t fsigblock;
 
 static void *
 def_lock_create(void)
@@ -117,6 +119,17 @@
 	    ;	/* Spin */
 }
 
+static void
+sig_fastunblock(void)
+{
+	uint32_t oldval;	/* fsigblock value before the decrement */
+
+	assert((fsigblock & ~FAST_SIGBLOCK_FLAGS) >= FAST_SIGBLOCK_INC);
+	oldval = atomic_fetchadd_32(&fsigblock, -FAST_SIGBLOCK_INC);
+	if (oldval == (FAST_SIGBLOCK_PEND | FAST_SIGBLOCK_INC))
+		__sys_fast_sigblock(FAST_SIGBLOCK_UNBLOCK, NULL);
+}
+
 static void
 def_wlock_acquire(void *lock)
 {
@@ -124,14 +137,23 @@
 	sigset_t tmp_oldsigmask;
 
 	l = (Lock *)lock;
-	for (;;) {
-		sigprocmask(SIG_BLOCK, &fullsigmask, &tmp_oldsigmask);
-		if (atomic_cmpset_acq_int(&l->lock, 0, WAFLAG))
-			break;
-		sigprocmask(SIG_SETMASK, &tmp_oldsigmask, NULL);
+	if (ld_fast_sigblock) {
+		for (;;) {
+			atomic_add_32(&fsigblock, FAST_SIGBLOCK_INC);
+			if (atomic_cmpset_acq_int(&l->lock, 0, WAFLAG))
+				break;
+			sig_fastunblock();
+		}
+	} else {
+		for (;;) {
+			sigprocmask(SIG_BLOCK, &fullsigmask, &tmp_oldsigmask);
+			if (atomic_cmpset_acq_int(&l->lock, 0, WAFLAG))
+				break;
+			sigprocmask(SIG_SETMASK, &tmp_oldsigmask, NULL);
+		}
+		if (atomic_fetchadd_int(&wnested, 1) == 0)
+			oldsigmask = tmp_oldsigmask;
 	}
-	if (atomic_fetchadd_int(&wnested, 1) == 0)
-		oldsigmask = tmp_oldsigmask;
 }
 
 static void
@@ -143,9 +165,10 @@
 	if ((l->lock & WAFLAG) == 0)
 		atomic_add_rel_int(&l->lock, -RC_INCR);
 	else {
-		assert(wnested > 0);
 		atomic_add_rel_int(&l->lock, -WAFLAG);
-		if (atomic_fetchadd_int(&wnested, -1) == 1)
+		if (ld_fast_sigblock)
+			sig_fastunblock();
+		else if (atomic_fetchadd_int(&wnested, -1) == 1)
 			sigprocmask(SIG_SETMASK, &oldsigmask, NULL);
 	}
 }
@@ -279,38 +302,43 @@
 void
 lockdflt_init(void)
 {
-    int i;
-
-    deflockinfo.rtli_version  = RTLI_VERSION;
-    deflockinfo.lock_create   = def_lock_create;
-    deflockinfo.lock_destroy  = def_lock_destroy;
-    deflockinfo.rlock_acquire = def_rlock_acquire;
-    deflockinfo.wlock_acquire = def_wlock_acquire;
-    deflockinfo.lock_release  = def_lock_release;
-    deflockinfo.thread_set_flag = def_thread_set_flag;
-    deflockinfo.thread_clr_flag = def_thread_clr_flag;
-    deflockinfo.at_fork = NULL;
-
-    for (i = 0; i < RTLD_LOCK_CNT; i++) {
-	    rtld_locks[i].mask   = (1 << i);
-	    rtld_locks[i].handle = NULL;
-    }
+	int i;
+
+	deflockinfo.rtli_version = RTLI_VERSION;
+	deflockinfo.lock_create = def_lock_create;
+	deflockinfo.lock_destroy = def_lock_destroy;
+	deflockinfo.rlock_acquire = def_rlock_acquire;
+	deflockinfo.wlock_acquire = def_wlock_acquire;
+	deflockinfo.lock_release = def_lock_release;
+	deflockinfo.thread_set_flag = def_thread_set_flag;
+	deflockinfo.thread_clr_flag = def_thread_clr_flag;
+	deflockinfo.at_fork = NULL;
 
-    memcpy(&lockinfo, &deflockinfo, sizeof(lockinfo));
-    _rtld_thread_init(NULL);
-    /*
-     * Construct a mask to block all signals except traps which might
-     * conceivably be generated within the dynamic linker itself.
-     */
-    sigfillset(&fullsigmask);
-    sigdelset(&fullsigmask, SIGILL);
-    sigdelset(&fullsigmask, SIGTRAP);
-    sigdelset(&fullsigmask, SIGABRT);
-    sigdelset(&fullsigmask, SIGEMT);
-    sigdelset(&fullsigmask, SIGFPE);
-    sigdelset(&fullsigmask, SIGBUS);
-    sigdelset(&fullsigmask, SIGSEGV);
-    sigdelset(&fullsigmask, SIGSYS);
+	for (i = 0; i < RTLD_LOCK_CNT; i++) {
+		rtld_locks[i].mask   = (1 << i);
+		rtld_locks[i].handle = NULL;
+	}
+
+	memcpy(&lockinfo, &deflockinfo, sizeof(lockinfo));
+	_rtld_thread_init(NULL);
+	if (ld_fast_sigblock)
+		__sys_fast_sigblock(FAST_SIGBLOCK_SETPTR, &fsigblock);
+	else {
+		/*
+		 * Construct a mask to block all signals except traps
+		 * which might conceivably be generated within the
+		 * dynamic linker itself.
+		 */
+		sigfillset(&fullsigmask);
+		sigdelset(&fullsigmask, SIGILL);
+		sigdelset(&fullsigmask, SIGTRAP);
+		sigdelset(&fullsigmask, SIGABRT);
+		sigdelset(&fullsigmask, SIGEMT);
+		sigdelset(&fullsigmask, SIGFPE);
+		sigdelset(&fullsigmask, SIGBUS);
+		sigdelset(&fullsigmask, SIGSEGV);
+		sigdelset(&fullsigmask, SIGSYS);
+	}
 }
 
 /*
@@ -331,7 +359,10 @@
 
 	if (pli == NULL)
 		pli = &deflockinfo;
-
+	else if (ld_fast_sigblock) {
+		fsigblock = 0;
+		__sys_fast_sigblock(FAST_SIGBLOCK_UNSETPTR, NULL);
+	}
 
 	for (i = 0; i < RTLD_LOCK_CNT; i++)
 		if ((locks[i] = pli->lock_create()) == NULL)
Index: sys/compat/freebsd32/syscalls.master
===================================================================
--- sys/compat/freebsd32/syscalls.master
+++ sys/compat/freebsd32/syscalls.master
@@ -1159,5 +1159,6 @@
 				    int shmflags, const char *name); }
 572	AUE_SHMRENAME	NOPROTO { int shm_rename(const char *path_from, \
 				    const char *path_to, int flags); }
+573	AUE_NULL	NOPROTO	{ int fast_sigblock(int cmd, uint32_t *ptr); }
 
 ; vim: syntax=off
Index: sys/kern/capabilities.conf
===================================================================
--- sys/kern/capabilities.conf
+++ sys/kern/capabilities.conf
@@ -162,6 +162,11 @@
 extattr_list_fd
 extattr_set_fd
 
+##
+## fast_sigblock is same as sigprocmask
+##
+fast_sigblock
+
 ##
 ## Allow changing file flags, mode, and owner by file descriptor, subject to
 ## capability rights.
Index: sys/kern/imgact_elf.c
===================================================================
--- sys/kern/imgact_elf.c
+++ sys/kern/imgact_elf.c
@@ -1367,6 +1367,7 @@
 		AUXARGS_ENTRY(pos, AT_HWCAP, *imgp->sysent->sv_hwcap);
 	if (imgp->sysent->sv_hwcap2 != NULL)
 		AUXARGS_ENTRY(pos, AT_HWCAP2, *imgp->sysent->sv_hwcap2);
+	AUXARGS_ENTRY(pos, AT_BSDFLAGS, ELF_BSDF_FASTSIGBLK);
 	AUXARGS_ENTRY(pos, AT_NULL, 0);
 
 	free(imgp->auxargs, M_TEMP);
Index: sys/kern/kern_exec.c
===================================================================
--- sys/kern/kern_exec.c
+++ sys/kern/kern_exec.c
@@ -1025,6 +1025,7 @@
 	int error;
 	struct proc *p = imgp->proc;
 	struct vmspace *vmspace = p->p_vmspace;
+	struct thread *td = curthread;
 	vm_object_t obj;
 	struct rlimit rlim_stack;
 	vm_offset_t sv_minuser, stack_addr;
@@ -1034,6 +1035,10 @@
 	imgp->vmspace_destroyed = 1;
 	imgp->sysent = sv;
 
+	td->td_pflags &= ~TDP_FAST_SIGBLOCK;
+	td->td_sigblock_ptr = NULL;
+	td->td_sigblock_val = 0;
+
 	/* May be called with Giant held */
 	EVENTHANDLER_DIRECT_INVOKE(process_exec, p, imgp);
 
Index: sys/kern/kern_fork.c
===================================================================
--- sys/kern/kern_fork.c
+++ sys/kern/kern_fork.c
@@ -563,7 +563,8 @@
 	 * been preserved.
 	 */
 	p2->p_flag |= p1->p_flag & P_SUGID;
-	td2->td_pflags |= (td->td_pflags & TDP_ALTSTACK) | TDP_FORKING;
+	td2->td_pflags |= (td->td_pflags & (TDP_ALTSTACK |
+	    TDP_FAST_SIGBLOCK)) | TDP_FORKING;
 	SESS_LOCK(p1->p_session);
 	if (p1->p_session->s_ttyvp != NULL && p1->p_flag & P_CONTROLT)
 		p2->p_flag |= P_CONTROLT;
Index: sys/kern/kern_proc.c
===================================================================
--- sys/kern/kern_proc.c
+++ sys/kern/kern_proc.c
@@ -2967,6 +2967,77 @@
 	return (error);
 }
 
+static int
+sysctl_kern_proc_fastsigblk(SYSCTL_HANDLER_ARGS)
+{
+	int *name = (int *)arg1;
+	u_int namelen = arg2;
+	pid_t pid;
+	struct proc *p;
+	struct thread *td1;
+	uintptr_t addr;
+#ifdef COMPAT_FREEBSD32
+	uint32_t addr32;
+#endif
+	int error;
+
+	if (namelen != 1 || req->newptr != NULL)
+		return (EINVAL);
+
+	pid = (pid_t)name[0];	/* pid, or tid if above PID_MAX */
+	error = pget(pid, PGET_HOLD | PGET_NOTWEXIT | PGET_CANDEBUG, &p);
+	if (error != 0)
+		return (error);
+
+	PROC_LOCK(p);
+#ifdef COMPAT_FREEBSD32
+	if (SV_CURPROC_FLAG(SV_ILP32)) {
+		if (!SV_PROC_FLAG(p, SV_ILP32)) {
+			error = EINVAL;	/* ILP32 caller, non-ILP32 target */
+			goto errlocked;
+		}
+	}
+#endif
+	if (pid <= PID_MAX) {
+		td1 = FIRST_THREAD_IN_PROC(p);
+	} else {
+		FOREACH_THREAD_IN_PROC(p, td1) {
+			if (td1->td_tid == pid)
+				break;
+		}
+	}
+	if (td1 == NULL) {
+		error = ESRCH;
+		goto errlocked;
+	}
+	/*
+	 * This reads another thread's private flags without owning
+	 * them.  That is acceptable as long as no out-of-thin-air
+	 * values are read from td_pflags; the td_sigblock_ptr value
+	 * handed to usermode is inherently racy anyway, since the
+	 * target may have changed it in the meantime.
+	 */
+	if ((td1->td_pflags & TDP_FAST_SIGBLOCK) != 0)
+		addr = (uintptr_t)td1->td_sigblock_ptr;
+	else
+		error = ENOTTY;	/* thread has no sigblock word set */
+
+errlocked:
+	_PRELE(p);		/* pairs with PGET_HOLD above */
+	PROC_UNLOCK(p);
+	if (error != 0)
+		return (error);
+
+#ifdef COMPAT_FREEBSD32
+	if (SV_CURPROC_FLAG(SV_ILP32)) {
+		addr32 = addr;	/* ILP32 caller gets a 32-bit address */
+		error = SYSCTL_OUT(req, &addr32, sizeof(addr32));
+	} else
+#endif
+		error = SYSCTL_OUT(req, &addr, sizeof(addr));
+	return (error);
+}
+
 SYSCTL_NODE(_kern, KERN_PROC, proc, CTLFLAG_RD,  0, "Process table");
 
 SYSCTL_PROC(_kern_proc, KERN_PROC_ALL, all, CTLFLAG_RD|CTLTYPE_STRUCT|
@@ -3080,6 +3151,10 @@
 	CTLFLAG_MPSAFE, sysctl_kern_proc_sigtramp,
 	"Process signal trampoline location");
 
+static SYSCTL_NODE(_kern_proc, KERN_PROC_FASTSIGBLK, fastsigblk, CTLFLAG_RD |
+	CTLFLAG_ANYBODY | CTLFLAG_MPSAFE, sysctl_kern_proc_fastsigblk,
+	"Thread fast_sigblock address");
+
 int allproc_gen;
 
 /*
Index: sys/kern/kern_sig.c
===================================================================
--- sys/kern/kern_sig.c
+++ sys/kern/kern_sig.c
@@ -239,6 +239,7 @@
 };
 
 static void reschedule_signals(struct proc *p, sigset_t block, int flags);
+static sigset_t fastblock_mask;
 
 static void
 sigqueue_start(void)
@@ -249,6 +250,16 @@
 	p31b_setcfg(CTL_P1003_1B_REALTIME_SIGNALS, _POSIX_REALTIME_SIGNALS);
 	p31b_setcfg(CTL_P1003_1B_RTSIG_MAX, SIGRTMAX - SIGRTMIN + 1);
 	p31b_setcfg(CTL_P1003_1B_SIGQUEUE_MAX, max_pending_per_proc);
+	SIGFILLSET(fastblock_mask);
+	SIG_CANTMASK(fastblock_mask);
+	SIGDELSET(fastblock_mask, SIGILL);
+	SIGDELSET(fastblock_mask, SIGTRAP);
+	SIGDELSET(fastblock_mask, SIGABRT);
+	SIGDELSET(fastblock_mask, SIGEMT);
+	SIGDELSET(fastblock_mask, SIGFPE);
+	SIGDELSET(fastblock_mask, SIGBUS);
+	SIGDELSET(fastblock_mask, SIGSEGV);
+	SIGDELSET(fastblock_mask, SIGSYS);
 }
 
 ksiginfo_t *
@@ -2847,6 +2858,24 @@
 			SIG_STOPSIGMASK(sigpending);
 		if (SIGISEMPTY(sigpending))	/* no signal to send */
 			return (0);
+
+		/*
+		 * Do fast sigblock if requested by usermode.  Since
+		 * we do know that there was a signal pending at this
+		 * point, set the FAST_SIGBLOCK_PEND as indicator for
+		 * usermode to perform a dummy call to
+		 * FAST_SIGBLOCK_UNBLOCK, which causes immediate
+		 * delivery of postponed pending signal.
+		 */
+		if ((td->td_pflags & TDP_FAST_SIGBLOCK) != 0) {
+			if (td->td_sigblock_val != 0)
+				SIGSETNAND(sigpending, fastblock_mask);
+			if (SIGISEMPTY(sigpending)) {
+				td->td_pflags |= TDP_FAST_SIGPENDING;
+				return (0);
+			}
+		}
+
 		if ((p->p_flag & (P_TRACED | P_PPTRACE)) == P_TRACED &&
 		    (p->p_flag2 & P2_PTRACE_FSTP) != 0 &&
 		    SIGISMEMBER(sigpending, SIGSTOP)) {
@@ -3915,3 +3944,118 @@
 			sigqueue_delete_proc(p, sig);
 	}
 }
+
+int
+sys_fast_sigblock(struct thread *td, struct fast_sigblock_args *uap)
+{
+	struct proc *p;
+	int error, res;
+	uint32_t oldval;
+
+	error = 0;
+	switch (uap->cmd) {
+	case FAST_SIGBLOCK_SETPTR:	/* register uap->ptr for this thread */
+		if ((td->td_pflags & TDP_FAST_SIGBLOCK) != 0) {
+			error = EBUSY;	/* a word is already registered */
+			break;
+		}
+		if (((uintptr_t)(uap->ptr) & (sizeof(uint32_t) - 1)) != 0) {
+			error = EINVAL;	/* ptr is not uint32_t-aligned */
+			break;
+		}
+		td->td_pflags |= TDP_FAST_SIGBLOCK;
+		td->td_sigblock_ptr = uap->ptr;
+		break;
+
+	case FAST_SIGBLOCK_UNBLOCK:	/* swap PEND for 0, then reschedule */
+		if ((td->td_pflags & TDP_FAST_SIGBLOCK) == 0) {
+			error = EINVAL;	/* no word registered to unblock */
+			break;
+		}
+again:
+		res = casueword32(td->td_sigblock_ptr, FAST_SIGBLOCK_PEND,
+		    &oldval, 0);
+		if (res == -1) {
+			error = EFAULT;
+			break;
+		}
+		if (res == 1) {
+			if (oldval != FAST_SIGBLOCK_PEND) {
+				error = EBUSY;	/* word is not exactly PEND */
+				break;
+			}
+			error = casueword_check_susp(td, false);
+			if (error != 0)
+				break;
+			goto again;	/* value matched; retry the swap */
+		}
+		td->td_sigblock_val = 0;
+
+		/*
+		 * Rely on normal ast mechanism to deliver pending
+		 * signals to current thread.  But notify others about
+		 * fake unblock.
+		 */
+		p = td->td_proc;
+		if (error == 0 && p->p_numthreads != 1) {
+			PROC_LOCK(p);
+			reschedule_signals(p, td->td_sigmask, 0);
+			PROC_UNLOCK(p);
+		}
+		break;
+
+	case FAST_SIGBLOCK_UNSETPTR:	/* drop the registration */
+		if ((td->td_pflags & TDP_FAST_SIGBLOCK) == 0) {
+			error = EINVAL;	/* nothing is registered */
+			break;
+		}
+		res = fueword32(td->td_sigblock_ptr, &oldval);
+		if (res == -1) {
+			error = EFAULT;
+			break;
+		}
+		if (oldval != 0 && oldval != FAST_SIGBLOCK_PEND) {
+			error = EBUSY;	/* word is neither 0 nor PEND */
+			break;
+		}
+		td->td_pflags &= ~TDP_FAST_SIGBLOCK;
+		td->td_sigblock_val = 0;
+		break;
+
+	default:
+		error = EINVAL;
+		break;
+	}
+	return (error);
+}
+
+void
+fetch_fast_sigblock(struct thread *td)
+{
+
+	if ((td->td_pflags & TDP_FAST_SIGBLOCK) == 0)
+		return;		/* no sigblock word registered */
+	if (fueword32(td->td_sigblock_ptr, &td->td_sigblock_val) == -1) {
+		fetch_fast_sigblock_failed(td, false);
+		return;
+	}
+	td->td_sigblock_val &= ~FAST_SIGBLOCK_FLAGS;	/* drop flag bits */
+}
+
+void
+fetch_fast_sigblock_failed(struct thread *td, bool write)
+{
+	ksiginfo_t ksi;
+
+	/*
+	 * Prevent further fetches and SIGSEGVs, allowing thread to
+	 * issue syscalls despite corruption.
+	 */
+	td->td_pflags &= ~TDP_FAST_SIGBLOCK;
+
+	ksiginfo_init_trap(&ksi);
+	ksi.ksi_signo = SIGSEGV;
+	ksi.ksi_code = write ? SEGV_ACCERR : SEGV_MAPERR;
+	ksi.ksi_addr = td->td_sigblock_ptr;	/* the registered user word */
+	trapsignal(td, &ksi);
+}
Index: sys/kern/kern_thread.c
===================================================================
--- sys/kern/kern_thread.c
+++ sys/kern/kern_thread.c
@@ -82,9 +82,9 @@
     "struct thread KBI td_flags");
 _Static_assert(offsetof(struct thread, td_pflags) == 0x104,
     "struct thread KBI td_pflags");
-_Static_assert(offsetof(struct thread, td_frame) == 0x478,
+_Static_assert(offsetof(struct thread, td_frame) == 0x488,
     "struct thread KBI td_frame");
-_Static_assert(offsetof(struct thread, td_emuldata) == 0x690,
+_Static_assert(offsetof(struct thread, td_emuldata) == 0x6a0,
     "struct thread KBI td_emuldata");
 _Static_assert(offsetof(struct proc, p_flag) == 0xb0,
     "struct proc KBI p_flag");
@@ -102,9 +102,9 @@
     "struct thread KBI td_flags");
 _Static_assert(offsetof(struct thread, td_pflags) == 0xa0,
     "struct thread KBI td_pflags");
-_Static_assert(offsetof(struct thread, td_frame) == 0x2f0,
+_Static_assert(offsetof(struct thread, td_frame) == 0x2f8,
     "struct thread KBI td_frame");
-_Static_assert(offsetof(struct thread, td_emuldata) == 0x338,
+_Static_assert(offsetof(struct thread, td_emuldata) == 0x340,
     "struct thread KBI td_emuldata");
 _Static_assert(offsetof(struct proc, p_flag) == 0x68,
     "struct proc KBI p_flag");
Index: sys/kern/kern_umtx.c
===================================================================
--- sys/kern/kern_umtx.c
+++ sys/kern/kern_umtx.c
@@ -691,8 +691,8 @@
 }
 
 /*
- * Check for possible stops and suspensions while executing a umtx
- * locking operation.
+ * Check for possible stops and suspensions while executing a casueword
+ * operation.
  *
  * The sleep argument controls whether the function can handle a stop
  * request itself or it should return ERESTART and the request is
@@ -700,16 +700,17 @@
  *
  * Typically, when retrying due to casueword(9) failure (rv == 1), we
  * should handle the stop requests there, with exception of cases when
- * the thread busied the umtx key, or when functions return
- * immediately if umtxq_check_susp() returned non-zero.  On the other
- * hand, retrying the whole lock operation, we better not stop there
- * but delegate the handling to ast.
+ * the thread owns a kernel resource, for instance busied the umtx
+ * key, or when functions return immediately if casueword_check_susp()
+ * returned non-zero.  On the other hand, retrying the whole lock
+ * operation, we better not stop there but delegate the handling to
+ * ast.
  *
  * If the request is for thread termination P_SINGLE_EXIT, we cannot
  * handle it at all, and simply return EINTR.
  */
-static int
-umtxq_check_susp(struct thread *td, bool sleep)
+int
+casueword_check_susp(struct thread *td, bool sleep)
 {
 	struct proc *p;
 	int error;
@@ -1070,7 +1071,7 @@
 					return (EOWNERDEAD); /* success */
 				}
 				MPASS(rv == 1);
-				rv = umtxq_check_susp(td, false);
+				rv = casueword_check_susp(td, false);
 				if (rv != 0)
 					return (rv);
 				continue;
@@ -1111,7 +1112,7 @@
 					return (0);
 				}
 				if (rv == 1) {
-					rv = umtxq_check_susp(td, false);
+					rv = casueword_check_susp(td, false);
 					if (rv != 0)
 						return (rv);
 				}
@@ -1124,7 +1125,7 @@
 			}
 
 			/* rv == 1 but not contested, likely store failure */
-			rv = umtxq_check_susp(td, false);
+			rv = casueword_check_susp(td, false);
 			if (rv != 0)
 				return (rv);
 		}
@@ -1167,7 +1168,7 @@
 			if (rv == -1)
 				return (EFAULT);
 			if (rv == 1) {
-				rv = umtxq_check_susp(td, false);
+				rv = casueword_check_susp(td, false);
 				if (rv != 0)
 					return (rv);
 			}
@@ -1189,7 +1190,7 @@
 		umtx_key_release(&uq->uq_key);
 
 		if (error == 0)
-			error = umtxq_check_susp(td, false);
+			error = casueword_check_susp(td, false);
 	}
 
 	return (0);
@@ -1224,7 +1225,7 @@
 		if (error == -1)
 			return (EFAULT);
 		if (error == 1) {
-			error = umtxq_check_susp(td, false);
+			error = casueword_check_susp(td, false);
 			if (error != 0)
 				return (error);
 			goto again;
@@ -1261,7 +1262,7 @@
 	if (error == 1) {
 		if (old != owner)
 			return (EINVAL);
-		error = umtxq_check_susp(td, false);
+		error = casueword_check_susp(td, false);
 		if (error != 0)
 			return (error);
 		goto again;
@@ -1316,7 +1317,7 @@
 			umtxq_unbusy(&key);
 			umtxq_unlock(&key);
 			umtx_key_release(&key);
-			error = umtxq_check_susp(td, false);
+			error = casueword_check_susp(td, false);
 			if (error != 0)
 				return (error);
 			goto again;
@@ -1400,7 +1401,7 @@
 			break;
 		}
 		owner = old;
-		error = umtxq_check_susp(td, false);
+		error = casueword_check_susp(td, false);
 	}
 
 	umtxq_lock(&key);
@@ -1905,7 +1906,7 @@
 		 * to the pending signal with suspension check result.
 		 */
 		if (error == 0) {
-			error = umtxq_check_susp(td, true);
+			error = casueword_check_susp(td, true);
 			if (error != 0)
 				break;
 		}
@@ -1922,7 +1923,7 @@
 			}
 			if (rv == 1) {
 				if (error == 0) {
-					error = umtxq_check_susp(td, true);
+					error = casueword_check_susp(td, true);
 					if (error != 0)
 						break;
 				}
@@ -1994,7 +1995,7 @@
 		}
 		if (rv == 1) {
 			umtxq_unbusy_unlocked(&uq->uq_key);
-			error = umtxq_check_susp(td, true);
+			error = casueword_check_susp(td, true);
 			if (error != 0)
 				break;
 
@@ -2017,7 +2018,7 @@
 		if (error != 0)
 			continue;
 
-		error = umtxq_check_susp(td, false);
+		error = casueword_check_susp(td, false);
 		if (error != 0)
 			break;
 	}
@@ -2063,7 +2064,7 @@
 		if (error == -1)
 			return (EFAULT);
 		if (error == 1) {
-			error = umtxq_check_susp(td, true);
+			error = casueword_check_susp(td, true);
 			if (error != 0)
 				return (error);
 			goto usrloop;
@@ -2150,7 +2151,7 @@
 again:
 	error = casueword32(&m->m_owner, owner, &old, new_owner);
 	if (error == 1) {
-		error = umtxq_check_susp(td, false);
+		error = casueword_check_susp(td, false);
 		if (error == 0)
 			goto again;
 	}
@@ -2255,7 +2256,7 @@
 			 *  error to not skip the last loop iteration.
 			 */
 			if (error == 0) {
-				error = umtxq_check_susp(td, false);
+				error = casueword_check_susp(td, false);
 				if (error == 0) {
 					if (try != 0)
 						error = EBUSY;
@@ -2770,7 +2771,7 @@
 				umtx_key_release(&uq->uq_key);
 				return (0);
 			}
-			error = umtxq_check_susp(td, true);
+			error = casueword_check_susp(td, true);
 			if (error != 0)
 				break;
 			state = oldstate;
@@ -2806,7 +2807,7 @@
 				goto sleep;
 			}
 			state = oldstate;
-			error = umtxq_check_susp(td, false);
+			error = casueword_check_susp(td, false);
 			if (error != 0)
 				break;
 		}
@@ -2818,7 +2819,7 @@
 		/* state is changed while setting flags, restart */
 		if (!(state & wrflags)) {
 			umtxq_unbusy_unlocked(&uq->uq_key);
-			error = umtxq_check_susp(td, true);
+			error = casueword_check_susp(td, true);
 			if (error != 0)
 				break;
 			continue;
@@ -2886,7 +2887,7 @@
 					break;
 				}
 				state = oldstate;
-				error1 = umtxq_check_susp(td, false);
+				error1 = casueword_check_susp(td, false);
 				if (error1 != 0) {
 					if (error == 0)
 						error = error1;
@@ -2948,7 +2949,7 @@
 				return (0);
 			}
 			state = oldstate;
-			error = umtxq_check_susp(td, true);
+			error = casueword_check_susp(td, true);
 			if (error != 0)
 				break;
 		}
@@ -2995,7 +2996,7 @@
 				goto sleep;
 			}
 			state = oldstate;
-			error = umtxq_check_susp(td, false);
+			error = casueword_check_susp(td, false);
 			if (error != 0)
 				break;
 		}
@@ -3007,7 +3008,7 @@
 		if ((state & URWLOCK_WRITE_OWNER) == 0 &&
 		    URWLOCK_READER_COUNT(state) == 0) {
 			umtxq_unbusy_unlocked(&uq->uq_key);
-			error = umtxq_check_susp(td, false);
+			error = casueword_check_susp(td, false);
 			if (error != 0)
 				break;
 			continue;
@@ -3070,7 +3071,7 @@
 					break;
 				}
 				state = oldstate;
-				error1 = umtxq_check_susp(td, false);
+				error1 = casueword_check_susp(td, false);
 				/*
 				 * We are leaving the URWLOCK_WRITE_WAITERS
 				 * behind, but this should not harm the
@@ -3136,7 +3137,7 @@
 					error = EPERM;
 					goto out;
 				}
-				error = umtxq_check_susp(td, true);
+				error = casueword_check_susp(td, true);
 				if (error != 0)
 					goto out;
 			} else
@@ -3156,7 +3157,7 @@
 					error = EPERM;
 					goto out;
 				}
-				error = umtxq_check_susp(td, true);
+				error = casueword_check_susp(td, true);
 				if (error != 0)
 					goto out;
 			} else
@@ -3234,7 +3235,7 @@
 		umtxq_remove(uq);
 		umtxq_unlock(&uq->uq_key);
 		if (rv == 1) {
-			rv = umtxq_check_susp(td, true);
+			rv = casueword_check_susp(td, true);
 			if (rv == 0)
 				goto again;
 			error = rv;
@@ -3356,7 +3357,7 @@
 		umtx_key_release(&uq->uq_key);
 		if (rv == -1)
 			return (EFAULT);
-		rv = umtxq_check_susp(td, true);
+		rv = casueword_check_susp(td, true);
 		if (rv != 0)
 			return (rv);
 		goto again;
@@ -3416,7 +3417,7 @@
 				rv = casueword32(&sem->_count, count, &count,
 				    count & ~USEM_HAS_WAITERS);
 				if (rv == 1) {
-					rv = umtxq_check_susp(td, true);
+					rv = casueword_check_susp(td, true);
 					if (rv != 0)
 						break;
 				}
Index: sys/kern/subr_syscall.c
===================================================================
--- sys/kern/subr_syscall.c
+++ sys/kern/subr_syscall.c
@@ -140,6 +140,13 @@
 	/* Let system calls set td_errno directly. */
 	td->td_pflags &= ~TDP_NERRNO;
 
+	/*
+	 * Fetch fast sigblock value at the time of syscall
+	 * entry because sleepqueue primitives might call
+	 * cursig().
+	 */
+	fetch_fast_sigblock(td);
+
 	AUDIT_SYSCALL_ENTER(sa->code, td);
 	error = (sa->callp->sy_call)(td, sa->args);
 	AUDIT_SYSCALL_EXIT(error, td);
Index: sys/kern/subr_trap.c
===================================================================
--- sys/kern/subr_trap.c
+++ sys/kern/subr_trap.c
@@ -218,8 +218,8 @@
 {
 	struct thread *td;
 	struct proc *p;
-	int flags;
-	int sig;
+	uint32_t oldval;
+	int flags, sig, res;
 
 	td = curthread;
 	p = td->td_proc;
@@ -317,6 +317,7 @@
 	 */
 	if (flags & TDF_NEEDSIGCHK || p->p_pendingcnt > 0 ||
 	    !SIGISEMPTY(p->p_siglist)) {
+		fetch_fast_sigblock(td);
 		PROC_LOCK(p);
 		mtx_lock(&p->p_sigacts->ps_mtx);
 		while ((sig = cursig(td)) != 0) {
@@ -326,6 +327,44 @@
 		mtx_unlock(&p->p_sigacts->ps_mtx);
 		PROC_UNLOCK(p);
 	}
+
+	/*
+	 * Handle deferred update of the fast sigblock value, after
+	 * the postsig() loop was performed.
+	 */
+	if (td->td_pflags & TDP_FAST_SIGPENDING) {
+		td->td_pflags &= ~TDP_FAST_SIGPENDING;
+		res = fueword32(td->td_sigblock_ptr, &oldval);
+		if (res == -1) {
+			fetch_fast_sigblock_failed(td, false);
+		} else {
+			/*
+			 * oldval is the comparand of the CAS and must
+			 * stay exactly as last read from userspace; only
+			 * the new value has FAST_SIGBLOCK_PEND or-ed in.
+			 * Forcing the bit into the comparand would make
+			 * the CAS fail whenever the bit is not yet set.
+			 */
+			for (;;) {
+				res = casueword32(td->td_sigblock_ptr, oldval,
+				    &oldval, oldval | FAST_SIGBLOCK_PEND);
+				if (res == -1) {
+					fetch_fast_sigblock_failed(td, true);
+					break;
+				}
+				if (res == 0) {
+					td->td_sigblock_val = oldval &
+					    ~FAST_SIGBLOCK_FLAGS;
+					break;
+				}
+				MPASS(res == 1);
+				res = casueword_check_susp(td, false);
+				if (res != 0)
+					break;
+			}
+		}
+	}
+
 	/*
 	 * We need to check to see if we have to exit or wait due to a
 	 * single threading requirement or some other STOP condition.
Index: sys/kern/syscalls.master
===================================================================
--- sys/kern/syscalls.master
+++ sys/kern/syscalls.master
@@ -3212,6 +3212,12 @@
 		    int flags
 		);
 	}
+573	AUE_NULL	STD {
+		int fast_sigblock(
+		    int cmd,
+		    _Inout_opt_ uint32_t *ptr
+		);
+	}
 
 ; Please copy any additions and changes to the following compatability tables:
 ; sys/compat/freebsd32/syscalls.master
Index: sys/sys/elf_common.h
===================================================================
--- sys/sys/elf_common.h
+++ sys/sys/elf_common.h
@@ -962,8 +962,9 @@
 #define	AT_EHDRFLAGS	24	/* e_flags field from elf hdr */
 #define	AT_HWCAP	25	/* CPU feature flags. */
 #define	AT_HWCAP2	26	/* CPU feature flags 2. */
+#define	AT_BSDFLAGS	27	/* ELF BSD Flags. */
 
-#define	AT_COUNT	27	/* Count of defined aux entry types. */
+#define	AT_COUNT	28	/* Count of defined aux entry types. */
 
 /*
  * Relocation types.
@@ -1460,5 +1461,6 @@
 #define	R_X86_64_TLSDESC	36
 #define	R_X86_64_IRELATIVE	37
 
+#define	ELF_BSDF_FASTSIGBLK	0x0001	/* Kernel supports fast sigblock */
 
 #endif /* !_SYS_ELF_COMMON_H_ */
Index: sys/sys/proc.h
===================================================================
--- sys/sys/proc.h
+++ sys/sys/proc.h
@@ -322,6 +322,9 @@
 	uintptr_t	td_rb_inact;	/* (k) Current in-action mutex loc. */
 	struct syscall_args td_sa;	/* (kx) Syscall parameters. Copied on
 					   fork for child tracing. */
+	void		*td_sigblock_ptr; /* (k) uptr for fast sigblock. */
+	uint32_t	td_sigblock_val;  /* (k) fast sigblock value read at
+					     td_sigblock_ptr on kern entry */
 #define	td_endcopy td_pcb
 
 /*
@@ -486,7 +489,7 @@
 #define	TDP_ALTSTACK	0x00000020 /* Have alternate signal stack. */
 #define	TDP_DEADLKTREAT	0x00000040 /* Lock acquisition - deadlock treatment. */
 #define	TDP_NOFAULTING	0x00000080 /* Do not handle page faults. */
-#define	TDP_UNUSED9	0x00000100 /* --available-- */
+#define	TDP_FAST_SIGBLOCK 0x00000100 /* Fast sigblock active */
 #define	TDP_OWEUPC	0x00000200 /* Call addupc() at next AST. */
 #define	TDP_ITHREAD	0x00000400 /* Thread is an interrupt thread. */
 #define	TDP_SYNCIO	0x00000800 /* Local override, disable async i/o. */
@@ -509,6 +512,7 @@
 #define	TDP_UIOHELD	0x10000000 /* Current uio has pages held in td_ma */
 #define	TDP_FORKING	0x20000000 /* Thread is being created through fork() */
 #define	TDP_EXECVMSPC	0x40000000 /* Execve destroyed old vmspace */
+#define	TDP_FAST_SIGPENDING 0x80000000 /* Fast sigblock update deferred */
 
 /*
  * Reasons that the current thread can not be run yet.
Index: sys/sys/signalvar.h
===================================================================
--- sys/sys/signalvar.h
+++ sys/sys/signalvar.h
@@ -256,6 +256,21 @@
 /* Flags for ksi_flags */
 #define	SQ_INIT	0x01
 
+/*
+ * Fast_sigblock
+ */
+#define	FAST_SIGBLOCK_SETPTR	1
+#define	FAST_SIGBLOCK_UNBLOCK	2
+#define	FAST_SIGBLOCK_UNSETPTR	3
+
+#define	FAST_SIGBLOCK_PEND	0x1
+#define	FAST_SIGBLOCK_FLAGS	0xf
+#define	FAST_SIGBLOCK_INC	0x10
+
+#ifndef _KERNEL
+int __sys_fast_sigblock(int cmd, void *ptr);
+#endif
+
 #ifdef _KERNEL
 
 /* Return nonzero if process p has an unmasked pending signal. */
@@ -365,6 +380,8 @@
 
 int	cursig(struct thread *td);
 void	execsigs(struct proc *p);
+void	fetch_fast_sigblock(struct thread *td);
+void	fetch_fast_sigblock_failed(struct thread *td, bool write);
 void	gsignal(int pgid, int sig, ksiginfo_t *ksi);
 void	killproc(struct proc *p, char *why);
 ksiginfo_t * ksiginfo_alloc(int wait);
Index: sys/sys/sysctl.h
===================================================================
--- sys/sys/sysctl.h
+++ sys/sys/sysctl.h
@@ -988,6 +988,7 @@
 #define	KERN_PROC_SIGTRAMP	41	/* signal trampoline location */
 #define	KERN_PROC_CWD		42	/* process current working directory */
 #define	KERN_PROC_NFDS		43	/* number of open file descriptors */
+#define	KERN_PROC_FASTSIGBLK	44	/* address of fastsigblk magic word */
 
 /*
  * KERN_IPC identifiers
Index: sys/sys/systm.h
===================================================================
--- sys/sys/systm.h
+++ sys/sys/systm.h
@@ -397,6 +397,7 @@
 	    uint32_t newval);
 int	casueword(volatile u_long *p, u_long oldval, u_long *oldvalp,
 	    u_long newval);
+int	casueword_check_susp(struct thread *td, bool sleep);
 
 void	realitexpire(void *);
 
Index: usr.bin/procstat/procstat_sigs.c
===================================================================
--- usr.bin/procstat/procstat_sigs.c
+++ usr.bin/procstat/procstat_sigs.c
@@ -37,6 +37,8 @@
 #include <err.h>
 #include <errno.h>
 #include <signal.h>
+#include <stdbool.h>
+#include <stdint.h>
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
@@ -137,13 +139,17 @@
 procstat_threads_sigs(struct procstat *procstat, struct kinfo_proc *kipp)
 {
 	struct kinfo_proc *kip;
+	uintmax_t fastsigblk_addr;
+	int error, name[4];
 	int j;
 	unsigned int count, i;
+	size_t len;
 	char *threadid;
+	bool has_fastsigblk_addr;
 
 	if ((procstat_opts & PS_OPT_NOHEADER) == 0)
-		xo_emit("{T:/%5s %6s %-16s %-7s %4s}\n", "PID", "TID", "COMM",
-		     "SIG", "FLAGS");
+		xo_emit("{T:/%5s %6s %-16s %-7s %4s %-18s}\n", "PID", "TID", "COMM",
+		     "SIG", "FLAGS", "FSIGBLK");
 
 	kip = procstat_getprocs(procstat, KERN_PROC_PID | KERN_PROC_INC_THREAD,
 	    kipp->ki_pid, &count);
@@ -155,6 +161,21 @@
 	kinfo_proc_sort(kip, count);
 	for (i = 0; i < count; i++) {
 		kipp = &kip[i];
+		len = sizeof(fastsigblk_addr);
+		name[0] = CTL_KERN;
+		name[1] = KERN_PROC;
+		name[2] = KERN_PROC_FASTSIGBLK;
+		name[3] = kipp->ki_tid;
+		error = sysctl(name, 4, &fastsigblk_addr, &len, NULL, 0);
+		if (error < 0) {
+			if (errno != ESRCH && errno != ENOTTY) {
+				warn("sysctl: kern.proc.fastsigblk: %d",
+				    kipp->ki_tid);
+			}
+			has_fastsigblk_addr = false;
+		} else
+			has_fastsigblk_addr = true;
+
 		asprintf(&threadid, "%d", kipp->ki_tid);
 		if (threadid == NULL)
 			xo_errc(1, ENOMEM, "Failed to allocate memory in "
@@ -162,6 +183,7 @@
 		xo_open_container(threadid);
 		xo_emit("{e:thread_id/%6d/%d}", kipp->ki_tid);
 		xo_open_container("signals");
+
 		for (j = 1; j <= _SIG_MAXSIG; j++) {
 			xo_emit("{dk:process_id/%5d/%d} ", kipp->ki_pid);
 			xo_emit("{d:thread_id/%6d/%d} ", kipp->ki_tid);
@@ -170,6 +192,10 @@
 			xo_emit(" ");
 			procstat_print_sig(&kipp->ki_siglist, j, 'P');
 			procstat_print_sig(&kipp->ki_sigmask, j, 'B');
+			xo_emit(" ");
+			/* XXXKIB: all-ones means address unknown */
+			xo_emit("{d:fsigblock/%#jx/%#jx}", has_fastsigblk_addr ?
+			    (uintmax_t)fastsigblk_addr : -1);
 			procstat_close_signame(j);
 			xo_emit("\n");
 		}