D3189.id19638.diff

Index: include/Makefile
===================================================================
--- include/Makefile
+++ include/Makefile
@@ -18,9 +18,9 @@
inttypes.h iso646.h kenv.h langinfo.h libgen.h limits.h link.h \
locale.h malloc.h malloc_np.h memory.h monetary.h mpool.h mqueue.h \
ndbm.h netconfig.h \
- netdb.h nl_types.h nlist.h nss.h nsswitch.h paths.h \
- printf.h proc_service.h pthread.h \
- pthread_np.h pwd.h ranlib.h readpassphrase.h regex.h \
+ netdb.h nl_types.h nlist.h nss.h nsswitch.h paths.h printf.h \
+ proc_service.h pthread.h pthread_np.h pthread_workqueue.h \
+ pwd.h ranlib.h readpassphrase.h regex.h \
res_update.h resolv.h runetype.h search.h semaphore.h setjmp.h \
signal.h spawn.h stab.h stdalign.h stdbool.h stddef.h \
stdnoreturn.h stdio.h stdlib.h string.h stringlist.h \
Index: include/pthread_workqueue.h
===================================================================
--- /dev/null
+++ include/pthread_workqueue.h
@@ -0,0 +1,76 @@
+/*-
+ * Copyright (c) 2009-2014, Stacey Son <sson@freebsd.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice unmodified, this list of conditions, and the following
+ * disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#include <sys/param.h>
+
+struct _pthread_workqueue;
+
+typedef struct _pthread_workqueue * pthread_workqueue_t;
+typedef void * pthread_workitem_handle_t;
+
+/* Pad size to 64 bytes. */
+typedef struct {
+ uint32_t sig;
+ int queueprio;
+ int overcommit;
+ unsigned int pad[13];
+} pthread_workqueue_attr_t;
+
+/* Work queue priority attributes. */
+#define WORKQ_HIGH_PRIOQUEUE 0 /* Assign to high priority queue. */
+#define WORKQ_DEFAULT_PRIOQUEUE 1 /* Assign to default priority queue. */
+#define WORKQ_LOW_PRIOQUEUE 2 /* Assign to low priority queue. */
+#define WORKQ_BG_PRIOQUEUE 3 /* background priority queue */
+
+#define WORKQ_NUM_PRIOQUEUE 4
+
+__BEGIN_DECLS
+extern __int32_t workq_targetconc[WORKQ_NUM_PRIOQUEUE];
+
+int pthread_workqueue_init_np(void);
+int pthread_workqueue_create_np(pthread_workqueue_t * workqp,
+ const pthread_workqueue_attr_t * attr);
+int pthread_workqueue_additem_np(pthread_workqueue_t workq,
+ void *( *workitem_func)(void *), void * workitem_arg,
+ pthread_workitem_handle_t * itemhandlep, unsigned int *gencountp);
+int pthread_workqueue_attr_init_np(pthread_workqueue_attr_t * attrp);
+int pthread_workqueue_attr_destroy_np(pthread_workqueue_attr_t * attr);
+int pthread_workqueue_attr_setqueuepriority_np(pthread_workqueue_attr_t * attr,
+ int qprio);
+int pthread_workqueue_attr_getovercommit_np(
+ const pthread_workqueue_attr_t * attr, int * ocommp);
+int pthread_workqueue_attr_setovercommit_np(pthread_workqueue_attr_t * attr,
+ int ocomm);
+int pthread_workqueue_requestconcurrency_np(pthread_workqueue_t workq,
+ int queue, int request_concurrency);
+int pthread_workqueue_getovercommit_np(pthread_workqueue_t workq,
+ unsigned int *ocommp);
+void pthread_workqueue_atfork_child(void);
+void pthread_workqueue_atfork_parent(void);
+void pthread_workqueue_atfork_prepare(void);
+__END_DECLS
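
For orientation, here is a minimal usage sketch of the API declared above. The example is editorial and not part of the patch; the work function, error handling, and the sleep-based synchronization are illustrative only.

#include <stdio.h>
#include <unistd.h>
#include <pthread_workqueue.h>

/* Illustrative work item function. */
static void *
do_work(void *arg)
{
	printf("work item ran, arg = %p\n", arg);
	return (NULL);
}

int
main(void)
{
	pthread_workqueue_attr_t attr;
	pthread_workqueue_t wq;
	pthread_workitem_handle_t item;
	unsigned int gencount;

	if (pthread_workqueue_init_np() != 0)
		return (1);
	pthread_workqueue_attr_init_np(&attr);
	pthread_workqueue_attr_setqueuepriority_np(&attr, WORKQ_HIGH_PRIOQUEUE);
	pthread_workqueue_attr_setovercommit_np(&attr, 0);
	if (pthread_workqueue_create_np(&wq, &attr) != 0)
		return (1);
	pthread_workqueue_additem_np(wq, do_work, NULL, &item, &gencount);
	pthread_workqueue_attr_destroy_np(&attr);
	sleep(1);	/* crude: give the pool time to run the item */
	return (0);
}

Such a program would be linked against libthr, which exports these symbols (see the pthread.map change below).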
Index: lib/libc/sys/Symbol.map
===================================================================
--- lib/libc/sys/Symbol.map
+++ lib/libc/sys/Symbol.map
@@ -399,7 +399,9 @@
};
FBSD_1.5 {
- fdatasync;
+ fdatasync;
+ thr_stack;
+ thr_workq;
};
FBSDprivate_1.0 {
@@ -1001,10 +1003,14 @@
__sys_thr_self;
_thr_set_name;
__sys_thr_set_name;
+ _thr_stack;
+ __sys_thr_stack;
_thr_suspend;
__sys_thr_suspend;
_thr_wake;
__sys_thr_wake;
+ _thr_workq;
+ __sys_thr_workq;
_ktimer_create;
__sys_ktimer_create;
_ktimer_delete;
Index: lib/libthr/pthread.map
===================================================================
--- lib/libthr/pthread.map
+++ lib/libthr/pthread.map
@@ -316,6 +316,39 @@
pthread_getthreadid_np;
};
+
+FBSD_1.3 {
+ _pthread_workqueue_additem_np;
+ _pthread_workqueue_atfork_child;
+ _pthread_workqueue_atfork_parent;
+ _pthread_workqueue_atfork_prepare;
+ _pthread_workqueue_attr_destroy_np;
+ _pthread_workqueue_attr_getovercommit_np;
+ _pthread_workqueue_attr_getqueuepriority_np;
+ _pthread_workqueue_attr_init_np;
+ _pthread_workqueue_attr_setovercommit_np;
+ _pthread_workqueue_attr_setqueuepriority_np;
+ _pthread_workqueue_create_np;
+ _pthread_workqueue_getovercommit_np;
+ _pthread_workqueue_init_np;
+ _pthread_workqueue_requestconcurrency_np;
+
+ pthread_workqueue_additem_np;
+ pthread_workqueue_atfork_child;
+ pthread_workqueue_atfork_parent;
+ pthread_workqueue_atfork_prepare;
+ pthread_workqueue_attr_destroy_np;
+ pthread_workqueue_attr_getovercommit_np;
+ pthread_workqueue_attr_getqueuepriority_np;
+ pthread_workqueue_attr_init_np;
+ pthread_workqueue_attr_setovercommit_np;
+ pthread_workqueue_attr_setqueuepriority_np;
+ pthread_workqueue_create_np;
+ pthread_workqueue_getovercommit_np;
+ pthread_workqueue_init_np;
+ pthread_workqueue_requestconcurrency_np;
+};
+
FBSD_1.4 {
pthread_mutex_consistent;
pthread_mutexattr_getrobust;
Index: lib/libthr/thread/Makefile.inc
===================================================================
--- lib/libthr/thread/Makefile.inc
+++ lib/libthr/thread/Makefile.inc
@@ -56,5 +56,6 @@
thr_suspend_np.c \
thr_switch_np.c \
thr_symbols.c \
thr_umtx.c \
+ thr_workq.c \
thr_yield.c
Index: lib/libthr/thread/thr_init.c
===================================================================
--- lib/libthr/thread/thr_init.c
+++ lib/libthr/thread/thr_init.c
@@ -59,7 +59,7 @@
#include "libc_private.h"
#include "thr_private.h"
-char *_usrstack;
+static char *_usrstack;
struct pthread *_thr_initial;
int _libthr_debug;
int _thread_event_mask;
@@ -113,8 +113,6 @@
int _thr_is_smp = 0;
size_t _thr_guard_default;
-size_t _thr_stack_default = THR_STACK_DEFAULT;
-size_t _thr_stack_initial = THR_STACK_INITIAL;
int _thr_page_size;
int _thr_spinloops;
int _thr_yieldloops;
@@ -387,9 +385,8 @@
* resource limits, so this stack needs an explicitly mapped
* red zone to protect the thread stack that is just beyond.
*/
- if (mmap(_usrstack - _thr_stack_initial -
- _thr_guard_default, _thr_guard_default, 0, MAP_ANON,
- -1, 0) == MAP_FAILED)
+ if (mmap(_usrstack - THR_STACK_INITIAL - _thr_guard_default,
+ _thr_guard_default, PROT_NONE, MAP_ANON, -1, 0) == MAP_FAILED)
PANIC("Cannot allocate red zone for initial thread");
/*
@@ -401,8 +398,8 @@
* actually free() it; it just puts it in the free
* stack queue for later reuse.
*/
- thread->attr.stackaddr_attr = _usrstack - _thr_stack_initial;
- thread->attr.stacksize_attr = _thr_stack_initial;
+ thread->attr.stackaddr_attr = _usrstack - THR_STACK_INITIAL;
+ thread->attr.stacksize_attr = THR_STACK_INITIAL;
thread->attr.guardsize_attr = _thr_guard_default;
thread->attr.flags |= THR_STACK_USER;
@@ -471,7 +468,7 @@
if (env_bigstack != NULL || env_splitstack == NULL) {
if (getrlimit(RLIMIT_STACK, &rlim) == -1)
PANIC("Cannot get stack rlimit");
- _thr_stack_initial = rlim.rlim_cur;
+ // XXX - _thr_stack_initial = rlim.rlim_cur;
}
len = sizeof(_thr_is_smp);
sysctlbyname("kern.smp.cpus", &_thr_is_smp, &len, NULL, 0);
@@ -479,7 +476,7 @@
_thr_page_size = getpagesize();
_thr_guard_default = _thr_page_size;
_pthread_attr_default.guardsize_attr = _thr_guard_default;
- _pthread_attr_default.stacksize_attr = _thr_stack_default;
+ _pthread_attr_default.stacksize_attr = THR_STACK_DEFAULT;
env = getenv("LIBPTHREAD_SPINLOOPS");
if (env)
_thr_spinloops = atoi(env);
Index: lib/libthr/thread/thr_private.h
===================================================================
--- lib/libthr/thread/thr_private.h
+++ lib/libthr/thread/thr_private.h
@@ -705,7 +705,6 @@
* Global variables for the pthread kernel.
*/
-extern char *_usrstack __hidden;
extern struct pthread *_thr_initial __hidden;
/* For debugger */
@@ -738,8 +737,6 @@
extern int _thr_is_smp __hidden;
extern size_t _thr_guard_default __hidden;
-extern size_t _thr_stack_default __hidden;
-extern size_t _thr_stack_initial __hidden;
extern int _thr_page_size __hidden;
extern int _thr_spinloops __hidden;
extern int _thr_yieldloops __hidden;
Index: lib/libthr/thread/thr_stack.c
===================================================================
--- lib/libthr/thread/thr_stack.c
+++ lib/libthr/thread/thr_stack.c
@@ -84,7 +84,7 @@
* | Red Zone (guard page) | red zone for 2nd thread
* | |
* +-----------------------------------+
- * | stack 2 - _thr_stack_default | top of 2nd thread stack
+ * | stack 2 - THR_STACK_DEFAULT | top of 2nd thread stack
* | |
* | |
* | |
@@ -95,7 +95,7 @@
* | Red Zone | red zone for 1st thread
* | |
* +-----------------------------------+
- * | stack 1 - _thr_stack_default | top of 1st thread stack
+ * | stack 1 - THR_STACK_DEFAULT | top of 1st thread stack
* | |
* | |
* | |
@@ -106,7 +106,7 @@
* | Red Zone |
* | | red zone for main thread
* +-----------------------------------+
- * | USRSTACK - _thr_stack_initial | top of main thread stack
+ * | KERN_USRSTACK - THR_STACK_INITIAL | top of main thread stack
* | | ^
* | | |
* | | |
@@ -117,7 +117,6 @@
* high memory
*
*/
-static char *last_stack = NULL;
/*
* Round size up to the nearest multiple of
@@ -194,11 +193,10 @@
struct stack *spare_stack;
size_t stacksize;
size_t guardsize;
- char *stackaddr;
/*
* Round up stack size to nearest multiple of _thr_page_size so
- * that mmap() * will work. If the stack size is not an even
+ * that thr_stack() will work. If the stack size is not an even
* multiple, we end up initializing things such that there is
* unused space above the beginning of the stack, so the stack
* sits snugly against its guard.
@@ -211,7 +209,7 @@
/*
* Use the garbage collector lock for synchronization of the
- * spare stack lists and allocations from usrstack.
+ * spare stack lists.
*/
THREAD_LIST_WRLOCK(curthread);
/*
@@ -246,43 +244,10 @@
THREAD_LIST_UNLOCK(curthread);
}
else {
- /*
- * Allocate a stack from or below usrstack, depending
- * on the LIBPTHREAD_BIGSTACK_MAIN env variable.
- */
- if (last_stack == NULL)
- last_stack = _usrstack - _thr_stack_initial -
- _thr_guard_default;
-
- /* Allocate a new stack. */
- stackaddr = last_stack - stacksize - guardsize;
-
- /*
- * Even if stack allocation fails, we don't want to try to
- * use this location again, so unconditionally decrement
- * last_stack. Under normal operating conditions, the most
- * likely reason for an mmap() error is a stack overflow of
- * the adjacent thread stack.
- */
- last_stack -= (stacksize + guardsize);
-
- /* Release the lock before mmap'ing it. */
+ /* thr_stack() can block so release the lock */
THREAD_LIST_UNLOCK(curthread);
- /* Map the stack and guard page together, and split guard
- page from allocated space: */
- if ((stackaddr = mmap(stackaddr, stacksize + guardsize,
- _rtld_get_stack_prot(), MAP_STACK,
- -1, 0)) != MAP_FAILED &&
- (guardsize == 0 ||
- mprotect(stackaddr, guardsize, PROT_NONE) == 0)) {
- stackaddr += guardsize;
- } else {
- if (stackaddr != MAP_FAILED)
- munmap(stackaddr, stacksize + guardsize);
- stackaddr = NULL;
- }
- attr->stackaddr_attr = stackaddr;
+ attr->stackaddr_attr = thr_stack(stacksize, guardsize);
}
if (attr->stackaddr_attr != NULL)
return (0);
Index: share/man/man3/Makefile
===================================================================
--- share/man/man3/Makefile
+++ share/man/man3/Makefile
@@ -273,6 +273,7 @@
pthread_suspend_np.3 \
pthread_switch_add_np.3 \
pthread_testcancel.3 \
+ pthread_workqueue_np.3 \
pthread_yield.3
PTHREAD_MLINKS= pthread_affinity_np.3 pthread_getaffinity_np.3 \
@@ -335,6 +336,15 @@
PTHREAD_MLINKS+=pthread_testcancel.3 pthread_setcancelstate.3 \
pthread_testcancel.3 pthread_setcanceltype.3
PTHREAD_MLINKS+=pthread_join.3 pthread_timedjoin_np.3
+PTHREAD_MLINKS+=pthread_workqueue_np.3 pthread_workqueue_init_np.3 \
+ pthread_workqueue_np.3 pthread_workqueue_create_np.3 \
+ pthread_workqueue_np.3 pthread_workqueue_additem_np.3 \
+ pthread_workqueue_np.3 pthread_workqueue_attr_init_np.3 \
+ pthread_workqueue_np.3 pthread_workqueue_attr_destroy_np.3 \
+ pthread_workqueue_np.3 pthread_workqueue_attr_getqueuepriority_np.3 \
+ pthread_workqueue_np.3 pthread_workqueue_attr_setqueuepriority_np.3 \
+ pthread_workqueue_np.3 pthread_workqueue_attr_getovercommit_np.3 \
+ pthread_workqueue_np.3 pthread_workqueue_attr_setovercommit_np.3
.endif
.include <bsd.prog.mk>
Index: share/man/man3/pthread_workqueue_np.3
===================================================================
--- /dev/null
+++ share/man/man3/pthread_workqueue_np.3
@@ -0,0 +1,245 @@
+.\" Copyright (C) 2009-2015 sson@FreeBSD.org
+.\" All rights reserved.
+.\"
+.\" Redistribution and use in source and binary forms, with or without
+.\" modification, are permitted provided that the following conditions
+.\" are met:
+.\" 1. Redistributions of source code must retain the above copyright
+.\" notice(s), this list of conditions and the following disclaimer as
+.\" the first lines of this file unmodified other than the possible
+.\" addition of one or more copyright notices.
+.\" 2. Redistributions in binary form must reproduce the above copyright
+.\" notice(s), this list of conditions and the following disclaimer in
+.\" the documentation and/or other materials provided with the
+.\" distribution.
+.\"
+.\" THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) ``AS IS'' AND ANY
+.\" EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+.\" PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) BE
+.\" LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+.\" CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+.\" SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+.\" BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+.\" WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
+.\" OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
+.\" EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+.\"
+.\" $FreeBSD$
+.Dd May 1, 2014
+.Dt PTHREAD_WORKQUEUE 3
+.Os
+.Sh NAME
+.Nm pthread_workqueue_init_np ,
+.Nm pthread_workqueue_create_np ,
+.Nm pthread_workqueue_additem_np
+.Nd thread workqueue operations
+.Pp
+.Nm pthread_workqueue_attr_init_np ,
+.Nm pthread_workqueue_attr_destroy_np ,
+.Nm pthread_workqueue_attr_getovercommit_np ,
+.Nm pthread_workqueue_attr_setovercommit_np ,
+.Nm pthread_workqueue_attr_getqueuepriority_np ,
+.Nm pthread_workqueue_attr_setqueuepriority_np
+.Nd thread workqueue attribute operations
+.Sh LIBRARY
+.Lb libpthread
+.Sh SYNOPSIS
+.In pthread_np.h
+.Ft int
+.Fn pthread_workqueue_init_np "void"
+.Ft int
+.Fn pthread_workqueue_create_np "pthread_workqueue_t *workqp" "const pthread_workqueue_attr_t * attr"
+.Ft int
+.Fn pthread_workqueue_additem_np "pthread_workqueue_t workq" "void *( *workitem_func)(void *)" "void * workitem_arg" "pthread_workitem_handle_t * itemhandlep" "unsigned int *gencountp"
+.Ft int
+.Fn pthread_workqueue_attr_init_np "pthread_workqueue_attr_t *attr"
+.Ft int
+.Fn pthread_workqueue_attr_destroy_np "pthread_workqueue_attr_t *attr"
+.Ft int
+.Fn pthread_workqueue_attr_getovercommit_np "pthread_workqueue_attr_t *attr" "int *ocommp"
+.Ft int
+.Fn pthread_workqueue_attr_setovercommit_np "pthread_workqueue_attr_t *attr" "int ocomm"
+.Ft int
+.Fn pthread_workqueue_attr_getqueuepriority_np "pthread_workqueue_attr_t *attr" "int *qpriop"
+.Ft int
+.Fn pthread_workqueue_attr_setqueuepriority_np "pthread_workqueue_attr_t *attr" "int qprio"
+.Sh DESCRIPTION
+The
+.Fn pthread_workqueue_*_np
+functions are used to create and submit work items to a thread pool.
+The size of the thread pool is managed by the kernel based on physical
+resources and these tunable
+.Xr sysctl 3
+MIBs:
+.Bl -tag -width "Va kern.wq_reduce_pool_window_usecs"
+.It Va kern.wq_yielded_threshold
+Maximum number of threads to yield within the window.
+.It Va kern.wq_yielded_window_usecs
+Yielded thread window size given in microseconds.
+.It Va kern.wq_stalled_window_usecs
+Number of microseconds until a thread is considered stalled.
+.It Va kern.wq_reduce_pool_window_usecs
+Number of microseconds while a thread is idle until it is
+removed from the thread pool.
+.It Va kern.wq_max_timer_interval_usecs
+Number of microseconds to wait to check for stalled or idle threads.
+.It Va kern.wq_max_threads
+Maximum number of threads in the thread pool.
+.El
+.Pp
+The user may create multiple work queues of different priority and
+manually overcommit the available resources.
+.Pp
+.Fn pthread_workqueue_init_np
+allocates and initializes the thread workqueue subsystem.
+.Pp
+.Fn pthread_workqueue_create_np
+creates a new thread workqueue with the attributes given by
+.Fa attr .
+If
+.Fa attr
+is NULL then the default attributes are used.
+A workqueue handle is returned in the
+.Fa workqp
+parameter.
+.Pp
+Thread workqueue attributes are used to specify parameters to
+.Fn pthread_workqueue_create_np .
+One attribute object can be used in multiple calls to
+.Fn pthread_workqueue_create_np ,
+with or without modifications between calls.
+.Pp
+.Fn pthread_workqueue_additem_np
+is used to submit work items to the thread pool specified by the
+.Fa workq
+parameter.
+The work item function and function argument are given by
+.Fa workitem_func
+and
+.Fa workitem_arg .
+The work item handle is returned in
+.Fa itemhandlep .
+.Pp
+The
+.Fn pthread_workqueue_attr_init_np
+function initializes
+.Fa attr
+with all the default thread workqueue attributes.
+.Pp
+The
+.Fn pthread_workqueue_attr_destroy_np
+function destroys
+.Fa attr .
+.Pp
+The
+.Fn pthread_workqueue_attr_set*_np
+functions set the attribute that corresponds to each function name.
+.Fn pthread_workqueue_attr_setovercommit_np
+can be used to set the overcommit flag.
+When the overcommit flag is set, more threads will be started as
+needed.
+This might overcommit the physical resources of the system.
+.Fn pthread_workqueue_attr_setqueuepriority_np
+sets the queue priority attribute of the thread work queue and must be
+set to one of these values:
+.Bl -tag -width "Va WORKQ_DEFAULT_PRIOQUEUE"
+.It Va WORKQ_HIGH_PRIOQUEUE
+Queued work items with this attribute will be given higher priority by
+the thread scheduler.
+.It Va WORKQ_DEFAULT_PRIOQUEUE
+Queued work items in the queue with this attribute are given the default
+priority.
+.It Va WORKQ_LOW_PRIOQUEUE
+Queued work items in the queue with this attribute will be given lower priority
+by the thread scheduler.
+.El
+.Pp
+The
+.Fn pthread_workqueue_attr_get*_np
+functions copy the value of the attribute that corresponds to each function name
+to the location pointed to by the second function parameter.
+.Sh RETURN VALUES
+If successful, these functions return 0.
+Otherwise, an error number is returned to indicate the error.
+.Sh ERRORS
+The
+.Fn pthread_workqueue_init_np
+function will fail with:
+.Bl -tag -width Er
+.It Bq Er ENOMEM
+Out of memory.
+.El
+.Pp
+The
+.Fn pthread_workqueue_create_np
+function will fail with:
+.Bl -tag -width Er
+.It Bq Er ENOMEM
+Out of memory.
+.El
+.Pp
+The
+.Fn pthread_workqueue_additem_np
+function will fail with:
+.Bl -tag -width Er
+.It Bq Er EINVAL
+Invalid workqueue handle.
+.It Bq Er ENOMEM
+Out of memory.
+.It Bq Er ESRCH
+Cannot find workqueue.
+.El
+.Pp
+The
+.Fn pthread_workqueue_attr_init_np
+function will fail if:
+.Bl -tag -width Er
+.It Bq Er ENOMEM
+Out of memory.
+.El
+.Pp
+The
+.Fn pthread_workqueue_attr_destroy_np
+function will fail if:
+.Bl -tag -width Er
+.It Bq Er EINVAL
+Invalid value for
+.Fa attr .
+.El
+.Pp
+The
+.Fn pthread_workqueue_attr_setqueuepriority_np
+function will fail if:
+.Bl -tag -width Er
+.It Bq Er EINVAL
+Invalid value for
+.Fa attr
+or for
+.Fa qprio.
+.El
+.Pp
+The
+.Fn pthread_workqueue_attr_setovercommit_np ,
+.Fn pthread_workqueue_attr_getovercommit_np
+and
+.Fn pthread_workqueue_attr_getqueuepriority_np
+functions will fail if:
+.Bl -tag -width Er
+.It Bq Er EINVAL
+Invalid value for
+.Fa attr .
+.El
+.Sh SEE ALSO
+.Xr pthread 3 ,
+.Xr sysctl 3
+.Sh BUGS
+There is currently no way to remove or destroy work queues or pending
+work items other than exiting the process.
+.Sh HISTORY
+This thread workqueues code was created to support Grand Central Dispatch (GCD
+or libdispatch) and first appeared in
+.Fx 10.1 .
+.Sh AUTHORS
+.An "Stacey Son" Aq sson@FreeBSD.org .
+.\" Copyright (C) 2009-2015 sson@FreeBSD.org
+.\" All rights reserved.
+.\"
+.\" Redistribution and use in source and binary forms, with or without
+.\" modification, are permitted provided that the following conditions
+.\" are met:
+.\" 1. Redistributions of source code must retain the above copyright
+.\" notice(s), this list of conditions and the following disclaimer as
+.\" the first lines of this file unmodified other than the possible
+.\" addition of one or more copyright notices.
+.\" 2. Redistributions in binary form must reproduce the above copyright
+.\" notice(s), this list of conditions and the following disclaimer in
+.\" the documentation and/or other materials provided with the
+.\" distribution.
+.\"
+.\" THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) ``AS IS'' AND ANY
+.\" EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+.\" PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) BE
+.\" LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+.\" CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+.\" SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+.\" BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+.\" WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
+.\" OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
+.\" EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+.\"
+.\" $FreeBSD$
+.Dd May 1, 2014
+.Dt PTHREAD_WORKQUEUE 3
+.Os
+.Sh NAME
+.Nm pthread_workqueue_init_np ,
+.Nm pthread_workqueue_create_np ,
+.Nm pthread_workqueue_additem_np
+.Nd thread workqueue operations
+.Pp
+.Nm pthread_workqueue_attr_init_np ,
+.Nm pthread_workqueue_attr_destroy_np ,
+.Nm pthread_workqueue_attr_getovercommit_np ,
+.Nm pthread_workqueue_attr_setovercommit_np ,
+.Nm pthread_workqueue_attr_getqueuepriority_np ,
+.Nm pthread_workqueue_attr_setqueuepriority_np
+.Nd thread workqueue attribute operations
+.Sh LIBRARY
+.Lb libpthread
+.Sh SYNOPSIS
+.In pthread_np.h
+.Ft int
+.Fn pthread_workqueue_init_np "void"
+.Ft int
+.Fn pthread_workqueue_create_np "pthread_workqueue_t *workqp" "const pthread_workqueue_attr_t * attr"
+.Ft int
+.Fn pthread_workqueue_additem_np "pthread_workqueue_t workq" "void *( *workitem_func)(void *)" "void * workitem_arg" "pthread_workitem_handle_t * itemhandlep" "unsigned int *gencountp"
+.Ft int
+.Fn pthread_workqueue_attr_init_np "pthread_workqueue_attr_t *attr"
+.Ft int
+.Fn pthread_workqueue_attr_destroy_np "pthread_workqueue_attr_t *attr"
+.Ft int
+.Fn pthread_workqueue_attr_getovercommit_np "pthread_workqueue_attr_t *attr" "int *ocommp"
+.Ft int
+.Fn pthread_workqueue_attr_setovercommit_np "pthread_workqueue_attr_t *attr" "int ocomm"
+.Ft int
+.Fn pthread_workqueue_attr_getqueuepriority_np "pthread_workqueue_attr_t *attr" "int *qpriop"
+.Ft int
+.Fn pthread_workqueue_attr_setqueuepriority_np "pthread_workqueue_attr_t *attr" "int qprio"
+.Sh DESCRIPTION
+The
+.Fn pthread_workqueue_*_np
+functions are used to create and submit work items to a thread pool.
+The size of the thread pool is managed by the kernel based on physical
+resources and these tunable
+.Xr sysctl 3
+MIBs:
+.Bl -tag -width "Va kern.wq_reduce_pool_window_usecs"
+.It Va kern.wq_yielded_threshold
+Maximum number of threads to yield within the window.
+.It Va kern.wq_yielded_window_usecs
+Yielded thread window size given in microseconds.
+.It Va kern.wq_stalled_window_usecs
+Number of microseconds until a thread is considered stalled.
+.It Va kern.wq_reduce_pool_window_usecs
+Number of microseconds while a thread is idle until it is
+removed from the thread pool.
+.It Va kern.wq_max_timer_interval_usecs
+Number of microseconds to wait to check for stalled or idle threads.
+.It Va kern.wq_max_threads
+Maximum number of threads in the thread pool.
+.El
+.Pp
+The user may create multiple work queues of different priority and
+manually overcommit the available resources.
+.Pp
+.Fn pthread_workqueue_init_np
+allocates and initializes the thread workqueue subsystem.
+.Pp
+.Fn pthread_workqueue_create_np
+creates a new thread workqueue with the attributes given by
+.Fa attr .
+If
+.Fa attr
+is
+.Dv NULL
+then the default attributes are used.
+A workqueue handle is returned in the
+.Fa workqp
+parameter.
+.Pp
+Thread workqueue attributes are used to specify parameters to
+.Fn pthread_workqueue_create_np .
+One attribute object can be used in multiple calls to
+.Fn pthread_workqueue_create_np ,
+with or without modifications between calls.
+.Pp
+.Fn pthread_workqueue_additem_np
+is used to submit work items to the thread pool specified by the
+.Fa workq
+parameter.
+The work item function and function argument are given by
+.Fa workitem_func
+and
+.Fa workitem_arg .
+The work item handle is returned in
+.Fa itemhandlep .
+.Pp
+The
+.Fn pthread_workqueue_attr_init_np
+function initializes
+.Fa attr
+with all the default thread workqueue attributes.
+.Pp
+The
+.Fn pthread_workqueue_attr_destroy_np
+function destroys
+.Fa attr .
+.Pp
+The
+.Fn pthread_workqueue_attr_set*_np
+functions set the attribute that corresponds to each function name.
+.Fn pthread_workqueue_attr_setovercommit_np
+can be used to set the overcommit flag.
+When the overcommit flag is set, more threads will be started as needed.
+This might overcommit the physical resources of the system.
+.Fn pthread_workqueue_attr_setqueuepriority_np
+sets the queue priority attribute of the thread work queue and must be
+set to one of these values:
+.Bl -tag -width "Va WORKQ_DEFAULT_PRIOQUEUE"
+.It Va WORKQ_HIGH_PRIOQUEUE
+Queued work items with this attribute will be given higher priority by
+the thread scheduler.
+.It Va WORKQ_DEFAULT_PRIOQUEUE
+Queued work items in the queue with this attribute are given the default
+priority.
+.It Va WORKQ_LOW_PRIOQUEUE
+Queued work items in the queue with this attribute will be given lower priority
+by the thread scheduler.
+.El
+.Pp
+The
+.Fn pthread_workqueue_attr_get*_np
+functions copy the value of the attribute that corresponds to each function name
+to the location pointed to by the second function parameter.
+.Sh RETURN VALUES
+If successful, these functions return 0.
+Otherwise, an error number is returned to indicate the error.
+.Sh ERRORS
+The
+.Fn pthread_workqueue_init_np
+function will fail with:
+.Bl -tag -width Er
+.It Bq Er ENOMEM
+Out of memory.
+.El
+.Pp
+The
+.Fn pthread_workqueue_create_np
+function will fail with:
+.Bl -tag -width Er
+.It Bq Er ENOMEM
+Out of memory.
+.El
+.Pp
+The
+.Fn pthread_workqueue_additem_np
+function will fail with:
+.Bl -tag -width Er
+.It Bq Er EINVAL
+Invalid workqueue handle.
+.It Bq Er ENOMEM
+Out of memory.
+.It Bq Er ESRCH
+Cannot find workqueue.
+.El
+.Pp
+The
+.Fn pthread_workqueue_attr_init_np
+function will fail if:
+.Bl -tag -width Er
+.It Bq Er ENOMEM
+Out of memory.
+.El
+.Pp
+The
+.Fn pthread_workqueue_attr_destroy_np
+function will fail if:
+.Bl -tag -width Er
+.It Bq Er EINVAL
+Invalid value for
+.Fa attr .
+.El
+.Pp
+The
+.Fn pthread_workqueue_attr_setqueuepriority_np
+function will fail if:
+.Bl -tag -width Er
+.It Bq Er EINVAL
+Invalid value for
+.Fa attr
+or for
+.Fa qprio .
+.El
+.Pp
+The
+.Fn pthread_workqueue_attr_setovercommit_np ,
+.Fn pthread_workqueue_attr_getovercommit_np
+and
+.Fn pthread_workqueue_attr_getqueuepriority_np
+functions will fail if:
+.Bl -tag -width Er
+.It Bq Er EINVAL
+Invalid value for
+.Fa attr .
+.El
+.Sh SEE ALSO
+.Xr pthread 3 ,
+.Xr sysctl 3
+.Sh BUGS
+There is currently no way to remove or destroy work queues or pending
+work items other than exiting the process.
+.Sh HISTORY
+The thread workqueue code was created to support Grand Central Dispatch (GCD
+or libdispatch) and first appeared in
+.Fx 10.1 .
+.Sh AUTHORS
+.An Stacey Son Aq Mt sson@FreeBSD.org
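
The kern.wq_* entries listed in the DESCRIPTION above are ordinary sysctl MIBs; they are created by the SYSCTL_INT declarations in kern_thrworkq.c further down. A hypothetical sketch of reading one of them from a program, assuming a kernel built with options THRWORKQ:

#include <sys/types.h>
#include <sys/sysctl.h>
#include <stdio.h>

int
main(void)
{
	uint32_t maxthreads;
	size_t len = sizeof(maxthreads);

	/* kern.wq_max_threads caps the number of threads per workqueue. */
	if (sysctlbyname("kern.wq_max_threads", &maxthreads, &len,
	    NULL, 0) == -1) {
		perror("sysctlbyname");
		return (1);
	}
	printf("kern.wq_max_threads = %u\n", maxthreads);
	return (0);
}

The same tunables can be inspected or changed from the shell with sysctl(8), for example sysctl kern.wq_max_threads=1024.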
Index: sys/conf/NOTES
===================================================================
--- sys/conf/NOTES
+++ sys/conf/NOTES
@@ -1172,6 +1172,9 @@
# POSIX message queue
options P1003_1B_MQUEUE
+
+# PThread WorkQueue support
+options THRWORKQ
#####################################################################
# SECURITY POLICY PARAMETERS
Index: sys/conf/files
===================================================================
--- sys/conf/files
+++ sys/conf/files
@@ -3315,6 +3315,7 @@
kern/kern_tc.c standard
kern/kern_thr.c standard
kern/kern_thread.c standard
+kern/kern_thrworkq.c standard
kern/kern_time.c standard
kern/kern_timeout.c standard
kern/kern_umtx.c standard
Index: sys/conf/options
===================================================================
--- sys/conf/options
+++ sys/conf/options
@@ -225,6 +225,9 @@
P1003_1B_SEMAPHORES opt_posix.h
_KPOSIX_PRIORITY_SCHEDULING opt_posix.h
+# PThread WorkQueue Option
+THRWORKQ opt_thrworkq.h
+
# Do we want the config file compiled into the kernel?
INCLUDE_CONFIG_FILE opt_config.h
Index: sys/i386/include/atomic.h
===================================================================
--- sys/i386/include/atomic.h
+++ sys/i386/include/atomic.h
@@ -214,6 +214,26 @@
return (res);
}
+static __inline int
+atomic_cmpset_64(volatile uint64_t *dst, uint64_t exp, uint64_t src)
+{
+ int64_t res = exp;
+
+ __asm __volatile (
+ " " MPLOCKED " "
+ " cmpxchg8b %2 ; "
+ " setz %%al ; "
+ " movzbl %%al,%0 ; "
+ "# atomic_cmpset_64"
+ : "+A" (res), /* 0 (result) */
+ "=m" (*dst) /* 1 */
+ : "m" (*dst), /* 2 */
+ "b" ((uint32_t)src),
+ "c" ((uint32_t)(src >> 32)));
+
+ return (res);
+}
+
#endif /* CPU_DISABLE_CMPXCHG */
/*
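
The new atomic_cmpset_64() gives i386 a 64-bit compare-and-set primitive. The workqueue code below (see twq_thr_busy() in kern_thrworkq.c) relies on it to read 64-bit block timestamps without tearing on 32-bit machines, by writing back the value it just read. A sketch of that idiom as a hypothetical helper, not part of the patch:

/* Emulate an atomic 64-bit load with compare-and-set (illustrative only). */
static __inline uint64_t
atomic_load_64_sketch(volatile uint64_t *p)
{
	uint64_t v;

	do {
		v = *p;		/* plain read; may tear on 32-bit CPUs */
	} while (!atomic_cmpset_64(p, v, v));	/* no-op store fails if the read tore */
	return (v);
}

twq_thr_busy() itself does not loop; it simply treats a failed compare-and-set as a freshly updated (busy) timestamp.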
Index: sys/kern/init_sysent.c
===================================================================
--- sys/kern/init_sysent.c
+++ sys/kern/init_sysent.c
@@ -513,8 +513,8 @@
{ AS(thr_set_name_args), (sy_call_t *)sys_thr_set_name, AUE_NULL, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 464 = thr_set_name */
{ AS(aio_fsync_args), (sy_call_t *)sys_aio_fsync, AUE_NULL, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 465 = aio_fsync */
{ AS(rtprio_thread_args), (sy_call_t *)sys_rtprio_thread, AUE_RTPRIO, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 466 = rtprio_thread */
- { 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 467 = nosys */
- { 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 468 = nosys */
+ { AS(thr_stack_args), (sy_call_t *)sys_thr_stack, AUE_NULL, NULL, 0, 0, 0, SY_THR_STATIC }, /* 467 = thr_stack */
+ { AS(thr_workq_args), (sy_call_t *)sys_thr_workq, AUE_NULL, NULL, 0, 0, 0, SY_THR_STATIC }, /* 468 = thr_workq */
{ 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 469 = __getpath_fromfd */
{ 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 470 = __getpath_fromaddr */
{ AS(sctp_peeloff_args), (sy_call_t *)lkmressys, AUE_NULL, NULL, 0, 0, SYF_CAPENABLED, SY_THR_ABSENT }, /* 471 = sctp_peeloff */
Index: sys/kern/kern_exec.c
===================================================================
--- sys/kern/kern_exec.c
+++ sys/kern/kern_exec.c
@@ -30,6 +30,7 @@
#include "opt_capsicum.h"
#include "opt_hwpmc_hooks.h"
#include "opt_ktrace.h"
+#include "opt_thrworkq.h"
#include "opt_vm.h"
#include <sys/param.h>
@@ -64,6 +65,9 @@
#include <sys/sysent.h>
#include <sys/shm.h>
#include <sys/sysctl.h>
+#ifdef THRWORKQ
+#include <sys/thrworkq.h>
+#endif
#include <sys/vnode.h>
#include <sys/stat.h>
#ifdef KTRACE
@@ -211,6 +215,7 @@
int error;
error = pre_execve(td, &oldvmspace);
+
if (error != 0)
return (error);
error = exec_copyin_args(&args, uap->fname, UIO_USERSPACE,
@@ -294,6 +299,10 @@
error = ERESTART;
PROC_UNLOCK(p);
}
+#ifdef THRWORKQ
+ if (error == 0)
+ thrworkq_exit(p);
+#endif
KASSERT(error != 0 || (td->td_pflags & TDP_EXECVMSPC) == 0,
("nested execve"));
*oldvmspace = p->p_vmspace;
Index: sys/kern/kern_exit.c
===================================================================
--- sys/kern/kern_exit.c
+++ sys/kern/kern_exit.c
@@ -39,6 +39,7 @@
#include "opt_compat.h"
#include "opt_ktrace.h"
+#include "opt_thrworkq.h"
#include <sys/param.h>
#include <sys/systm.h>
@@ -72,6 +73,9 @@
#include <sys/shm.h>
#include <sys/sem.h>
#include <sys/umtx.h>
+#ifdef THRWORKQ
+#include <sys/thrworkq.h>
+#endif
#ifdef KTRACE
#include <sys/ktrace.h>
#endif
@@ -204,6 +208,13 @@
panic("Going nowhere without my init!");
}
+#ifdef THRWORKQ
+ /*
+ * Check if this process has a thread workqueue.
+ */
+ thrworkq_exit(p);
+#endif
+
/*
* Deref SU mp, since the thread does not return to userspace.
*/
Index: sys/kern/kern_mutex.c
===================================================================
--- sys/kern/kern_mutex.c
+++ sys/kern/kern_mutex.c
@@ -40,6 +40,7 @@
#include "opt_ddb.h"
#include "opt_hwpmc_hooks.h"
#include "opt_sched.h"
+#include "opt_thrworkq.h"
#include <sys/param.h>
#include <sys/systm.h>
@@ -1032,6 +1033,10 @@
blocked_lock.mtx_lock = 0xdeadc0de; /* Always blocked. */
mtx_init(&proc0.p_mtx, "process lock", NULL, MTX_DEF | MTX_DUPOK);
mtx_init(&proc0.p_slock, "process slock", NULL, MTX_SPIN);
+#ifdef THRWORKQ
+ mtx_init(&proc0.p_twqlock, "thr workq lock", NULL, MTX_DEF | MTX_DUPOK);
+ proc0.p_twq = NULL;
+#endif /* THRWORKQ */
mtx_init(&proc0.p_statmtx, "pstatl", NULL, MTX_SPIN);
mtx_init(&proc0.p_itimmtx, "pitiml", NULL, MTX_SPIN);
mtx_init(&proc0.p_profmtx, "pprofl", NULL, MTX_SPIN);
Index: sys/kern/kern_proc.c
===================================================================
--- sys/kern/kern_proc.c
+++ sys/kern/kern_proc.c
@@ -37,6 +37,7 @@
#include "opt_ktrace.h"
#include "opt_kstack_pages.h"
#include "opt_stack.h"
+#include "opt_thrworkq.h"
#include <sys/param.h>
#include <sys/systm.h>
@@ -245,6 +246,10 @@
cv_init(&p->p_pwait, "ppwait");
cv_init(&p->p_dbgwait, "dbgwait");
TAILQ_INIT(&p->p_threads); /* all threads in proc */
+#ifdef THRWORKQ
+ mtx_init(&p->p_twqlock, "thr workq lock", NULL, MTX_DEF | MTX_DUPOK);
+ p->p_twq = NULL;
+#endif /* THRWORKQ */
EVENTHANDLER_INVOKE(process_init, p);
p->p_stats = pstats_alloc();
p->p_pgrp = NULL;
Index: sys/kern/kern_synch.c
===================================================================
--- sys/kern/kern_synch.c
+++ sys/kern/kern_synch.c
@@ -429,6 +429,11 @@
SCHED_STAT_INC(sched_switch_stats[flags & SW_TYPE_MASK]);
#endif
/*
+ * Do the context switch callback before blocking.
+ */
+ if (td->td_cswitchcb != NULL)
+ (*td->td_cswitchcb)(SWCB_BLOCK, td);
+ /*
* Compute the amount of time during which the current
* thread was running, and add that to its total so far.
*/
@@ -459,6 +464,11 @@
CTR4(KTR_PROC, "mi_switch: new thread %ld (td_sched %p, pid %ld, %s)",
td->td_tid, td_get_sched(td), td->td_proc->p_pid, td->td_name);
+ /*
+ * Do the context switch callback for unblocking.
+ */
+ if (td->td_cswitchcb != NULL)
+ (*td->td_cswitchcb)(SWCB_UNBLOCK, td);
/*
* If the last thread was exiting, finish cleaning it up.
*/
Index: sys/kern/kern_thr.c
===================================================================
--- sys/kern/kern_thr.c
+++ sys/kern/kern_thr.c
@@ -29,9 +29,12 @@
#include "opt_compat.h"
#include "opt_posix.h"
+#include "opt_thrworkq.h"
+
#include <sys/param.h>
#include <sys/kernel.h>
#include <sys/lock.h>
+#include <sys/mman.h>
#include <sys/mutex.h>
#include <sys/priv.h>
#include <sys/proc.h>
@@ -52,6 +55,7 @@
#include <sys/ucontext.h>
#include <sys/thr.h>
#include <sys/rtprio.h>
+#include <sys/thrworkq.h>
#include <sys/umtx.h>
#include <sys/limits.h>
@@ -59,8 +63,25 @@
#include <machine/frame.h>
+#include <vm/pmap.h>
+#include <vm/vm.h>
+#include <vm/vm_extern.h>
+#include <vm/vm_map.h>
+
#include <security/audit/audit.h>
+/*
+ * Default stack guard size for thread. If set to zero then no
+ * guard page.
+ */
+#define THR_GUARD_DEFAULT PAGE_SIZE
+
+/*
+ * XXX - These should most likely be sysctl parameters.
+ */
+static vm_size_t thr_stack_default = THR_STACK_DEFAULT;
+static vm_size_t thr_stack_initial = THR_STACK_INITIAL;
+
static SYSCTL_NODE(_kern, OID_AUTO, threads, CTLFLAG_RW, 0,
"thread allocation");
@@ -312,8 +333,15 @@
umtx_thread_exit(td);
- /* Signal userland that it can free the stack. */
+#ifdef THRWORKQ
+ if (td->td_reuse_stack != NULL) {
+ thrworkq_reusestack(td->td_proc, td->td_reuse_stack);
+ td->td_reuse_stack = NULL;
+ }
+#endif
+
if ((void *)uap->state != NULL) {
+ /* Signal userland that it can free the stack. */
suword_lwpid(uap->state, 1);
kern_umtx_wake(td, uap->state, INT_MAX, 0);
}
@@ -596,6 +624,137 @@
}
int
+sys_thr_stack(struct thread *td, struct thr_stack_args *uap)
+{
+ vm_size_t stacksz, guardsz;
+ void *addr;
+ int error;
+
+ /* Round up to the nearest page size. */
+ stacksz = (vm_size_t)round_page(uap->stacksize);
+ guardsz = (vm_size_t)round_page(uap->guardsize);
+
+ if (stacksz == 0)
+ stacksz = thr_stack_default;
+
+ error = kern_thr_stack(td->td_proc, &addr, stacksz, guardsz);
+
+ td->td_retval[0] = (register_t) addr;
+
+ return (error);
+}
+
+/*
+ * kern_thr_stack() maps a new thread stack in the process. It returns
+ * the stack address in the 'addr' arg.
+ *
+ * Base address of the last stack allocated (including its red zone, if
+ * there is one). Stacks are allocated contiguously, starting beyond the
+ * top of the main stack. When a new stack is created, a red zone is
+ * typically created (actually, the red zone is mapped with PROT_NONE) above
+ * the top of the stack, such that the stack will not be able to grow all
+ * the way to the bottom of the next stack. This isn't fool-proof. It is
+ * possible for a stack to grow by a large amount, such that it grows into
+ * the next stack, and as long as the memory within the red zone is never
+ * accessed, nothing will prevent one thread stack from trouncing all over
+ * the next.
+ *
+ * low memory
+ * . . . . . . . . . . . . . . . . . .
+ * | |
+ * | stack 3 | start of 3rd thread stack
+ * +-----------------------------------+
+ * | |
+ * | Red Zone (guard page) | red zone for 2nd thread
+ * | |
+ * +-----------------------------------+
+ * | stack 2 - thr_stack_default | top of 2nd thread stack
+ * | |
+ * | |
+ * | |
+ * | |
+ * | stack 2 |
+ * +-----------------------------------+ <-- start of 2nd thread stack
+ * | |
+ * | Red Zone (guard page) | red zone for 1st thread
+ * | |
+ * +-----------------------------------+
+ * | stack 1 - thr_stack_default | top of 1st thread stack
+ * | |
+ * | |
+ * | |
+ * | |
+ * | stack 1 |
+ * +-----------------------------------+ <-- start of 1st thread stack
+ * | | (initial value of p->p_thrstack)
+ * | Red Zone (guard page) |
+ * | | red zone for main thread
+ * +-----------------------------------+
+ * | ->sv_usrstack - thr_stack_initial | top of main thread stack
+ * | | ^
+ * | | |
+ * | | |
+ * | | | stack growth
+ * | |
+ * +-----------------------------------+ <-- start of main thread stack
+ * (p->p_sysent->sv_usrstack)
+ * high memory
+ *
+ * XXX - This code assumes that the stack always grows down in address space.
+ */
+int
+kern_thr_stack(struct proc *p, void **addr, vm_size_t stacksz,
+ vm_size_t guardsz)
+{
+ vm_offset_t stackaddr;
+ vm_map_t map;
+ int error;
+
+ KASSERT(stacksz != 0, ("[%s: %d] stacksz = 0", __FILE__, __LINE__));
+
+ *addr = NULL;
+
+ PROC_LOCK(p);
+ if (p->p_thrstack == 0) {
+ /* Compute the start of the first thread stack. */
+ p->p_thrstack = p->p_sysent->sv_usrstack -
+ (vm_offset_t)(thr_stack_initial + THR_GUARD_DEFAULT);
+ }
+
+ stackaddr = p->p_thrstack - (vm_offset_t)(stacksz + guardsz);
+
+ /*
+ * Compute the next stack location unconditionally. Under normal
+ * operating conditions, the most likely reason for not being able
+ * to map the thread stack is a stack overflow of the adjacent
+ * thread stack.
+ */
+ p->p_thrstack -= (vm_offset_t)(stacksz + guardsz);
+ PROC_UNLOCK(p);
+
+ map = &p->p_vmspace->vm_map;
+ error = vm_mmap(map, &stackaddr, (stacksz + guardsz), VM_PROT_ALL,
+ PROT_READ | PROT_WRITE, MAP_STACK, OBJT_DEFAULT, NULL, 0);
+ if (error)
+ return (error);
+
+ if (guardsz != 0) {
+ error = vm_map_protect(map, stackaddr, stackaddr + guardsz,
+ PROT_NONE, 0);
+ if (error) {
+ /* unmap memory */
+ (void) vm_map_remove(map, stackaddr, stackaddr +
+ (stacksz + guardsz));
+
+ return (error);
+ }
+ }
+
+ *addr = (void *)(stackaddr + guardsz);
+ return (0);
+}
+
+int
kern_thr_alloc(struct proc *p, int pages, struct thread **ntd)
{
Index: sys/kern/kern_thread.c
===================================================================
--- sys/kern/kern_thread.c
+++ sys/kern/kern_thread.c
@@ -142,6 +142,9 @@
td->td_tid = tid_alloc();
+ td->td_cswitchcb = NULL;
+ td->td_threadlist = NULL;
+ td->td_reuse_stack = NULL;
/*
* Note that td_critnest begins life as 1 because the thread is not
* running and is thereby implicitly waiting to be on the receiving
@@ -769,6 +772,12 @@
if (td2 == td)
continue;
thread_lock(td2);
+ /* a workq thread may not actually be runnable */
+ if (td2->td_state == TDS_INACTIVE && (td2->td_flags & TDF_WORKQ)) {
+ thread_unlock(td2);
+ thread_stopped(p);
+ continue;
+ }
td2->td_flags |= TDF_ASTPENDING | TDF_NEEDSUSPCHK;
if (TD_IS_INHIBITED(td2)) {
wakeup_swapper |= weed_inhib(mode, td2, p);
Index: sys/kern/kern_thrworkq.c
===================================================================
--- /dev/null
+++ sys/kern/kern_thrworkq.c
@@ -0,0 +1,1863 @@
+/*-
+ * Copyright (c) 2009-2014, Stacey Son <sson@FreeBSD.org>
+ * Copyright (c) 2000-2009, Apple, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice unmodified, this list of conditions, and the following
+ * disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ *
+ */
+
+#include "opt_thrworkq.h"
+
+#ifdef THRWORKQ
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/kernel.h>
+
+#include <sys/condvar.h>
+#include <sys/cpuset.h>
+#include <sys/kthread.h>
+#include <sys/lock.h>
+#include <sys/malloc.h>
+#include <sys/mutex.h>
+#include <sys/proc.h>
+#include <sys/queue.h>
+#include <sys/sched.h>
+#include <sys/smp.h>
+#include <sys/syscall.h>
+#include <sys/syscallsubr.h>
+#include <sys/sysctl.h>
+#include <sys/sysent.h>
+#include <sys/syslog.h>
+#include <sys/systm.h>
+#include <sys/sysproto.h>
+#include <sys/thr.h>
+#include <sys/thrworkq.h>
+#include <sys/time.h>
+#include <sys/unistd.h>
+
+#include <machine/atomic.h>
+
+#if !(defined(__i386__) || defined(__x86_64__) || defined(__sparc64__) || \
+ defined(__sparc__) || defined(__ia64__))
+/*
+ * XXX atomic.h for each arch that doesn't have atomic_*_64() should maybe
+ * have something like the following.
+ */
+static struct mtx atomic_mtx;
+MTX_SYSINIT(atomic, &atomic_mtx, "atomic_mtx", MTX_DEF);
+
+static __inline u_int32_t
+atomic_cmpset_64(volatile u_int64_t *p, volatile u_int64_t cmpval,
+ volatile u_int64_t newval)
+{
+ int ret;
+
+ mtx_lock(&atomic_mtx);
+ if (*p == cmpval) {
+ *p = newval;
+ ret = 1;
+ } else {
+ ret = 0;
+ }
+ mtx_unlock(&atomic_mtx);
+
+ return (ret);
+}
+#endif
+
+struct threadlist {
+ TAILQ_ENTRY(threadlist) th_entry;
+ struct thread *th_thread;
+ int th_flags;
+ uint16_t th_affinity_tag;
+ uint16_t th_priority;
+ struct thrworkq *th_workq;
+ stack_t th_stack;
+};
+
+/*
+ * threadlist flags.
+ */
+#define TH_LIST_INITED 0x01
+#define TH_LIST_RUNNING 0x02
+#define TH_LIST_BLOCKED 0x04
+#define TH_LIST_UNSCHEDULED 0x08
+#define TH_LIST_BUSY 0x10
+#define TH_LIST_SBUSY 0x20
+
+struct workitem {
+ TAILQ_ENTRY(workitem) wi_entry;
+ void *wi_item;
+ uint32_t wi_affinity;
+};
+
+struct workitemlist {
+ TAILQ_HEAD(, workitem) wl_itemlist;
+ TAILQ_HEAD(, workitem) wl_freelist;
+};
+
+struct thrworkq {
+ void *wq_workqfunc; /* user workq function */
+ void *wq_newtdfunc; /* function called on new td start up */
+ void *wq_exitfunc; /* function called on td shutdown */
+ char *wq_ptlsbase; /* parent TLS base */
+ struct thread *wq_pthread; /* parent thread */
+ size_t wq_stacksize; /* stack size of each worker thread */
+ size_t wq_guardsize; /* stack guard size. Usually a page. */
+ struct workitem wq_array[WORKQ_OS_ELEM_MAX * WORKQ_OS_NUMPRIOS];
+ struct proc *wq_proc;
+ struct proc *wq_atimer_thread; /* timer kernel thread */
+ struct cv wq_atimer_cv; /* timer condition var */
+ struct callout *wq_atimer_call;
+ int wq_flags;
+ int wq_itemcount;
+ uint64_t wq_thread_yielded_timestamp;
+ uint32_t wq_thread_yielded_count;
+ uint32_t wq_timer_interval;
+ uint32_t wq_affinity_max;
+ uint32_t wq_threads_scheduled;
+ uint32_t wq_nthreads; /* num of thread in workq */
+ uint32_t wq_thidlecount; /* idle threads waiting */
+ /* requested concurrency for each priority level */
+ uint32_t wq_reqconc[WORKQ_OS_NUMPRIOS];
+ /* priority based item list */
+ struct workitemlist wq_list[WORKQ_OS_NUMPRIOS];
+ uint32_t wq_list_bitmap;
+ TAILQ_HEAD(, threadlist) wq_thrunlist; /* workq threads working. */
+ TAILQ_HEAD(, threadlist) wq_thidlelist; /* workq threads idle. */
+ void **wq_stacklist; /* recycled stacks FIFO. */
+ uint32_t wq_stacktop; /* top of stack list FIFO. */
+ uint32_t wq_maxthreads; /* max num of threads for
+ this workq. */
+ uint32_t *wq_thactive_count[WORKQ_OS_NUMPRIOS];
+ uint32_t *wq_thscheduled_count[WORKQ_OS_NUMPRIOS];
+ uint64_t *wq_lastblocked_ts[WORKQ_OS_NUMPRIOS];
+};
+
+/*
+ * Workqueue flags (wq_flags).
+ */
+#define WQ_LIST_INITED 0x01
+#define WQ_ATIMER_RUNNING 0x02
+#define WQ_EXITING 0x04
+
+/*
+ * Upcall types for twq_set_upcall().
+ */
+#define WQ_UPCALL_NEWTD 1
+#define WQ_UPCALL_WORKQ 2
+#define WQ_UPCALL_EXIT 3
+
+#define WORKQUEUE_LOCK(p) mtx_lock(&(p)->p_twqlock)
+#define WORKQUEUE_UNLOCK(p) mtx_unlock(&(p)->p_twqlock)
+#define WORKQUEUE_ASSERT_LOCKED(p) mtx_assert(&(p)->p_twqlock, MA_OWNED)
+
+#define WQ_TIMER_NEEDED(wq, start_timer) do { \
+ int oldflags = wq->wq_flags; \
+ \
+ if ( !(oldflags & (WQ_EXITING | WQ_ATIMER_RUNNING))) { \
+ if (atomic_cmpset_32(&wq->wq_flags, oldflags, \
+ oldflags | WQ_ATIMER_RUNNING)) \
+ start_timer = 1; \
+ } \
+} while (0)
+
+static MALLOC_DEFINE(M_THRWORKQ, "thr_workq", "Thread Workqueue");
+
+static int twq_additem(struct thrworkq *wq, int prio, void *item,
+ int affinity);
+static int twq_removeitem(struct thrworkq *wq, int prio, void *item);
+static int twq_run_nextitem(struct proc *p, struct thrworkq *wq,
+ struct thread *td, void *oc_item, int oc_prio,
+ int oc_affinity);
+static void twq_runitem(struct proc *p, void *item, struct thread *td,
+ struct threadlist *tl, int wake_thread);
+static int twq_unpark(struct thread *td, int timedout);
+static int twq_addnewthread(struct thrworkq *wq);
+static void twq_removethread(struct threadlist *tl);
+static void twq_add_timer(void *arg);
+static void twq_interval_timer_start(struct thrworkq *wq);
+static void twq_callback(int type, struct thread *td);
+static int twq_thr_busy(uint64_t cur_ts, uint64_t *lastblocked_tsp);
+static int twq_init_workqueue(struct proc *p, struct twq_param *arg);
+static int twq_timer_work(struct proc *p, struct thrworkq *wq,
+ int *start_timer);
+
+/*
+ * Thread work queue tunable parameter defaults.
+ */
+#define WORKQUEUE_MAXTHREADS 512 /* Max num of threads / workQ */
+#define WQ_YIELDED_THRESHOLD 2000 /* Max num of threads to yield
+ in window */
+#define WQ_YIELDED_WINDOW_USECS 30000 /* Yield window interval size */
+#define WQ_STALLED_WINDOW_USECS 200 /* Useconds until thread is
+ considered stalled */
+#define WQ_REDUCE_POOL_WINDOW_USECS 5000000 /* Useconds until idle thread
+ is removed */
+#define WQ_MAX_TIMER_INTERVAL_USECS 50000 /* Useconds to wait to check for
+ stalled or idle threads */
+
+/*
+ * Thread work queue tunable parameters.
+ */
+static uint32_t wq_yielded_threshold = WQ_YIELDED_THRESHOLD;
+static uint32_t wq_yielded_window_usecs = WQ_YIELDED_WINDOW_USECS;
+static uint32_t wq_stalled_window_usecs = WQ_STALLED_WINDOW_USECS;
+static uint32_t wq_reduce_pool_window_usecs = WQ_REDUCE_POOL_WINDOW_USECS;
+static uint32_t wq_max_timer_interval_usecs = WQ_MAX_TIMER_INTERVAL_USECS;
+static uint32_t wq_max_threads = WORKQUEUE_MAXTHREADS;
+extern int max_threads_per_proc;
+
+SYSCTL_INT(_kern, OID_AUTO, wq_yielded_threshold, CTLFLAG_RW,
+ &wq_yielded_threshold, 0, "Max number of threads to yield in window");
+SYSCTL_INT(_kern, OID_AUTO, wq_yielded_window_usecs, CTLFLAG_RW,
+ &wq_yielded_window_usecs, 0, "Size of yielded window in useconds");
+SYSCTL_INT(_kern, OID_AUTO, wq_stalled_window_usecs, CTLFLAG_RW,
+ &wq_stalled_window_usecs, 0, "Useconds until thread is stalled");
+SYSCTL_INT(_kern, OID_AUTO, wq_reduce_pool_window_usecs, CTLFLAG_RW,
+ &wq_reduce_pool_window_usecs, 0, "Useconds until idle thread is removed");
+SYSCTL_INT(_kern, OID_AUTO, wq_max_timer_interval_usecs, CTLFLAG_RW,
+ &wq_max_timer_interval_usecs, 0,
+ "Useconds between stalled/idle thread checks");
+SYSCTL_INT(_kern, OID_AUTO, wq_max_threads, CTLFLAG_RW,
+ &wq_max_threads, 0, "Max num of threads per workq");
+
+/*
+ * Set up callback from mi_switch().
+ */
+static void
+twq_set_schedcallback(struct thread *td, mi_switchcb_t cswitchcb)
+{
+
+ td->td_cswitchcb = cswitchcb;
+}
+
+static void
+twq_set_upcall(struct threadlist *tl, int which, void *item)
+{
+ struct thrworkq *wq = tl->th_workq;
+ void *func;
+
+ /* XXX should thread sched lock be held?? */
+
+ KASSERT(wq != NULL, ("[%s: %d] twq_set_upcall: wq == NULL", __FILE__,
+ __LINE__));
+
+ switch (which) {
+
+ case WQ_UPCALL_NEWTD:
+ func = wq->wq_newtdfunc;
+ break;
+
+ case WQ_UPCALL_WORKQ:
+ func = wq->wq_workqfunc;
+ break;
+
+ case WQ_UPCALL_EXIT:
+ func = wq->wq_exitfunc;
+ break;
+
+ default:
+ panic("twq_set_upcall: unknown upcall type");
+ }
+
+ cpu_set_upcall_kse(tl->th_thread, func, item, &tl->th_stack);
+}
+
+static void
+twq_schedthr(struct thread *newtd)
+{
+
+ thread_lock(newtd);
+ TD_SET_CAN_RUN(newtd);
+ sched_add(newtd, SRQ_BORING);
+ thread_unlock(newtd);
+}
+
+
+static uint64_t
+twq_microuptime(void)
+{
+ struct timeval t;
+
+ microuptime(&t);
+ return ((u_int64_t)t.tv_sec * 1000000 + (u_int64_t)t.tv_usec);
+}
+
+static uint32_t
+twq_usecstoticks(uint32_t usec)
+{
+ struct timeval tv;
+ uint32_t tticks;
+
+ tv.tv_sec = usec / 1000000;
+ tv.tv_usec = usec - (tv.tv_sec * 1000000);
+ tticks = tvtohz(&tv);
+
+ return (tticks);
+}
+
+static void
+twq_interval_timer_start(struct thrworkq *wq)
+{
+ uint32_t deadline;
+
+ if (wq->wq_timer_interval == 0)
+ wq->wq_timer_interval = wq_stalled_window_usecs;
+ else {
+
+ wq->wq_timer_interval = wq->wq_timer_interval * 2;
+ if (wq->wq_timer_interval > wq_max_timer_interval_usecs)
+ wq->wq_timer_interval = wq_max_timer_interval_usecs;
+ }
+
+ deadline = twq_usecstoticks(wq->wq_timer_interval);
+ callout_reset_curcpu(wq->wq_atimer_call, deadline, twq_add_timer,
+ wq);
+}
+
+static int
+twq_thr_busy(uint64_t cur_ts, uint64_t *lastblocked_tsp)
+{
+ uint64_t lastblocked_ts;
+ uint64_t elapsed;
+
+ /*
+ * The timestamp is updated atomically w/o holding the workqueue
+ * lock so we need to do an atomic read of the 64 bits so that
+ * we don't see a mismatched pair of 32 bit reads. We accomplish
+ * this in an architecturally independent fashion by using
+ * atomic_cmpset_64 to write back the value we grabbed. If it
+ * succeeds then we have a good timestamp to evaluate. If it fails
+ * we straddled grabbing the timestamp while it was being updated.
+ * Treat a failed update as a busy thread since it implies we are
+ * about to see a really fresh timestamp anyway.
+ *
+ */
+ lastblocked_ts = *lastblocked_tsp;
+
+ if (!atomic_cmpset_64(lastblocked_tsp, lastblocked_ts, lastblocked_ts))
+ return (1);
+
+ if (lastblocked_ts >= cur_ts) {
+ /*
+ * Because the update of the timestamp when a thread blocks
+ * isn't serialized against us looking at it (i.e. we don't
+ * hold the workq lock) it's possible to have a timestamp that
+ * matches the current time or that even looks to be in the
+ * future relative to when we grabbed the current time.
+ * Just treat this as a busy thread since it must have just
+ * blocked.
+ */
+ return (1);
+ }
+
+ /* Timestamps are in usecs. */
+ elapsed = cur_ts - lastblocked_ts;
+
+ if (elapsed < (uint64_t)wq_stalled_window_usecs)
+ return (1);
+
+ return (0);
+}
+
+static void
+twq_add_timer(void *arg)
+{
+ struct thrworkq *wq = (struct thrworkq *)arg;
+
+
+ cv_signal(&wq->wq_atimer_cv);
+}
+
+static int
+twq_timer_work(struct proc *p, struct thrworkq *wq, int *start_timer)
+{
+ int retval;
+ int add_thread;
+ uint32_t busycount;
+ uint32_t priority;
+ uint32_t affinity_tag;
+ uint32_t i;
+ uint64_t curtime;
+
+
+ WORKQUEUE_ASSERT_LOCKED(p);
+
+ retval = 1;
+ add_thread = 0;
+
+ /*
+ * Check to see if the stall frequency was beyond our tolerance
+ * or we have work on the queue, but haven't scheduled any new
+ * work within our acceptable time interval because there were
+ * no idle threads left to schedule.
+ */
+ if (wq->wq_itemcount) {
+
+ for (priority = 0; priority < WORKQ_OS_NUMPRIOS; priority++) {
+ if (wq->wq_list_bitmap & (1 << priority))
+ break;
+ }
+
+ KASSERT(priority < WORKQ_OS_NUMPRIOS,
+ ("[%s: %d] priority >= WORKQ_OS_NUMPRIOS", __FILE__,
+ __LINE__));
+
+ curtime = twq_microuptime();
+ busycount = 0;
+
+ for (affinity_tag = 0; affinity_tag < wq->wq_reqconc[priority];
+ affinity_tag++) {
+ /*
+ * if we have no idle threads, we can try to
+ * add them if needed.
+ */
+ if (wq->wq_thidlecount == 0)
+ add_thread = 1;
+
+ /*
+ * Look for first affinity group that is
+ * currently not active. i.e. no active
+ * threads at this priority level or higher
+ * and has not been active recently at this
+ * priority level or higher.
+ */
+ for (i = 0; i <= priority; i++) {
+ if (wq->wq_thactive_count[i][affinity_tag]) {
+ add_thread = 0;
+ break;
+ }
+ if (wq->wq_thscheduled_count[i][affinity_tag]) {
+ if (twq_thr_busy(curtime,
+ &wq->wq_lastblocked_ts[i]
+ [affinity_tag])) {
+ add_thread = 0;
+ busycount++;
+ break;
+ }
+ }
+ }
+ if (add_thread) {
+ retval = twq_addnewthread(wq);
+ break;
+ }
+ }
+ if (wq->wq_itemcount) {
+ /*
+ * As long as we have threads to schedule, and
+ * we successfully scheduled new work, keep
+ * trying.
+ */
+ while (wq->wq_thidlecount &&
+ !(wq->wq_flags & WQ_EXITING)) {
+ /*
+ * twq_run_nextitem is
+ * responsible for dropping the
+ * workqueue lock in all cases.
+ */
+ retval = twq_run_nextitem(p, wq, NULL, 0, 0, 0);
+ WORKQUEUE_LOCK(p);
+
+ if (retval == 0)
+ break;
+ }
+ if ( !(wq->wq_flags & WQ_EXITING) && wq->wq_itemcount) {
+ if (wq->wq_thidlecount == 0 && retval &&
+ add_thread)
+ return (1);
+
+ if (wq->wq_thidlecount == 0 || busycount)
+ WQ_TIMER_NEEDED(wq, *start_timer);
+ }
+ }
+ }
+
+ return (0);
+}
+
+static void
+twq_timer_kthread(void *arg)
+{
+ struct thrworkq *wq = (struct thrworkq *)arg;
+ struct proc *p;
+ int start_timer;
+
+ p = wq->wq_proc;
+
+ while (1) {
+
+ WORKQUEUE_LOCK(p);
+
+ cv_wait(&wq->wq_atimer_cv, &p->p_twqlock);
+
+ start_timer = 0;
+
+ /*
+ * The workq lock will protect us from seeing WQ_EXITING change
+ * state, but we still need to update this atomically in case
+ * someone else tries to start the timer just as we're
+ * releasing it.
+ */
+ while ( !(atomic_cmpset_32(&wq->wq_flags, wq->wq_flags,
+ (wq->wq_flags & ~WQ_ATIMER_RUNNING))));
+
+ while (!(wq->wq_flags & WQ_EXITING) &&
+ twq_timer_work(p, wq, &start_timer));
+
+ if ( !(wq->wq_flags & WQ_ATIMER_RUNNING))
+ wq->wq_timer_interval = 0;
+
+ if (wq->wq_flags & WQ_EXITING)
+ break;
+
+ WORKQUEUE_UNLOCK(p);
+
+ if (start_timer)
+ twq_interval_timer_start(wq);
+
+ }
+
+ wq->wq_atimer_thread = NULL;
+ WORKQUEUE_UNLOCK(p);
+ kproc_exit(0);
+}
+
+/*
+ * thrworkq_thread_yielded is called when a user thread voluntarily yields.
+ */
+void
+thrworkq_thread_yielded(void)
+{
+ struct thrworkq *wq;
+ struct proc *p = curproc;
+
+ if ((wq = p->p_twq) == NULL || wq->wq_itemcount == 0)
+ return;
+
+ WORKQUEUE_LOCK(p);
+
+ if (wq->wq_itemcount) {
+ uint64_t curtime;
+ uint64_t elapsed;
+
+ if (wq->wq_thread_yielded_count++ == 0)
+ wq->wq_thread_yielded_timestamp = twq_microuptime();
+
+ if (wq->wq_thread_yielded_count < wq_yielded_threshold) {
+ WORKQUEUE_UNLOCK(p);
+ return;
+ }
+
+ wq->wq_thread_yielded_count = 0;
+
+ curtime = twq_microuptime();
+ elapsed = curtime - wq->wq_thread_yielded_timestamp;
+
+ if (elapsed < wq_yielded_window_usecs) {
+
+ /*
+ * We have 'wq_yielded_threshold' or more threads
+ * yielding within a 'wq_yielded_window_usecs' period
+ * of time. Let's see if we need to add a thread or
+ * assign some work.
+ */
+
+ if (wq->wq_thidlecount == 0) {
+ (void) twq_addnewthread(wq);
+ /*
+ * 'twq_addnewthread' drops the workqueue
+ * lock when creating the new thread and then
+ * retakes it before returning. This window
+ * allows other threads to process work on the
+ * queue, so we need to recheck for available
+ * work. If none is found, we just return. The
+ * newly created thread will eventually get
+ * used (if it hasn't already).
+ */
+ if (wq->wq_itemcount == 0) {
+ WORKQUEUE_UNLOCK(p);
+ return;
+ }
+ }
+ if (wq->wq_thidlecount) {
+ uint32_t priority;
+ uint32_t affinity = -1;
+ void *item;
+ struct workitem *witem = NULL;
+ struct workitemlist *wl = NULL;
+ struct thread *td;
+ struct threadlist *tl;
+
+ /*
+ * We have an idle thread. Let's assign some
+ * work.
+ */
+
+ td = curthread;
+ if ((tl = td->td_threadlist))
+ affinity = tl->th_affinity_tag;
+
+ for (priority = 0;
+ priority < WORKQ_OS_NUMPRIOS; priority++) {
+ if (wq->wq_list_bitmap &
+ (1 << priority)) {
+
+ wl = (struct workitemlist *)
+ &wq->wq_list[priority];
+ break;
+ }
+ }
+ KASSERT(wl != NULL, ("[%s: %d] wl == NULL",
+ __FILE__, __LINE__));
+ KASSERT(!(TAILQ_EMPTY(&wl->wl_itemlist)),
+ ("[%s: %d] wl_itemlist not empty",
+ __FILE__, __LINE__));
+
+ witem = TAILQ_FIRST(&wl->wl_itemlist);
+ TAILQ_REMOVE(&wl->wl_itemlist, witem, wi_entry);
+
+ if (TAILQ_EMPTY(&wl->wl_itemlist))
+ wq->wq_list_bitmap &= ~(1 << priority);
+ wq->wq_itemcount--;
+
+ item = witem->wi_item;
+ witem->wi_item = (void *)0;
+ witem->wi_affinity = 0;
+
+ TAILQ_INSERT_HEAD(&wl->wl_freelist, witem,
+ wi_entry);
+
+ (void)twq_run_nextitem(p, wq,
+ NULL, item, priority, affinity);
+ /*
+ * twq_run_nextitem is responsible for
+ * dropping the workqueue lock in all cases.
+ */
+
+ return;
+ }
+ }
+ }
+ WORKQUEUE_UNLOCK(p);
+}
+
+/*
+ * Callback for mi_switch(). It is called before and after a context switch.
+ */
+static void
+twq_callback(int type, struct thread *td)
+{
+ struct threadlist *tl;
+ struct thrworkq *wq;
+
+ tl = td->td_threadlist;
+ wq = tl->th_workq;
+
+ switch (type) {
+
+ case SWCB_BLOCK:
+ {
+ uint32_t old_activecount;
+
+ old_activecount = atomic_fetchadd_32(
+ &wq->wq_thactive_count[tl->th_priority]
+ [tl->th_affinity_tag], -1);
+ if (old_activecount == 1) {
+ int start_timer = 0;
+ uint64_t curtime;
+ uint64_t *lastblocked_ptr;
+
+ /*
+ * We were the last active thread on this
+ * affinity set and we've got work to do.
+ */
+ lastblocked_ptr =
+ &wq->wq_lastblocked_ts[tl->th_priority]
+ [tl->th_affinity_tag];
+ curtime = twq_microuptime();
+
+ /*
+ * If we collide with another thread trying
+ * to update the last_blocked (really
+ * unlikely since another thread would have to
+ * get scheduled and then block after we start
+ * down this path), it's not a problem. Either
+ * timestamp is adequate, so no need to retry.
+ */
+ (void)atomic_cmpset_64(lastblocked_ptr,
+ *lastblocked_ptr, curtime);
+ if (wq->wq_itemcount)
+ WQ_TIMER_NEEDED(wq, start_timer);
+
+ if (start_timer)
+ twq_interval_timer_start(wq);
+ }
+ }
+ break;
+
+ case SWCB_UNBLOCK:
+ /*
+ * We cannot take the workqueue_lock here. An UNBLOCK can occur
+ * from a timer event which is run from an interrupt context. If
+ * the workqueue_lock is already held by this processor, we'll
+ * deadlock. The thread lock for this thread being UNBLOCKED
+ * is also held.
+ */
+ atomic_add_32(&wq->wq_thactive_count[tl->th_priority]
+ [tl->th_affinity_tag], 1);
+ break;
+ }
+}
+
+static void
+twq_removethread(struct threadlist *tl)
+{
+ struct thrworkq *wq;
+ struct thread *td;
+
+ wq = tl->th_workq;
+
+ TAILQ_REMOVE(&wq->wq_thidlelist, tl, th_entry);
+
+ wq->wq_nthreads--;
+ wq->wq_thidlecount--;
+
+ WORKQUEUE_UNLOCK(curproc);
+
+ td = tl->th_thread;
+
+ thread_lock(td);
+ /*
+ * Recycle this thread's stack. Done when the thread exits.
+ */
+ td->td_reuse_stack = tl->th_stack.ss_sp;
+
+ twq_set_schedcallback(td, NULL);
+
+ /*
+ * Clear the threadlist pointer so that a blocked thread, when
+ * woken up for termination, will not access the thread list,
+ * since it is about to be freed.
+ */
+ td->td_threadlist = NULL;
+
+ /*
+ * Arrange for the exit upcall so the thread can clean up and exit.
+ */
+ twq_set_upcall(tl, WQ_UPCALL_EXIT, NULL);
+
+ thread_unlock(td);
+
+ free(tl, M_THRWORKQ);
+}
+
+/*
+ * twq_addnewthread() is called with the workqueue lock held.
+ */
+static int
+twq_addnewthread(struct thrworkq *wq)
+{
+ int error;
+ struct threadlist *tl;
+ void *stackaddr = NULL;
+ struct thread *newtd, *td;
+ struct proc *p = wq->wq_proc;
+ int try;
+
+ WORKQUEUE_ASSERT_LOCKED(p);
+
+ if (wq->wq_nthreads >= wq->wq_maxthreads
+ /* || wq->wq_nthreads >= (max_threads_per_proc - 20) */)
+ return (0);
+ wq->wq_nthreads++;
+
+ td = wq->wq_pthread;
+
+ /*
+ * See if we have a stack we can reuse.
+ */
+ if (wq->wq_stacktop > 0) {
+ wq->wq_stacktop--;
+ stackaddr = wq->wq_stacklist[wq->wq_stacktop];
+ KASSERT(stackaddr != NULL, ("[%s:%d] stackaddr = NULL",
+ __FILE__, __LINE__));
+ wq->wq_stacklist[wq->wq_stacktop] = NULL;
+ }
+ WORKQUEUE_UNLOCK(p);
+
+ /*
+ * If needed, map a new thread stack and guard page.
+ */
+ if (stackaddr == NULL)
+ for (try = 0; try < 3; try++) {
+ error = kern_thr_stack(p, &stackaddr, wq->wq_stacksize,
+ wq->wq_guardsize);
+ if (error == 0)
+ break;
+ if (error != ENOMEM)
+ goto failed;
+ }
+
+ newtd = thread_alloc(0);
+ if (newtd == NULL) {
+ /* Save the stack address so we can reuse it. */
+ thrworkq_reusestack(p, stackaddr);
+ goto failed;
+ }
+
+ bzero(&newtd->td_startzero,
+ __rangeof(struct thread, td_startzero, td_endzero));
+ bcopy(&td->td_startcopy, &newtd->td_startcopy,
+ __rangeof(struct thread, td_startcopy, td_endcopy));
+ newtd->td_proc = p;
+ newtd->td_ucred = crhold(td->td_ucred);
+
+ cpu_set_upcall(newtd, td);
+
+ /*
+ * Allocate thread list and init.
+ */
+ tl = (struct threadlist *) malloc(sizeof(struct threadlist),
+ M_THRWORKQ, M_WAITOK | M_ZERO);
+
+ tl->th_thread = newtd;
+ tl->th_workq = wq;
+
+ tl->th_affinity_tag = -1;
+ tl->th_priority = WORKQ_OS_NUMPRIOS;
+
+ tl->th_stack.ss_sp = stackaddr;
+ tl->th_stack.ss_size = wq->wq_stacksize;
+
+ tl->th_flags = TH_LIST_INITED | TH_LIST_UNSCHEDULED;
+
+ newtd->td_threadlist = (void *)tl;
+
+ PROC_LOCK(p);
+ p->p_flag |= P_HADTHREADS;
+ newtd->td_sigmask = td->td_sigmask;
+ thread_link(newtd, p);
+ bcopy(p->p_comm, newtd->td_name, sizeof(newtd->td_name));
+ thread_lock(td);
+ sched_fork_thread(td, newtd);
+ thread_unlock(td);
+ /*
+ * Tell the suspend handling code to simply skip this thread
+ * if it is inactive.
+ */
+ newtd->td_flags |= TDF_WORKQ;
+ if (P_SHOULDSTOP(p))
+ newtd->td_flags |= TDF_ASTPENDING | TDF_NEEDSUSPCHK;
+ PROC_UNLOCK(p);
+
+ tidhash_add(newtd);
+
+ /*
+ * We don't add the new thread to the scheduler until we find some
+ * work for it to do.
+ */
+
+ WORKQUEUE_LOCK(p);
+ TAILQ_INSERT_TAIL(&wq->wq_thidlelist, tl, th_entry);
+ wq->wq_thidlecount++;
+
+ return (1);
+
+failed:
+ WORKQUEUE_LOCK(p);
+ wq->wq_nthreads--;
+
+ return (0);
+
+}
+
+static int
+twq_init_workqueue(struct proc *p, struct twq_param *arg)
+{
+ struct thrworkq *wq;
+ uint32_t i, j;
+ size_t wq_size;
+ char *ptr, *nptr;
+ struct workitem *witem;
+ struct workitemlist *wl;
+ struct callout *calloutp;
+ int error;
+ void *ssptr;
+ uint32_t maxthreads;
+
+ /* 'smp_cpus' is the number of cpus running. */
+ wq_size = sizeof(struct thrworkq) +
+ (smp_cpus * WORKQ_OS_NUMPRIOS * sizeof(uint32_t)) +
+ (smp_cpus * WORKQ_OS_NUMPRIOS * sizeof(uint32_t)) +
+ (smp_cpus * WORKQ_OS_NUMPRIOS * sizeof(uint64_t)) +
+ sizeof(uint64_t);
+
+ ptr = malloc(wq_size, M_THRWORKQ, M_WAITOK | M_ZERO);
+ maxthreads = wq_max_threads;
+ ssptr = malloc(sizeof(void *) * maxthreads, M_THRWORKQ,
+ M_WAITOK | M_ZERO);
+ calloutp = (struct callout *)malloc(sizeof(struct callout), M_THRWORKQ,
+ M_WAITOK | M_ZERO);
+
+ WORKQUEUE_LOCK(p);
+ if (p->p_twq != NULL) {
+ WORKQUEUE_UNLOCK(p);
+ free(ptr, M_THRWORKQ);
+ free(ssptr, M_THRWORKQ);
+ free(calloutp, M_THRWORKQ);
+ return (EINVAL);
+ }
+
+ /*
+ * Initialize workqueue information.
+ */
+ wq = (struct thrworkq *)ptr;
+ wq->wq_flags = WQ_LIST_INITED;
+ wq->wq_proc = p;
+ wq->wq_affinity_max = smp_cpus;
+ wq->wq_workqfunc = arg->twq_workqfunc;
+ wq->wq_newtdfunc = arg->twq_newtdfunc;
+ wq->wq_exitfunc = arg->twq_exitfunc;
+ if (arg->twq_stacksize == 0)
+ wq->wq_stacksize = THR_STACK_DEFAULT;
+ else
+ wq->wq_stacksize = round_page(arg->twq_stacksize);
+ wq->wq_guardsize = round_page(arg->twq_guardsize);
+ wq->wq_pthread = curthread;
+
+ wq->wq_stacklist = ssptr;
+ wq->wq_stacktop = 0;
+ wq->wq_maxthreads = maxthreads;
+
+ for (i = 0; i < WORKQ_OS_NUMPRIOS; i++) {
+ wl = (struct workitemlist *)&wq->wq_list[i];
+ TAILQ_INIT(&wl->wl_itemlist);
+ TAILQ_INIT(&wl->wl_freelist);
+
+ for (j = 0; j < WORKQ_OS_ELEM_MAX; j++) {
+ witem = &wq->wq_array[(i * WORKQ_OS_ELEM_MAX) + j];
+ TAILQ_INSERT_TAIL(&wl->wl_freelist, witem, wi_entry);
+ }
+ wq->wq_reqconc[i] = wq->wq_affinity_max;
+ }
+ nptr = ptr + sizeof(struct thrworkq);
+
+ for (i = 0; i < WORKQ_OS_NUMPRIOS; i++) {
+ wq->wq_thactive_count[i] = (uint32_t *)nptr;
+ nptr += (smp_cpus * sizeof(uint32_t));
+ }
+ for (i = 0; i < WORKQ_OS_NUMPRIOS; i++) {
+ wq->wq_thscheduled_count[i] = (uint32_t *)nptr;
+ nptr += (smp_cpus * sizeof(uint32_t));
+ }
+
+ /*
+ * Align nptr on a 64 bit boundary so we can do atomic
+ * operations on the timestamps. (We allocated an extra
+ * uint64_t space above so we have room for this adjustment.)
+ */
+ nptr += (sizeof(uint64_t) - 1);
+ nptr = (char *)((long)nptr & ~(sizeof(uint64_t) - 1));
+
+ for (i = 0; i < WORKQ_OS_NUMPRIOS; i++) {
+ wq->wq_lastblocked_ts[i] = (uint64_t *)nptr;
+ nptr += (smp_cpus * sizeof(uint64_t));
+ }
+ TAILQ_INIT(&wq->wq_thrunlist);
+ TAILQ_INIT(&wq->wq_thidlelist);
+
+ cv_init(&wq->wq_atimer_cv, "twq_atimer_cv");
+ wq->wq_atimer_call = calloutp;
+ callout_init(wq->wq_atimer_call, CALLOUT_MPSAFE);
+
+ PROC_LOCK(p);
+ p->p_twq = wq;
+ PROC_UNLOCK(p);
+ WORKQUEUE_UNLOCK(p);
+
+ error = kproc_create(twq_timer_kthread, (void *)wq,
+ &wq->wq_atimer_thread, RFHIGHPID, 0, "twq %d", p->p_pid);
+ if (error)
+ panic("twq_init_workqueue: kproc_create returned %d", error);
+
+ return (0);
+}
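
The 64-bit alignment adjustment in twq_init_workqueue() above is the standard round-up-to-a-power-of-two idiom. A minimal equivalent using the roundup2() macro from <sys/param.h> (same arithmetic, shown here only for clarity; the patch keeps the open-coded form):

	/* Align nptr to an 8-byte boundary before carving out the uint64_t arrays. */
	nptr = (char *)roundup2((uintptr_t)nptr, sizeof(uint64_t));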
+
+/*
+ * thr_workq() system call.
+ */
+int
+sys_thr_workq(struct thread *td, struct thr_workq_args *uap)
+{
+ struct twq_param arg;
+ struct proc *p = td->td_proc;
+ int cmd = uap->cmd;
+ int prio, reqconc, affinity;
+ int error = 0;
+ void *oc_item = NULL;
+ struct thrworkq *wq;
+
+ error = copyin(uap->args, &arg, sizeof(arg));
+ if (error)
+ return (error);
+
+ /*
+ * Affinity is not used yet.
+ */
+ affinity = -1;
+
+ switch (cmd) {
+
+ case WQOPS_INIT:
+ /*
+ * Return the PID for the handle for now. If we decide to
+ * support multiple workqueues per process then we will need
+ * to do something different.
+ */
+ error = suword(arg.twq_retid, p->p_pid);
+ if (error)
+ return (error);
+ return (twq_init_workqueue(p, &arg));
+
+ case WQOPS_QUEUE_ADD:
+ WORKQUEUE_LOCK(p);
+ if ((wq = p->p_twq) == NULL || arg.twq_id != p->p_pid) {
+ WORKQUEUE_UNLOCK(p);
+ return (EINVAL);
+ }
+
+ prio = arg.twq_add_prio;
+ /* affinity = arg.twq_add_affin; XXX Not yet used. */
+
+ /*
+ * Add item to workqueue. If the WORKQUEUE_OVERCOMMIT flag
+ * is set we want to commit the item to a thread even if we
+ * have to start a new one.
+ */
+ if (prio & WORKQUEUE_OVERCOMMIT) {
+ prio &= ~WORKQUEUE_OVERCOMMIT;
+ oc_item = arg.twq_add_item;
+ }
+ if ((prio < 0) || (prio >= WORKQ_OS_NUMPRIOS)) {
+ WORKQUEUE_UNLOCK(p);
+ return (EINVAL);
+ }
+
+ if (wq->wq_thidlecount == 0 &&
+ (oc_item || (wq->wq_nthreads < wq->wq_affinity_max))) {
+ (void) twq_addnewthread(wq);
+
+ /*
+ * If we could not start a new thread then this work item
+ * will have to wait on the queue.
+ */
+ if (wq->wq_thidlecount == 0)
+ oc_item = NULL;
+ }
+ if (oc_item == NULL)
+ error = twq_additem(wq, prio, arg.twq_add_item,
+ affinity);
+
+ /* twq_run_nextitem() drops the workqueue lock. */
+ (void)twq_run_nextitem(p, wq, NULL, oc_item, prio, affinity);
+
+ return (error);
+
+ case WQOPS_QUEUE_REMOVE:
+ WORKQUEUE_LOCK(p);
+ if ((wq = p->p_twq) == NULL || arg.twq_id != p->p_pid) {
+ WORKQUEUE_UNLOCK(p);
+ return (EINVAL);
+ }
+
+ prio = arg.twq_rm_prio;
+ /* affinity = arg.twq_add_affin; Not yet used. */
+
+ /*
+ * Remove item from workqueue.
+ */
+ if ((prio < 0) || (prio >= WORKQ_OS_NUMPRIOS)) {
+ WORKQUEUE_UNLOCK(p);
+ return (EINVAL);
+ }
+
+ error = twq_removeitem(wq, prio, arg.twq_rm_item);
+
+ /*
+ * twq_run_nextitem() drops the workqueue lock. See if
+ * we can assign a work item to an idle thread.
+ */
+ (void)twq_run_nextitem(p, wq, NULL, NULL, prio, affinity);
+
+ return (error);
+
+ case WQOPS_THREAD_RETURN:
+ WORKQUEUE_LOCK(p);
+ if ((wq = p->p_twq) == NULL || arg.twq_id != p->p_pid) {
+ WORKQUEUE_UNLOCK(p);
+ return (EINVAL);
+ }
+
+ if (td->td_threadlist == NULL) {
+ WORKQUEUE_UNLOCK(p);
+ return (EINVAL);
+ }
+
+ /*
+ * twq_run_nextitem() drops the workqueue lock. Assign
+ * any pending work to this (or other idle) thread.
+ */
+ (void)twq_run_nextitem(p, wq, td, NULL, 0, -1);
+
+ return (0);
+
+ case WQOPS_THREAD_SETCONC:
+ WORKQUEUE_LOCK(p);
+ if ((wq = p->p_twq) == NULL || arg.twq_id != p->p_pid) {
+ WORKQUEUE_UNLOCK(p);
+ return (EINVAL);
+ }
+
+ prio = arg.twq_setconc_prio;
+ if ((prio < 0) || (prio > WORKQ_OS_NUMPRIOS)) {
+ WORKQUEUE_UNLOCK(p);
+ return (EINVAL);
+ }
+
+ reqconc = arg.twq_setconc_conc;
+
+ if (prio < WORKQ_OS_NUMPRIOS)
+ wq->wq_reqconc[prio] = reqconc;
+ else {
+ for (prio = 0; prio < WORKQ_OS_NUMPRIOS; prio++)
+ wq->wq_reqconc[prio] = reqconc;
+ }
+
+ /*
+ * twq_run_nextitem() drops the workqueue lock. See if
+ * we can assign a work item to an idle thread.
+ */
+ (void)twq_run_nextitem(p, wq, NULL, NULL, 0, -1);
+
+ return (0);
+
+ default:
+ return (EINVAL);
+ }
+}
+
+/*
+ * thrworkq_reusestack is called to save a thread stack for reuse by a later workqueue thread.
+ */
+void
+thrworkq_reusestack(struct proc *p, void *stackaddr)
+{
+ struct thrworkq *wq;
+
+ WORKQUEUE_LOCK(p);
+ /* Recycle its stack. */
+ if ((wq = p->p_twq) != NULL)
+ wq->wq_stacklist[wq->wq_stacktop++] = stackaddr;
+ WORKQUEUE_UNLOCK(p);
+}
+
+/*
+ * thrworkq_exit is called when a process is about to exit (or has exec'ed).
+ */
+void
+thrworkq_exit(struct proc *p)
+{
+ struct thrworkq *wq;
+ struct threadlist *tl, *tlist;
+ struct thread *td;
+
+ KASSERT(p != 0, ("[%s: %d] thrworkq_exit: p = NULL",
+ __FILE__, __LINE__));
+
+ WORKQUEUE_LOCK(p);
+ if ((wq = p->p_twq) == NULL) {
+ WORKQUEUE_UNLOCK(p);
+ return;
+ }
+ p->p_twq = NULL;
+
+ /*
+ * We now arm the timer in the callback function without holding
+ * the workq lock. WQ_ATIMER_RUNNING is set via atomic_cmpset in
+ * order to ensure that only a single timer is running and to
+ * notice that WQ_EXITING has been set (we don't want to start a
+ * timer once WQ_EXITING is posted).
+ *
+ * So once we have successfully set WQ_EXITING, we cannot fire
+ * up a new timer. Therefore there is no need to clear the timer
+ * state atomically from the flags.
+ *
+ * Since we always hold the workq lock when dropping
+ * WQ_ATIMER_RUNNING, the check for it and the sleep until it is
+ * clear are protected.
+ */
+
+ while ( !(atomic_cmpset_32(&wq->wq_flags, wq->wq_flags,
+ (wq->wq_flags | WQ_EXITING))));
+
+ if (wq->wq_flags & WQ_ATIMER_RUNNING) {
+ if (callout_stop(wq->wq_atimer_call) != 0)
+ wq->wq_flags &= ~WQ_ATIMER_RUNNING;
+ }
+
+ /* Wait for timer thread to die. */
+ if (wq->wq_atimer_thread) {
+ cv_signal(&wq->wq_atimer_cv);
+ if (msleep(wq->wq_atimer_thread, &p->p_twqlock, PWAIT,
+ "twq_atimer", 60 * hz))
+ printf("thr workq timer thread didn't die.\n");
+ else
+ cv_destroy(&wq->wq_atimer_cv);
+ }
+ WORKQUEUE_UNLOCK(p);
+
+ TAILQ_FOREACH_SAFE(tl, &wq->wq_thrunlist, th_entry, tlist) {
+
+ td = tl->th_thread;
+
+ thread_lock(td);
+ twq_set_schedcallback(td, NULL);
+ td->td_threadlist = NULL;
+ thread_unlock(td);
+
+ TAILQ_REMOVE(&wq->wq_thrunlist, tl, th_entry);
+
+ free(tl, M_THRWORKQ);
+ }
+ TAILQ_FOREACH_SAFE(tl, &wq->wq_thidlelist, th_entry, tlist) {
+
+ td = tl->th_thread;
+
+ thread_lock(td);
+ twq_set_schedcallback(td, NULL);
+ td->td_threadlist = NULL;
+ twq_set_upcall(tl, WQ_UPCALL_EXIT, NULL);
+ thread_unlock(td);
+
+ if (tl->th_flags & TH_LIST_UNSCHEDULED) {
+ /*
+ * Schedule the unscheduled thread so it can exit.
+ */
+ tl->th_flags &= ~TH_LIST_UNSCHEDULED;
+ twq_schedthr(td);
+ }
+
+ TAILQ_REMOVE(&wq->wq_thidlelist, tl, th_entry);
+
+ free(tl, M_THRWORKQ);
+ }
+
+ callout_drain(wq->wq_atimer_call);
+ free(wq->wq_atimer_call, M_THRWORKQ);
+
+ free(wq->wq_stacklist, M_THRWORKQ);
+
+ free(wq, M_THRWORKQ);
+}
+
+/*
+ * Add item to workqueue. Workqueue lock must be held.
+ */
+static int
+twq_additem(struct thrworkq *wq, int prio, void *item, int affinity)
+{
+ struct workitem *witem;
+ struct workitemlist *wl;
+
+ WORKQUEUE_ASSERT_LOCKED(wq->wq_proc);
+
+ wl = (struct workitemlist *)&wq->wq_list[prio];
+
+ if (TAILQ_EMPTY(&wl->wl_freelist))
+ return (ENOMEM);
+
+ witem = (struct workitem *)TAILQ_FIRST(&wl->wl_freelist);
+ TAILQ_REMOVE(&wl->wl_freelist, witem, wi_entry);
+
+ witem->wi_item = item;
+ witem->wi_affinity = affinity;
+ TAILQ_INSERT_TAIL(&wl->wl_itemlist, witem, wi_entry);
+
+ wq->wq_list_bitmap |= (1 << prio);
+
+ wq->wq_itemcount++;
+
+ return (0);
+}
+
+/*
+ * Remove item from workqueue. Workqueue lock must be held.
+ */
+static int
+twq_removeitem(struct thrworkq *wq, int prio, void *item)
+{
+ struct workitem *witem;
+ struct workitemlist *wl;
+ int error = ESRCH;
+
+ WORKQUEUE_ASSERT_LOCKED(wq->wq_proc);
+
+ wl = (struct workitemlist *)&wq->wq_list[prio];
+
+ TAILQ_FOREACH(witem, &wl->wl_itemlist, wi_entry) {
+ if (witem->wi_item == item) {
+ TAILQ_REMOVE(&wl->wl_itemlist, witem, wi_entry);
+
+ if (TAILQ_EMPTY(&wl->wl_itemlist))
+ wq->wq_list_bitmap &= ~(1 << prio);
+ wq->wq_itemcount--;
+
+ witem->wi_item = NULL;
+ witem->wi_affinity = 0;
+ TAILQ_INSERT_HEAD(&wl->wl_freelist, witem, wi_entry);
+
+ error = 0;
+ break;
+ }
+ }
+ return (error);
+}
+
+/*
+ * twq_run_nextitem is called with the workqueue lock held and
+ * must drop it in all cases.
+ */
+static int
+twq_run_nextitem(struct proc *p, struct thrworkq *wq,
+ struct thread *thread, void * oc_item, int oc_prio, int oc_affinity)
+{
+ struct workitem *witem = NULL;
+ void *item = 0;
+ struct thread *th_to_run = NULL;
+ struct thread *th_to_park = NULL;
+ int wake_thread = 0;
+ uint32_t priority, orig_priority;
+ uint32_t affinity_tag, orig_affinity_tag;
+ uint32_t i;
+ uint32_t activecount, busycount;
+ uint32_t us_to_wait;
+ int start_timer = 0;
+ int adjust_counters = 1;
+ uint64_t curtime;
+ int error;
+ struct threadlist *tl = NULL;
+ struct threadlist *ttl = NULL;
+ struct workitemlist *wl = NULL;
+
+ WORKQUEUE_ASSERT_LOCKED(p);
+ /*
+ * From here until we drop the workq lock we can't be pre-empted.
+ * This is important since we have to independently update the priority
+ * and affinity that the thread is associated with and these values are
+ * used to index the multi-dimensional counter arrays in
+ * 'twq_callback'.
+ */
+ if (oc_item) {
+ uint32_t min_scheduled = 0;
+ uint32_t scheduled_count;
+ uint32_t active_count;
+ uint32_t t_affinity = 0;
+
+ priority = oc_prio;
+ item = oc_item;
+
+ if ((affinity_tag = oc_affinity) == (uint32_t)-1) {
+ /*
+ * CPU affinity is not assigned yet.
+ */
+ for (affinity_tag = 0;
+ affinity_tag < wq->wq_reqconc[priority];
+ affinity_tag++) {
+ /*
+ * Look for the affinity group with the
+ * least number of threads.
+ */
+ scheduled_count = 0;
+ active_count = 0;
+
+ for (i = 0; i <= priority; i++) {
+ scheduled_count +=
+ wq->wq_thscheduled_count[i][affinity_tag];
+ active_count +=
+ wq->wq_thactive_count[i][affinity_tag];
+ }
+ if (active_count == 0) {
+ t_affinity = affinity_tag;
+ break;
+ }
+ if (affinity_tag == 0 ||
+ scheduled_count < min_scheduled) {
+ min_scheduled = scheduled_count;
+ t_affinity = affinity_tag;
+ }
+ }
+ affinity_tag = t_affinity;
+ }
+ goto grab_idle_thread;
+ }
+ if (wq->wq_itemcount == 0) {
+ if ((th_to_park = thread) == NULL)
+ goto out_of_work;
+ goto parkit;
+ }
+ for (priority = 0; priority < WORKQ_OS_NUMPRIOS; priority++) {
+ if (wq->wq_list_bitmap & (1 << priority)) {
+ wl = (struct workitemlist *)&wq->wq_list[priority];
+ break;
+ }
+ }
+ KASSERT(wl != NULL, ("[%s:%d] workq list is NULL", __FILE__, __LINE__));
+ KASSERT(!(TAILQ_EMPTY(&wl->wl_itemlist)),
+ ("[%s:%d] workq list is empty", __FILE__, __LINE__));
+
+ curtime = twq_microuptime();
+
+ if (thread != NULL) {
+ tl = thread->td_threadlist;
+ KASSERT(tl != NULL, ("[%s:%d] tl = NULL", __FILE__, __LINE__));
+ affinity_tag = tl->th_affinity_tag;
+
+ /*
+ * Check to see if the affinity group this thread is
+ * associated with is still within the bounds of the
+ * specified concurrency for the priority level we're
+ * considering running work for.
+ */
+ if (affinity_tag < wq->wq_reqconc[priority]) {
+ /*
+ * We're a worker thread from the pool. Currently
+ * we are considered 'active' which means we're counted
+ * in 'wq_thactive_count'. Add up the active counts
+ * of all the priority levels up to and including
+ * the one we want to schedule.
+ */
+ for (activecount = 0, i = 0; i <= priority; i++) {
+ uint32_t acount;
+
+ acount =
+ wq->wq_thactive_count[i][affinity_tag];
+ if (acount == 0 &&
+ wq->wq_thscheduled_count[i][affinity_tag]){
+ if (twq_thr_busy(curtime,
+ &wq->wq_lastblocked_ts[i]
+ [affinity_tag]))
+ acount = 1;
+ }
+ activecount += acount;
+ }
+ if (activecount == 1) {
+ /*
+ * We're the only active thread associated
+ * with our affinity group at this priority
+ * level and higher so pick up some work and
+ * keep going.
+ */
+ th_to_run = thread;
+ goto pick_up_work;
+ }
+ }
+
+ /*
+ * There's more than one thread running in this affinity group
+ * or the concurrency level has been cut back for this priority.
+ * Let's continue on and look for an 'empty' group to run this
+ * work item in.
+ */
+ }
+ busycount = 0;
+
+ for (affinity_tag = 0; affinity_tag < wq->wq_reqconc[priority];
+ affinity_tag++) {
+ /*
+ * Look for first affinity group that is currently not active
+ * (i.e. no active threads at this priority level or higher
+ * and no threads that have run recently).
+ */
+ for (activecount = 0, i = 0; i <= priority; i++) {
+ if ((activecount =
+ wq->wq_thactive_count[i][affinity_tag]) != 0)
+ break;
+
+ if (wq->wq_thscheduled_count[i][affinity_tag] != 0) {
+ if (twq_thr_busy(curtime,
+ &wq->wq_lastblocked_ts[i][affinity_tag])) {
+ busycount++;
+ break;
+ }
+ }
+ }
+ if (activecount == 0 && busycount == 0)
+ break;
+ }
+ if (affinity_tag >= wq->wq_reqconc[priority]) {
+ /*
+ * We've already got at least 1 thread per affinity group in
+ * the active state.
+ */
+ if (busycount) {
+ /*
+ * We found at least 1 thread in the 'busy' state.
+ * Make sure we start the timer because if they are the
+ * threads keeping us from scheduling this workitem then
+ * we won't get a callback to kick off the timer. We
+ * need to start it now.
+ */
+ WQ_TIMER_NEEDED(wq, start_timer);
+ }
+
+ if (thread != NULL) {
+ /*
+ * Go park this one for later.
+ */
+ th_to_park = thread;
+ goto parkit;
+ }
+ goto out_of_work;
+ }
+ if (thread != NULL) {
+ /*
+ * We're overbooked on the affinity group this thread is
+ * currently associated with but we have work to do and
+ * at least 1 idle processor. Therefore, we'll just
+ * retarget this thread to the new affinity group.
+ */
+ th_to_run = thread;
+ goto pick_up_work;
+ }
+ if (wq->wq_thidlecount == 0) {
+ /*
+ * We don't have a thread to schedule but we have work to
+ * do and at least 1 affinity group doesn't currently have
+ * an active thread.
+ */
+ WQ_TIMER_NEEDED(wq, start_timer);
+ goto no_thread_to_run;
+ }
+
+grab_idle_thread:
+ /*
+ * We've got a candidate (affinity group with no currently active
+ * threads) to start a new thread on. We already know there is both
+ * work available and an idle thread, so activate a thread and then
+ * fall into the code that pulls a new workitem (pick_up_work).
+ */
+ TAILQ_FOREACH(ttl, &wq->wq_thidlelist, th_entry) {
+ if (ttl->th_affinity_tag == affinity_tag ||
+ ttl->th_affinity_tag == (uint16_t)-1) {
+ TAILQ_REMOVE(&wq->wq_thidlelist, ttl, th_entry);
+ tl = ttl;
+
+ break;
+ }
+ }
+ if (tl == NULL) {
+ tl = TAILQ_FIRST(&wq->wq_thidlelist);
+ TAILQ_REMOVE(&wq->wq_thidlelist, tl, th_entry);
+ }
+ wq->wq_thidlecount--;
+
+ TAILQ_INSERT_TAIL(&wq->wq_thrunlist, tl, th_entry);
+
+ if ((tl->th_flags & TH_LIST_UNSCHEDULED) == TH_LIST_UNSCHEDULED) {
+ tl->th_flags &= ~TH_LIST_UNSCHEDULED;
+ tl->th_flags |= TH_LIST_SBUSY;
+
+ thread_lock(tl->th_thread);
+ twq_set_schedcallback(tl->th_thread, twq_callback);
+ thread_unlock(tl->th_thread);
+
+ } else if ((tl->th_flags & TH_LIST_BLOCKED) == TH_LIST_BLOCKED) {
+ tl->th_flags &= ~TH_LIST_BLOCKED;
+ tl->th_flags |= TH_LIST_BUSY;
+ wake_thread = 1;
+ }
+ tl->th_flags |= TH_LIST_RUNNING;
+
+ wq->wq_threads_scheduled++;
+ wq->wq_thscheduled_count[priority][affinity_tag]++;
+ atomic_add_32(&wq->wq_thactive_count[priority][affinity_tag], 1);
+
+ adjust_counters = 0;
+ th_to_run = tl->th_thread;
+
+pick_up_work:
+ if (item == 0) {
+ witem = TAILQ_FIRST(&wl->wl_itemlist);
+ TAILQ_REMOVE(&wl->wl_itemlist, witem, wi_entry);
+
+ if (TAILQ_EMPTY(&wl->wl_itemlist))
+ wq->wq_list_bitmap &= ~(1 << priority);
+ wq->wq_itemcount--;
+
+ item = witem->wi_item;
+ witem->wi_item = (void *)0;
+ witem->wi_affinity = 0;
+ TAILQ_INSERT_HEAD(&wl->wl_freelist, witem, wi_entry);
+ }
+ orig_priority = tl->th_priority;
+ orig_affinity_tag = tl->th_affinity_tag;
+
+ tl->th_priority = priority;
+ tl->th_affinity_tag = affinity_tag;
+
+ if (adjust_counters &&
+ (orig_priority != priority || orig_affinity_tag != affinity_tag)) {
+ /*
+ * We need to adjust these counters based on this thread's
+ * new disposition w/r to affinity and priority.
+ */
+ atomic_add_int(&wq->wq_thactive_count[orig_priority]
+ [orig_affinity_tag], -1);
+ atomic_add_int(&wq->wq_thactive_count[priority][affinity_tag],
+ 1);
+ wq->wq_thscheduled_count[orig_priority][orig_affinity_tag]--;
+ wq->wq_thscheduled_count[priority][affinity_tag]++;
+ }
+ wq->wq_thread_yielded_count = 0;
+
+ WORKQUEUE_UNLOCK(p);
+
+ if (orig_affinity_tag != affinity_tag) {
+ /*
+ * This thread's affinity does not match the affinity group
+ * it's being placed on (it's either a brand new thread or
+ * we're retargeting an existing thread to a new group).
+ * An affinity tag of 0 means no affinity but we want our
+ * tags to be 0 based because they are used to index arrays
+ * so keep it 0 based internally and bump by 1 when
+ * calling out to set it.
+ */
+
+ /* XXX - Not used yet. */
+#if 0
+ CPU_ZERO(&mask);
+ CPU_SET(affinity_tag, &mask);
+ cpuset_setthread(th_to_run->td_tid, &mask);
+#endif
+ ;
+ }
+ if (orig_priority != priority) {
+ /*
+ * XXX Set thread priority.
+ *
+ * Can't simply just set thread priority here since:
+ *
+ * (1) The thread priority of TIMESHARE priority class is
+ * adjusted by the scheduler and there doesn't seem to be
+ * a per-thread 'nice' parameter.
+ *
+ * (2) We really shouldn't use the REALTIME class since
+ * thread workqueues doesn't require the process to have
+ * privilege.
+ *
+ * (3) Could maybe use IDLE priority class for
+ * WORKQ_LOW_PRIOQUEUE.
+ *
+ * Need to figure out something here.
+ */
+ ;
+ }
+ twq_runitem(p, item, th_to_run, tl, wake_thread);
+
+ return (1);
+
+out_of_work:
+ /*
+ * We have no work to do or we are fully booked w/r to running threads.
+ */
+no_thread_to_run:
+ WORKQUEUE_UNLOCK(p);
+
+ if (start_timer)
+ twq_interval_timer_start(wq);
+
+ return (0);
+
+parkit:
+ /*
+ * This is a workqueue thread with no more work to do.
+ * Park it for now.
+ */
+
+ KASSERT(th_to_park == curthread,
+ ("[%s, %d] twq_run_nextitem: th_to_park is not current thread",
+ __FILE__, __LINE__));
+
+ tl = th_to_park->td_threadlist;
+ if (tl == 0)
+ panic("wq thread with no threadlist");
+
+ TAILQ_REMOVE(&wq->wq_thrunlist, tl, th_entry);
+ tl->th_flags &= ~TH_LIST_RUNNING;
+
+ tl->th_flags |= TH_LIST_BLOCKED;
+ TAILQ_INSERT_HEAD(&wq->wq_thidlelist, tl, th_entry);
+
+ thread_lock(th_to_park);
+ twq_set_schedcallback(th_to_park, NULL);
+ thread_unlock(th_to_park);
+
+ atomic_add_32(&wq->wq_thactive_count[tl->th_priority]
+ [tl->th_affinity_tag], -1);
+ wq->wq_thscheduled_count[tl->th_priority][tl->th_affinity_tag]--;
+ wq->wq_threads_scheduled--;
+
+ if (wq->wq_thidlecount < 100)
+ us_to_wait = wq_reduce_pool_window_usecs -
+ (wq->wq_thidlecount * (wq_reduce_pool_window_usecs / 100));
+ else
+ us_to_wait = wq_reduce_pool_window_usecs / 100;
+
+ wq->wq_thidlecount++;
+
+ if (start_timer)
+ twq_interval_timer_start(wq);
+
+ /*
+ * XXX I may be imagining things but it seems that only one
+ * thread will get unparked when a bunch are parked. Need
+ * to look at this more closely.
+ */
+sleep_again:
+ error = msleep(tl, &p->p_twqlock, PCATCH, "twq_thpark",
+ twq_usecstoticks(us_to_wait));
+ if (error == 0 || error == EWOULDBLOCK) {
+ if (twq_unpark(th_to_park, error == EWOULDBLOCK) != 0)
+ goto sleep_again;
+ } else
+ WORKQUEUE_UNLOCK(p);
+
+ return (0); /* returning to system call handler */
+}
+
+static int
+twq_unpark(struct thread *td, int timedout)
+{
+ struct threadlist *tl;
+ struct proc *p = curproc;
+ struct thrworkq *wq = p->p_twq;
+
+ KASSERT(td == curthread, ("[%s: %d] twq_unpark: td != curthread",
+ __FILE__, __LINE__));
+ WORKQUEUE_ASSERT_LOCKED(p);
+
+ if (wq == NULL || (tl = td->td_threadlist) == NULL) {
+ WORKQUEUE_UNLOCK(p);
+ return (0);
+ }
+
+ if (timedout) {
+ if ( !(tl->th_flags & TH_LIST_RUNNING)) {
+ /*
+ * The timer popped us out and we've not been
+ * moved off of the idle list so we should now
+ * self-destruct.
+ *
+ * twq_removethread() consumes the workq lock.
+ */
+ twq_removethread(tl);
+ return (0);
+ }
+
+ while (tl->th_flags & TH_LIST_BUSY) {
+
+ /*
+ * The timer woke us up, but we have already started to
+ * make this thread runnable and have not yet finished
+ * that process, so wait for the normal wakeup.
+ * Set the timer again in case we miss the wakeup in
+ * a race condition.
+ */
+ /* Keep the workq lock held. */
+ return (1);
+ }
+ }
+
+ KASSERT(((tl->th_flags & (TH_LIST_RUNNING | TH_LIST_BUSY)) ==
+ TH_LIST_RUNNING), ("[%s: %d] twq_unpark: !TH_LIST_RUNNING",
+ __FILE__, __LINE__));
+
+ /*
+ * A normal wakeup of this thread occurred.
+ * No need for any synchronization with the
+ * timer and twq_runitem.
+ */
+ thread_lock(td);
+ twq_set_schedcallback(td, twq_callback);
+ thread_unlock(td);
+
+ WORKQUEUE_UNLOCK(p);
+ return (0);
+}
+
+static void
+twq_runitem(struct proc *p, void *item, struct thread *td,
+ struct threadlist *tl, int wake_thread)
+{
+
+ KASSERT(p->p_twq != NULL, ("[%s: %d] twq_runitem: wq = NULL",
+ __FILE__, __LINE__));
+
+ if (wake_thread) {
+ twq_set_upcall(tl, WQ_UPCALL_WORKQ, item);
+ WORKQUEUE_LOCK(p);
+ tl->th_flags &= ~TH_LIST_BUSY;
+ wakeup(tl);
+ WORKQUEUE_UNLOCK(p);
+ } else {
+ twq_set_upcall(tl, WQ_UPCALL_NEWTD, item);
+ WORKQUEUE_LOCK(p);
+ if (tl->th_flags & TH_LIST_SBUSY) {
+ tl->th_flags &= ~TH_LIST_SBUSY;
+ twq_schedthr(td);
+ }
+ WORKQUEUE_UNLOCK(p);
+ }
+}
+
+#else /* ! THRWORKQ */
+
+#include <sys/sysproto.h>
+
+int
+sys_thr_workq(struct thread *td, struct thr_workq_args *uap)
+{
+
+ return (ENOSYS);
+}
+
+#endif /* ! THRWORKQ */
Index: sys/kern/p1003_1b.c
===================================================================
--- sys/kern/p1003_1b.c
+++ sys/kern/p1003_1b.c
@@ -37,6 +37,7 @@
__FBSDID("$FreeBSD$");
#include "opt_posix.h"
+#include "opt_thrworkq.h"
#include <sys/param.h>
#include <sys/systm.h>
@@ -52,6 +53,9 @@
#include <sys/sysent.h>
#include <sys/syslog.h>
#include <sys/sysproto.h>
+#ifdef THRWORKQ
+#include <sys/thrworkq.h>
+#endif
MALLOC_DEFINE(M_P31B, "p1003.1b", "Posix 1003.1B");
@@ -292,6 +296,9 @@
sys_sched_yield(struct thread *td, struct sched_yield_args *uap)
{
+#ifdef THRWORKQ
+ thrworkq_thread_yielded();
+#endif
sched_relinquish(curthread);
return 0;
}
Index: sys/kern/syscalls.c
===================================================================
--- sys/kern/syscalls.c
+++ sys/kern/syscalls.c
@@ -474,8 +474,8 @@
"thr_set_name", /* 464 = thr_set_name */
"aio_fsync", /* 465 = aio_fsync */
"rtprio_thread", /* 466 = rtprio_thread */
- "#467", /* 467 = nosys */
- "#468", /* 468 = nosys */
+ "thr_stack", /* 467 = thr_stack */
+ "thr_workq", /* 468 = thr_workq */
"#469", /* 469 = __getpath_fromfd */
"#470", /* 470 = __getpath_fromaddr */
"sctp_peeloff", /* 471 = sctp_peeloff */
Index: sys/kern/systrace_args.c
===================================================================
--- sys/kern/systrace_args.c
+++ sys/kern/systrace_args.c
@@ -2606,6 +2606,22 @@
*n_args = 3;
break;
}
+ /* thr_stack */
+ case 467: {
+ struct thr_stack_args *p = params;
+ uarg[0] = p->stacksize; /* size_t */
+ uarg[1] = p->guardsize; /* size_t */
+ *n_args = 2;
+ break;
+ }
+ /* thr_workq */
+ case 468: {
+ struct thr_workq_args *p = params;
+ iarg[0] = p->cmd; /* int */
+ uarg[1] = (intptr_t) p->args; /* struct twq_param * */
+ *n_args = 2;
+ break;
+ }
/* sctp_peeloff */
case 471: {
struct sctp_peeloff_args *p = params;
@@ -7559,6 +7575,32 @@
break;
};
break;
+ /* thr_stack */
+ case 467:
+ switch(ndx) {
+ case 0:
+ p = "size_t";
+ break;
+ case 1:
+ p = "size_t";
+ break;
+ default:
+ break;
+ };
+ break;
+ /* thr_workq */
+ case 468:
+ switch(ndx) {
+ case 0:
+ p = "int";
+ break;
+ case 1:
+ p = "struct twq_param *";
+ break;
+ default:
+ break;
+ };
+ break;
/* sctp_peeloff */
case 471:
switch(ndx) {
@@ -10383,6 +10425,16 @@
if (ndx == 0 || ndx == 1)
p = "int";
break;
+ /* thr_stack */
+ case 467:
+ if (ndx == 0 || ndx == 1)
+ p = "caddr_t";
+ break;
+ /* thr_workq */
+ case 468:
+ if (ndx == 0 || ndx == 1)
+ p = "int";
+ break;
/* sctp_peeloff */
case 471:
if (ndx == 0 || ndx == 1)
Index: sys/sys/proc.h
===================================================================
--- sys/sys/proc.h
+++ sys/sys/proc.h
@@ -180,6 +180,10 @@
struct thread;
struct trapframe;
struct turnstile;
+struct thrworkq;
+struct threadlist;
+
+typedef void (*mi_switchcb_t)(int, struct thread *);
/*
* XXX: Does this belong in resource.h or resourcevar.h instead?
@@ -336,7 +340,11 @@
struct proc *td_rfppwait_p; /* (k) The vforked child */
struct vm_page **td_ma; /* (k) uio pages held */
int td_ma_cnt; /* (k) size of *td_ma */
+ mi_switchcb_t td_cswitchcb; /* (k) context switch callback. */
+ struct threadlist *td_threadlist; /* (?) thread workq thread list. */
+ void *td_reuse_stack; /* (?) reuse workq thread stack. */
void *td_emuldata; /* Emulator state data */
+ void *td_machdata; /* (k) mach state. */
int td_lastcpu; /* (t) Last cpu we were on. */
int td_oncpu; /* (t) Which cpu we are on. */
};
@@ -400,7 +408,7 @@
#define TDF_THRWAKEUP 0x00100000 /* Libthr thread must not suspend itself. */
#define TDF_SEINTR 0x00200000 /* EINTR on stop attempts. */
#define TDF_SWAPINREQ 0x00400000 /* Swapin request due to wakeup. */
-#define TDF_UNUSED23 0x00800000 /* --available-- */
+#define TDF_WORKQ 0x00800000 /* a workq thread */
#define TDF_SCHED0 0x01000000 /* Reserved for scheduler private use */
#define TDF_SCHED1 0x02000000 /* Reserved for scheduler private use */
#define TDF_SCHED2 0x04000000 /* Reserved for scheduler private use */
@@ -461,6 +469,7 @@
#define TDP_UIOHELD 0x10000000 /* Current uio has pages held in td_ma */
#define TDP_FORKING 0x20000000 /* Thread is being created through fork() */
#define TDP_EXECVMSPC 0x40000000 /* Execve destroyed old vmspace */
+#define TDP_UNUSUED32 0x80000000 /* Mach initialization done */
/*
* Reasons that the current thread can not be run yet.
@@ -647,6 +656,10 @@
*/
LIST_ENTRY(proc) p_orphan; /* (e) List of orphan processes. */
LIST_HEAD(, proc) p_orphans; /* (e) Pointer to list of orphans. */
+ vm_offset_t p_thrstack; /* ( ) next addr for thread stack */
+ struct mtx p_twqlock; /* (n) thread workqueue lock. */
+ struct thrworkq *p_twq; /* (^) thread workqueue. */
+ void *p_machdata; /* (c) Mach state data. */
};
#define p_session p_pgrp->pg_session
@@ -759,6 +772,9 @@
#define SW_VOL 0x0100 /* Voluntary switch. */
#define SW_INVOL 0x0200 /* Involuntary switch. */
#define SW_PREEMPT 0x0400 /* The invol switch is a preemption */
+/* Callback type. */
+#define SWCB_BLOCK 1 /* Thread is about to block. */
+#define SWCB_UNBLOCK 2 /* Thread was just unblocked. */
/* How values for thread_single(). */
#define SINGLE_NO_EXIT 0
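
SWCB_BLOCK and SWCB_UNBLOCK are the values passed to the per-thread context-switch callback installed in td_cswitchcb (twq_callback for workqueue threads). The scheduler-side hook is not part of this excerpt, so the call sites below are only a sketch of the expected shape, assuming the callback is invoked around blocking and unblocking:

	/* Sketch of a call site when a thread is about to block. */
	if (td->td_cswitchcb != NULL)
		(*td->td_cswitchcb)(SWCB_BLOCK, td);

	/* Sketch of a call site when a thread has just been made runnable. */
	if (td->td_cswitchcb != NULL)
		(*td->td_cswitchcb)(SWCB_UNBLOCK, td);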
Index: sys/sys/syscall.h
===================================================================
--- sys/sys/syscall.h
+++ sys/sys/syscall.h
@@ -389,6 +389,8 @@
#define SYS_thr_set_name 464
#define SYS_aio_fsync 465
#define SYS_rtprio_thread 466
+#define SYS_thr_stack 467
+#define SYS_thr_workq 468
#define SYS_sctp_peeloff 471
#define SYS_sctp_generic_sendmsg 472
#define SYS_sctp_generic_sendmsg_iov 473
Index: sys/sys/syscall.mk
===================================================================
--- sys/sys/syscall.mk
+++ sys/sys/syscall.mk
@@ -318,6 +318,8 @@
thr_set_name.o \
aio_fsync.o \
rtprio_thread.o \
+ thr_stack.o \
+ thr_workq.o \
sctp_peeloff.o \
sctp_generic_sendmsg.o \
sctp_generic_sendmsg_iov.o \
Index: sys/sys/syscallsubr.h
===================================================================
--- sys/sys/syscallsubr.h
+++ sys/sys/syscallsubr.h
@@ -241,6 +241,8 @@
int kern_thr_alloc(struct proc *, int pages, struct thread **);
int kern_thr_exit(struct thread *td);
int kern_thr_new(struct thread *td, struct thr_param *param);
+int kern_thr_stack(struct proc *p, void **addr, vm_size_t stacksz,
+ vm_size_t guardsz);
int kern_thr_suspend(struct thread *td, struct timespec *tsp);
int kern_truncate(struct thread *td, char *path, enum uio_seg pathseg,
off_t length);
Index: sys/sys/sysproto.h
===================================================================
--- sys/sys/sysproto.h
+++ sys/sys/sysproto.h
@@ -1378,6 +1378,14 @@
char lwpid_l_[PADL_(lwpid_t)]; lwpid_t lwpid; char lwpid_r_[PADR_(lwpid_t)];
char rtp_l_[PADL_(struct rtprio *)]; struct rtprio * rtp; char rtp_r_[PADR_(struct rtprio *)];
};
+struct thr_stack_args {
+ char stacksize_l_[PADL_(size_t)]; size_t stacksize; char stacksize_r_[PADR_(size_t)];
+ char guardsize_l_[PADL_(size_t)]; size_t guardsize; char guardsize_r_[PADR_(size_t)];
+};
+struct thr_workq_args {
+ char cmd_l_[PADL_(int)]; int cmd; char cmd_r_[PADR_(int)];
+ char args_l_[PADL_(struct twq_param *)]; struct twq_param * args; char args_r_[PADR_(struct twq_param *)];
+};
struct sctp_peeloff_args {
char sd_l_[PADL_(int)]; int sd; char sd_r_[PADR_(int)];
char name_l_[PADL_(uint32_t)]; uint32_t name; char name_r_[PADR_(uint32_t)];
@@ -2094,6 +2102,8 @@
int sys_thr_set_name(struct thread *, struct thr_set_name_args *);
int sys_aio_fsync(struct thread *, struct aio_fsync_args *);
int sys_rtprio_thread(struct thread *, struct rtprio_thread_args *);
+int sys_thr_stack(struct thread *, struct thr_stack_args *);
+int sys_thr_workq(struct thread *, struct thr_workq_args *);
int sys_sctp_peeloff(struct thread *, struct sctp_peeloff_args *);
int sys_sctp_generic_sendmsg(struct thread *, struct sctp_generic_sendmsg_args *);
int sys_sctp_generic_sendmsg_iov(struct thread *, struct sctp_generic_sendmsg_iov_args *);
@@ -2871,6 +2881,8 @@
#define SYS_AUE_thr_set_name AUE_NULL
#define SYS_AUE_aio_fsync AUE_NULL
#define SYS_AUE_rtprio_thread AUE_RTPRIO
+#define SYS_AUE_thr_stack AUE_NULL
+#define SYS_AUE_thr_workq AUE_NULL
#define SYS_AUE_sctp_peeloff AUE_NULL
#define SYS_AUE_sctp_generic_sendmsg AUE_NULL
#define SYS_AUE_sctp_generic_sendmsg_iov AUE_NULL
Index: sys/sys/thr.h
===================================================================
--- sys/sys/thr.h
+++ sys/sys/thr.h
@@ -44,6 +44,11 @@
/* Create the system scope thread. */
#define THR_SYSTEM_SCOPE 0x0002
+/* Default thread stack size. */
+#define THR_STACK_DEFAULT (sizeof(void *) / 4 * 1024 * 1024)
+/* Initial (main) thread's stack size. */
+#define THR_STACK_INITIAL (THR_STACK_DEFAULT * 2)
+
struct thr_param {
void (*start_func)(void *); /* thread entry function. */
void *arg; /* argument for entry function. */
@@ -79,6 +84,7 @@
int thr_suspend(const struct timespec *timeout);
int thr_wake(long id);
int thr_set_name(long id, const char *name);
+void *thr_stack(size_t stacksize, size_t guardsize);
__END_DECLS
#endif /* !_KERNEL */
Index: sys/sys/thrworkq.h
===================================================================
--- /dev/null
+++ sys/sys/thrworkq.h
@@ -0,0 +1,124 @@
+/*-
+ * Copyright (c) 2009-2014 Stacey Son <sson@FreeBSD.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice unmodified, this list of conditions, and the following
+ * disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ *
+ */
+
+#ifndef _SYS_THRWORKQ_H_
+#define _SYS_THRWORKQ_H_
+
+/*
+ * thr_workq() system call commands.
+ */
+#define WQOPS_INIT 1
+#define WQOPS_QUEUE_ADD 2
+#define WQOPS_QUEUE_REMOVE 3
+#define WQOPS_THREAD_RETURN 4
+#define WQOPS_THREAD_SETCONC 5
+
+/*
+ * Workqueue priority flags.
+ */
+#define WORKQUEUE_OVERCOMMIT 0x10000 /* Attempt to start new thread if
+ can't run immediately even if
+ it requires overcommitting
+ resources. */
+
+/*
+ * Kernel workqueue limits and sizing defaults.
+ */
+#define WORKQ_OS_ELEM_MAX 64 /* Max number of work items pending in
+ workq. */
+#define WORKQ_OS_NUMPRIOS 3 /* Number of workq priority levels. */
+
+struct wqa_init {
+ int *retid; /* workqueue ID returned */
+ void (*workqfunc)(void *); /* workq entry function */
+ void (*newtdfunc)(void *); /* new thread startup function */
+ void (*exitfunc)(void *); /* thread shutdown function */
+ size_t stacksize; /* per worker thread stack size */
+ size_t guardsize; /* per worker thread stack guard size */
+};
+
+struct wqa_qadd {
+ void *item; /* work item (arg to workq func) */
+ int prio; /* item priority */
+ int affin; /* item CPU affinity */
+};
+
+struct wqa_qrm {
+ void *item; /* work item */
+ int prio; /* item priority */
+};
+
+struct wqa_setconc {
+ int prio; /* priority queue */
+ int conc; /* request concurrency */
+};
+
+struct twq_param {
+ int twq_id;
+ union {
+ struct wqa_init init;
+ struct wqa_qadd qadd;
+ struct wqa_qrm qrm;
+ struct wqa_setconc setconc;
+ } a;
+
+#define twq_retid a.init.retid
+#define twq_workqfunc a.init.workqfunc
+#define twq_newtdfunc a.init.newtdfunc
+#define twq_exitfunc a.init.exitfunc
+#define twq_stacksize a.init.stacksize
+#define twq_guardsize a.init.guardsize
+
+#define twq_add_item a.qadd.item
+#define twq_add_prio a.qadd.prio
+#define twq_add_affin a.qadd.affin
+
+#define twq_rm_item a.qrm.item
+#define twq_rm_prio a.qrm.prio
+
+#define twq_setconc_prio a.setconc.prio
+#define twq_setconc_conc a.setconc.conc
+};
+
+#ifdef _KERNEL
+#include <sys/proc.h>
+
+extern void thrworkq_exit(struct proc *p);
+extern int thrworkq_newthread(struct thread *td, void *func, void *arg,
+ stack_t *stack);
+extern void thrworkq_reusestack(struct proc *p, void *stackaddr);
+extern void thrworkq_thread_yielded(void);
+
+#else
+
+int thr_workq(int cmd, struct twq_param *args);
+
+#endif /* _KERNEL */
+
+#endif /* ! _SYS_THRWORKQ_H_ */
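
For orientation, a minimal userland sketch of driving this interface through the thr_workq() wrapper declared above. The worker callbacks, the chosen priority, and the error handling are placeholders, and the libthr glue that actually exposes thr_workq() and dispatches the upcalls is not part of this diff:

#include <sys/types.h>
#include <sys/thrworkq.h>
#include <string.h>

static void worker(void *item) { /* run one work item */ }
static void newtd(void *item) { /* first-run setup for a fresh worker thread */ }
static void exitfn(void *item) { /* per-thread teardown before exit */ }

static int
workq_example(void *item)
{
	struct twq_param p;
	int id;

	/* Create the per-process workqueue; the kernel returns an id via twq_retid. */
	memset(&p, 0, sizeof(p));
	p.twq_retid = &id;
	p.twq_workqfunc = worker;
	p.twq_newtdfunc = newtd;
	p.twq_exitfunc = exitfn;
	p.twq_stacksize = 0;		/* 0 selects THR_STACK_DEFAULT */
	p.twq_guardsize = 0;
	if (thr_workq(WQOPS_INIT, &p) != 0)
		return (-1);

	/* Queue one item at the middle priority with no CPU affinity. */
	memset(&p, 0, sizeof(p));
	p.twq_id = id;
	p.twq_add_item = item;
	p.twq_add_prio = 1;
	p.twq_add_affin = -1;
	return (thr_workq(WQOPS_QUEUE_ADD, &p));
}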
