Changeset View
Standalone View
sys/kern/kern_thread.c
Show First 20 Lines • Show All 46 Lines • ▼ Show 20 Lines | |||||
#include <sys/sdt.h> | #include <sys/sdt.h> | ||||
#include <sys/smp.h> | #include <sys/smp.h> | ||||
#include <sys/sched.h> | #include <sys/sched.h> | ||||
#include <sys/sleepqueue.h> | #include <sys/sleepqueue.h> | ||||
#include <sys/selinfo.h> | #include <sys/selinfo.h> | ||||
#include <sys/syscallsubr.h> | #include <sys/syscallsubr.h> | ||||
#include <sys/sysent.h> | #include <sys/sysent.h> | ||||
#include <sys/turnstile.h> | #include <sys/turnstile.h> | ||||
#include <sys/ktr.h> | #include <sys/ktr.h> | ||||
markj: taskqueue.h sorts before turnstile.h | |||||
#include <sys/rwlock.h> | #include <sys/rwlock.h> | ||||
#include <sys/umtx.h> | #include <sys/umtx.h> | ||||
#include <sys/vmmeter.h> | #include <sys/vmmeter.h> | ||||
#include <sys/cpuset.h> | #include <sys/cpuset.h> | ||||
#ifdef HWPMC_HOOKS | #ifdef HWPMC_HOOKS | ||||
#include <sys/pmckern.h> | #include <sys/pmckern.h> | ||||
#endif | #endif | ||||
#include <sys/priv.h> | #include <sys/priv.h> | ||||
▲ Show 20 Lines • Show All 59 Lines • ▼ Show 20 Lines | |||||
SDT_PROVIDER_DECLARE(proc); | SDT_PROVIDER_DECLARE(proc); | ||||
SDT_PROBE_DEFINE(proc, , , lwp__exit); | SDT_PROBE_DEFINE(proc, , , lwp__exit); | ||||
/* | /* | ||||
* thread related storage. | * thread related storage. | ||||
*/ | */ | ||||
static uma_zone_t thread_zone; | static uma_zone_t thread_zone; | ||||
static __exclusive_cache_line struct thread *thread_zombies; | struct thread_domain_data { | ||||
struct thread *tdd_zombies; | |||||
int tdd_localticks; | |||||
int tdd_remoteticks; | |||||
} __aligned(CACHE_LINE_SIZE); | |||||
struct thread_domain_data thread_domain_data[MAXMEMDOM]; | |||||
markjUnsubmitted Done Inline Actionsstatic markj: `static` | |||||
/*
 * Return the index of the memory domain that backs the storage of the
 * given thread structure.  This says nothing about where the thread
 * runs; it only identifies which per-domain zombie list td belongs on.
 */
static int
thread_domain(struct thread *td)
{

	return (uma_item_domain(td));
}
static void thread_reap_all(void); | |||||
static void thread_zombie(struct thread *); | static void thread_zombie(struct thread *); | ||||
static int thread_unsuspend_one(struct thread *td, struct proc *p, | static int thread_unsuspend_one(struct thread *td, struct proc *p, | ||||
bool boundary); | bool boundary); | ||||
static void thread_free_batched(struct thread *td); | static void thread_free_batched(struct thread *td); | ||||
static struct mtx tid_lock; | static struct mtx tid_lock; | ||||
static bitstr_t *tid_bitmap; | static bitstr_t *tid_bitmap; | ||||
Show All 13 Lines | |||||
#define TIDHASHLOCK(tid) (&tidhashtbl_lock[(tid) & tidhashlock]) | #define TIDHASHLOCK(tid) (&tidhashtbl_lock[(tid) & tidhashlock]) | ||||
EVENTHANDLER_LIST_DEFINE(thread_ctor); | EVENTHANDLER_LIST_DEFINE(thread_ctor); | ||||
EVENTHANDLER_LIST_DEFINE(thread_dtor); | EVENTHANDLER_LIST_DEFINE(thread_dtor); | ||||
EVENTHANDLER_LIST_DEFINE(thread_init); | EVENTHANDLER_LIST_DEFINE(thread_init); | ||||
EVENTHANDLER_LIST_DEFINE(thread_fini); | EVENTHANDLER_LIST_DEFINE(thread_fini); | ||||
static bool | static bool | ||||
thread_count_inc(void) | thread_count_inc_hard(void) | ||||
{ | { | ||||
static struct timeval lastfail; | static struct timeval lastfail; | ||||
static int curfail; | static int curfail; | ||||
int nthreads_new; | int nthreads_new; | ||||
thread_reap(); | thread_reap_all(); | ||||
nthreads_new = atomic_fetchadd_int(&nthreads, 1) + 1; | nthreads_new = atomic_fetchadd_int(&nthreads, 1) + 1; | ||||
if (nthreads_new >= maxthread - 100) { | if (nthreads_new >= maxthread - 100) { | ||||
if (priv_check_cred(curthread->td_ucred, PRIV_MAXPROC) != 0 || | if (priv_check_cred(curthread->td_ucred, PRIV_MAXPROC) != 0 || | ||||
nthreads_new >= maxthread) { | nthreads_new >= maxthread) { | ||||
atomic_subtract_int(&nthreads, 1); | atomic_subtract_int(&nthreads, 1); | ||||
markjUnsubmitted Done Inline ActionsCan we avoid duplicating these five lines of code? markj: Can we avoid duplicating these five lines of code? | |||||
mjgAuthorUnsubmitted Done Inline Actionsi don't see a handy way to do it and I don't think matters mjg: i don't see a handy way to do it and I don't think matters | |||||
markjUnsubmitted Done Inline ActionsDefine _thread_count_inc() or so which does it and returns the result, in thread_count_inc(), use the result to decide whether to reap and try again. markj: Define `_thread_count_inc()` or so which does it and returns the result, in `thread_count_inc… | |||||
mjgAuthorUnsubmitted Done Inline ActionsI was thinking thread_count_inc_try mjg: I was thinking thread_count_inc_try | |||||
if (ppsratecheck(&lastfail, &curfail, 1)) { | if (ppsratecheck(&lastfail, &curfail, 1)) { | ||||
printf("maxthread limit exceeded by uid %u " | printf("maxthread limit exceeded by uid %u " | ||||
"(pid %d); consider increasing kern.maxthread\n", | "(pid %d); consider increasing kern.maxthread\n", | ||||
curthread->td_ucred->cr_ruid, curproc->p_pid); | curthread->td_ucred->cr_ruid, curproc->p_pid); | ||||
} | } | ||||
return (false); | return (false); | ||||
} | } | ||||
} | } | ||||
return (true); | return (true); | ||||
} | } | ||||
static bool | |||||
thread_count_inc(void) | |||||
{ | |||||
int nthreads_new; | |||||
thread_reap(); | |||||
nthreads_new = atomic_fetchadd_int(&nthreads, 1) + 1; | |||||
if (nthreads_new >= maxthread - 100) { | |||||
if (priv_check_cred(curthread->td_ucred, PRIV_MAXPROC) != 0 || | |||||
nthreads_new >= maxthread) { | |||||
atomic_subtract_int(&nthreads, 1); | |||||
return (thread_count_inc_hard()); | |||||
} | |||||
} | |||||
return (true); | |||||
} | |||||
static void | static void | ||||
thread_count_sub(int n) | thread_count_sub(int n) | ||||
{ | { | ||||
atomic_subtract_int(&nthreads, n); | atomic_subtract_int(&nthreads, n); | ||||
} | } | ||||
static void | static void | ||||
▲ Show 20 Lines • Show All 259 Lines • ▼ Show 20 Lines | |||||
} | } | ||||
/* | /* | ||||
* Place an unused thread on the zombie list. | * Place an unused thread on the zombie list. | ||||
*/ | */ | ||||
void | void | ||||
thread_zombie(struct thread *td) | thread_zombie(struct thread *td) | ||||
{ | { | ||||
struct thread_domain_data *tdd; | |||||
struct thread *ztd; | struct thread *ztd; | ||||
ztd = atomic_load_ptr(&thread_zombies); | tdd = &thread_domain_data[thread_domain(td)]; | ||||
ztd = atomic_load_ptr(&tdd->tdd_zombies); | |||||
Not Done Inline ActionsI wrote D27207 to try and address the need for uma_item_domain(). With that you'd write tdd = &thread_domain_data[vm_phys_domain(vtophys(td))]; and only need to include <vm/pmap.h> and <vm/vm_phys.h>, which I think is reasonable. markj: I wrote D27207 to try and address the need for uma_item_domain(). With that you'd write
```… | |||||
Done Inline ActionsI really think this should be combined to vtodomain() or similar. For example the kernel can start handing out VAs which encode the target domain or there may be some other optimization which elides the need to grab the physical address. mjg: I really think this should be combined to vtodomain() or similar. For example the kernel can… | |||||
Not Done Inline Actionsvtodomain() would just expand to what I wrote above, and I'm not sure yet where it should go since it depends on both the pmap and vm_phys modules. Sure, additional optimizations are possible in some cases, but first I want the existing KPIs to be a bit cleaner. markj: vtodomain() would just expand to what I wrote above, and I'm not sure yet where it should go… | |||||
Done Inline ActionsThat's fine, the point is that should anything change here down the road consumers will only need to be recompiled to take advantage of it. mjg: That's fine, the point is that should anything change here down the road consumers will only… | |||||
Not Done Inline ActionsAssuming that generic optimizations are sufficient, yes. If you start doing things like encoding a domain ID in the VA, then you'd want more specialized interfaces anyway. I'm not sure how that should look, so I prefer to punt on it for now. markj: Assuming that generic optimizations are sufficient, yes. If you start doing things like… | |||||
Done Inline Actionsagain, it was just an example, general point being to hide the detail of translations from the consumer. I don't understand where the resistance to a vtodomain (or whatever other name) is coming from, but I'm not going to insist. Just provide something to call and I'll use it. mjg: again, it was just an example, general point being to hide the detail of translations from the… | |||||
for (;;) { | for (;;) { | ||||
td->td_zombie = ztd; | td->td_zombie = ztd; | ||||
Done Inline ActionsIf you pass &thread_domain_data[i] instead of i, this gets rid of cast gymnastic and one more line in thread_reap_worker. kib: If you pass &thread_domain_data[i] instead of i, this gets rid of cast gymnastic and one more… | |||||
if (atomic_fcmpset_rel_ptr((uintptr_t *)&thread_zombies, | if (atomic_fcmpset_rel_ptr((uintptr_t *)&tdd->tdd_zombies, | ||||
(uintptr_t *)&ztd, (uintptr_t)td)) | (uintptr_t *)&ztd, (uintptr_t)td)) | ||||
break; | break; | ||||
continue; | continue; | ||||
} | } | ||||
} | } | ||||
/* | /* | ||||
* Release a thread that has exited after cpu_throw(). | * Release a thread that has exited after cpu_throw(). | ||||
*/ | */ | ||||
void | void | ||||
thread_stash(struct thread *td) | thread_stash(struct thread *td) | ||||
{ | { | ||||
atomic_subtract_rel_int(&td->td_proc->p_exitthreads, 1); | atomic_subtract_rel_int(&td->td_proc->p_exitthreads, 1); | ||||
thread_zombie(td); | thread_zombie(td); | ||||
} | } | ||||
/* | /* | ||||
* Reap zombie threads. | * Zombie reaping support. | ||||
* | |||||
* Each domain has its own zombie list and a local ticks counter signifying | |||||
* the last time reaping took place. However, there may be no reaping-inducing | |||||
* activity on the domain making threads linger. In order to combat the problem | |||||
* remote tick counter is also added, letting CPUs decide whether to take a peek | |||||
* at the other domains and reap them. | |||||
* | |||||
* Note if there is 0 activity all caches can have lingering threads, but that's | |||||
* fine as there is nothing to do. | |||||
*/ | */ | ||||
void | static void | ||||
thread_reap(void) | thread_reap_domain(struct thread_domain_data *tdd) | ||||
{ | { | ||||
struct thread *itd, *ntd; | struct thread *itd, *ntd; | ||||
lwpid_t tidbatch[16]; | lwpid_t tidbatch[16]; | ||||
int tidbatchn; | int tidbatchn; | ||||
/* | /* | ||||
* Reading upfront is pessimal if followed by concurrent atomic_swap, | * Reading upfront is pessimal if followed by concurrent atomic_swap, | ||||
* but most of the time the list is empty. | * but most of the time the list is empty. | ||||
*/ | */ | ||||
if (thread_zombies == NULL) | if (tdd->tdd_zombies == NULL) | ||||
return; | return; | ||||
itd = (struct thread *)atomic_swap_ptr((uintptr_t *)&thread_zombies, | itd = (struct thread *)atomic_swap_ptr((uintptr_t *)&tdd->tdd_zombies, | ||||
(uintptr_t)NULL); | (uintptr_t)NULL); | ||||
if (itd == NULL) | |||||
return; | |||||
/* | |||||
* Multiple CPUs can get here, the race is fine as ticks is only | |||||
* advisory. | |||||
*/ | |||||
tdd->tdd_localticks = ticks; | |||||
tidbatchn = 0; | tidbatchn = 0; | ||||
while (itd != NULL) { | while (itd != NULL) { | ||||
ntd = itd->td_zombie; | ntd = itd->td_zombie; | ||||
tidbatch[tidbatchn] = itd->td_tid; | tidbatch[tidbatchn] = itd->td_tid; | ||||
tidbatchn++; | tidbatchn++; | ||||
thread_cow_free(itd); | thread_cow_free(itd); | ||||
thread_free_batched(itd); | thread_free_batched(itd); | ||||
if (tidbatchn == nitems(tidbatch)) { | if (tidbatchn == nitems(tidbatch)) { | ||||
tid_free_batch(tidbatch, tidbatchn); | tid_free_batch(tidbatch, tidbatchn); | ||||
thread_count_sub(tidbatchn); | thread_count_sub(tidbatchn); | ||||
tidbatchn = 0; | tidbatchn = 0; | ||||
} | } | ||||
itd = ntd; | itd = ntd; | ||||
} | } | ||||
if (tidbatchn != 0) { | if (tidbatchn != 0) { | ||||
tid_free_batch(tidbatch, tidbatchn); | tid_free_batch(tidbatch, tidbatchn); | ||||
thread_count_sub(tidbatchn); | thread_count_sub(tidbatchn); | ||||
} | |||||
} | |||||
/* | |||||
* Reap zombies from all domains. | |||||
*/ | |||||
static void | |||||
thread_reap_all(void) | |||||
{ | |||||
struct thread_domain_data *tdd; | |||||
int i, domain; | |||||
domain = PCPU_GET(domain); | |||||
for (i = 0; i < vm_ndomains; i++) { | |||||
tdd = &thread_domain_data[(i + domain) % vm_ndomains]; | |||||
thread_reap_domain(tdd); | |||||
} | |||||
} | |||||
/* | |||||
* Reap zombies from other domains if they linger too long. | |||||
*/ | |||||
static void | |||||
thread_reap_rest_cond(int domain) | |||||
{ | |||||
struct thread_domain_data *tdd; | |||||
int cticks, lticks, i; | |||||
domain++; | |||||
cticks = atomic_load_int(&ticks); | |||||
for (i = 0; i < vm_ndomains - 1; i++) { | |||||
tdd = &thread_domain_data[(i + domain) % vm_ndomains]; | |||||
lticks = atomic_load_int(&tdd->tdd_localticks); | |||||
if (lticks > cticks || lticks + 1000 < cticks) { | |||||
markjUnsubmitted Done Inline Actions1000 should instead be an expression that scales with hz. If the intent is to only reap once a second, I suggest rewriting this as (u_int)(ticks - lticks) >= hz. markj: `1000` should instead be an expression that scales with `hz`.
If the intent is to only reap… | |||||
mjgAuthorUnsubmitted Done Inline Actionsit was smaller, but will dedup to the above later. mjg: it was smaller, but will dedup to the above later. | |||||
thread_reap_domain(tdd); | |||||
} | |||||
} | |||||
} | |||||
void | |||||
thread_reap(void) | |||||
{ | |||||
struct thread_domain_data *tdd; | |||||
int domain, cticks, rticks; | |||||
domain = PCPU_GET(domain); | |||||
tdd = &thread_domain_data[domain]; | |||||
thread_reap_domain(tdd); | |||||
cticks = atomic_load_int(&ticks); | |||||
Done Inline ActionsI think this can cause shutdown to hang by up to 5s per domain. Instead of dedicated threads, you could use a self-arming callout to poll the per-domain queues and schedule a taskqueue thread to drain them. markj: I think this can cause shutdown to hang by up to 5s per domain.
Instead of dedicated threads… | |||||
rticks = atomic_load_int(&tdd->tdd_remoteticks); | |||||
if (rticks > cticks || rticks + 1000 < cticks) { | |||||
markjUnsubmitted Done Inline ActionsDitto. markj: Ditto. | |||||
if (atomic_cmpset_int(&tdd->tdd_remoteticks, rticks, cticks)) { | |||||
thread_reap_rest_cond(domain); | |||||
} | |||||
} | } | ||||
} | } | ||||
/* | /* | ||||
* Allocate a thread. | * Allocate a thread. | ||||
*/ | */ | ||||
struct thread * | struct thread * | ||||
thread_alloc(int pages) | thread_alloc(int pages) | ||||
Not Done Inline ActionsWe have taskqueue_enqueue_timeout(9) that hides this machinery. kib: We have taskqueue_enqueue_timeout(9) that hides this machinery. | |||||
Done Inline ActionsI have seen it. The taskqueue callback is only there just in case and it looks like it is more expensive to execute than mere callout, but I'm not going to insist one way or the other. mjg: I have seen it. The taskqueue callback is only there just in case and it looks like it is more… | |||||
{ | { | ||||
struct thread *td; | struct thread *td; | ||||
lwpid_t tid; | lwpid_t tid; | ||||
if (!thread_count_inc()) { | if (!thread_count_inc()) { | ||||
return (NULL); | return (NULL); | ||||
} | } | ||||
▲ Show 20 Lines • Show All 986 Lines • Show Last 20 Lines |
taskqueue.h sorts before turnstile.h