Index: sys/kern/kern_thread.c =================================================================== --- sys/kern/kern_thread.c +++ sys/kern/kern_thread.c @@ -52,6 +52,7 @@ #include #include #include +#include #include #include #include @@ -128,9 +129,20 @@ */ static uma_zone_t thread_zone; -static __exclusive_cache_line struct thread *thread_zombies; +struct thread_domain_data { + struct thread *tdd_zombies; + int tdd_reapticks; +} __aligned_if_smp(CACHE_LINE_SIZE); + +static struct thread_domain_data thread_domain_data[MAXMEMDOM]; + +static struct task thread_reap_task; +static struct callout thread_reap_callout; static void thread_zombie(struct thread *); +static void thread_reap_all(void); +static void thread_reap_task_cb(void *, int); +static void thread_reap_callout_cb(void *); static int thread_unsuspend_one(struct thread *td, struct proc *p, bool boundary); static void thread_free_batched(struct thread *td); @@ -159,30 +171,45 @@ EVENTHANDLER_LIST_DEFINE(thread_fini); static bool -thread_count_inc(void) +thread_count_inc_try(void) { - static struct timeval lastfail; - static int curfail; int nthreads_new; - thread_reap(); - nthreads_new = atomic_fetchadd_int(&nthreads, 1) + 1; if (nthreads_new >= maxthread - 100) { if (priv_check_cred(curthread->td_ucred, PRIV_MAXPROC) != 0 || nthreads_new >= maxthread) { atomic_subtract_int(&nthreads, 1); - if (ppsratecheck(&lastfail, &curfail, 1)) { - printf("maxthread limit exceeded by uid %u " - "(pid %d); consider increasing kern.maxthread\n", - curthread->td_ucred->cr_ruid, curproc->p_pid); - } return (false); } } return (true); } +static bool +thread_count_inc(void) +{ + static struct timeval lastfail; + static int curfail; + + thread_reap(); + if (thread_count_inc_try()) { + return (true); + } + + thread_reap_all(); + if (thread_count_inc_try()) { + return (true); + } + + if (ppsratecheck(&lastfail, &curfail, 1)) { + printf("maxthread limit exceeded by uid %u " + "(pid %d); consider increasing kern.maxthread\n", + curthread->td_ucred->cr_ruid, curproc->p_pid); + } + return (false); +} + static void thread_count_sub(int n) { @@ -450,6 +477,10 @@ M_TIDHASH, M_WAITOK | M_ZERO); for (i = 0; i < tidhashlock + 1; i++) rw_init(&tidhashtbl_lock[i], "tidhash"); + + TASK_INIT(&thread_reap_task, 0, thread_reap_task_cb, NULL); + callout_init(&thread_reap_callout, 1); + callout_reset(&thread_reap_callout, 5 * hz, thread_reap_callout_cb, NULL); } /* @@ -458,12 +489,14 @@ void thread_zombie(struct thread *td) { + struct thread_domain_data *tdd; struct thread *ztd; - ztd = atomic_load_ptr(&thread_zombies); + tdd = &thread_domain_data[uma_item_domain(td)]; + ztd = atomic_load_ptr(&tdd->tdd_zombies); for (;;) { td->td_zombie = ztd; - if (atomic_fcmpset_rel_ptr((uintptr_t *)&thread_zombies, + if (atomic_fcmpset_rel_ptr((uintptr_t *)&tdd->tdd_zombies, (uintptr_t *)&ztd, (uintptr_t)td)) break; continue; @@ -481,10 +514,10 @@ } /* - * Reap zombie threads. + * Reap zombies from passed domain. */ -void -thread_reap(void) +static void +thread_reap_domain(struct thread_domain_data *tdd) { struct thread *itd, *ntd; lwpid_t tidbatch[16]; @@ -494,11 +527,20 @@ * Reading upfront is pessimal if followed by concurrent atomic_swap, * but most of the time the list is empty. */ - if (thread_zombies == NULL) + if (tdd->tdd_zombies == NULL) return; - itd = (struct thread *)atomic_swap_ptr((uintptr_t *)&thread_zombies, + itd = (struct thread *)atomic_swap_ptr((uintptr_t *)&tdd->tdd_zombies, (uintptr_t)NULL); + if (itd == NULL) + return; + + /* + * Multiple CPUs can get here, the race is fine as ticks is only + * advisory. + */ + tdd->tdd_reapticks = ticks; + tidbatchn = 0; while (itd != NULL) { ntd = itd->td_zombie; @@ -520,6 +562,68 @@ } } +/* + * Reap zombies from all domains. + */ +static void +thread_reap_all(void) +{ + struct thread_domain_data *tdd; + int i, domain; + + domain = PCPU_GET(domain); + for (i = 0; i < vm_ndomains; i++) { + tdd = &thread_domain_data[(i + domain) % vm_ndomains]; + thread_reap_domain(tdd); + } +} + +/* + * Reap zombies from local domain. + */ +void +thread_reap(void) +{ + struct thread_domain_data *tdd; + int domain; + + domain = PCPU_GET(domain); + tdd = &thread_domain_data[domain]; + + thread_reap_domain(tdd); +} + +static void +thread_reap_task_cb(void *arg __unused, int pending __unused) +{ + + thread_reap_all(); +} + +static void +thread_reap_callout_cb(void *arg __unused) +{ + struct thread_domain_data *tdd; + int i, cticks, lticks; + bool wantreap; + + wantreap = false; + cticks = atomic_load_int(&ticks); + for (i = 0; i < vm_ndomains; i++) { + tdd = &thread_domain_data[i]; + lticks = tdd->tdd_reapticks; + if (tdd->tdd_zombies != NULL && + (u_int)(cticks - lticks) > 5 * hz) { + wantreap = true; + break; + } + } + + if (wantreap) + taskqueue_enqueue(taskqueue_thread, &thread_reap_task); + callout_reset(&thread_reap_callout, 5 * hz, thread_reap_callout_cb, NULL); +} + /* * Allocate a thread. */ Index: sys/sys/cdefs.h =================================================================== --- sys/sys/cdefs.h +++ sys/sys/cdefs.h @@ -241,6 +241,12 @@ #define __alignof(x) __offsetof(struct { char __a; x __b; }, __b) #endif +#ifdef SMP +#define __aligned_if_smp(x) __aligned(x) +#else +#define __aligned_if_smp(x) +#endif + /* * Keywords added in C11. */ Index: sys/vm/uma.h =================================================================== --- sys/vm/uma.h +++ sys/vm/uma.h @@ -300,6 +300,20 @@ #define UMA_ANYDOMAIN -1 /* Special value for domain search. */ +/* + * Returns domain backing the passed pointer. + */ +#ifdef NUMA +int uma_item_domain(void *item); +#else +static inline int +uma_item_domain(void *item) +{ + + return (0); +} +#endif + /* * Destroys an empty uma zone. If the zone is not empty uma complains loudly. * Index: sys/vm/uma_core.c =================================================================== --- sys/vm/uma_core.c +++ sys/vm/uma_core.c @@ -3259,6 +3259,16 @@ ("%s: unknown domain for item %p", __func__, item)); return (domain); } + +int +uma_item_domain(void *item) +{ + + /* + * XXX assert passed object was allocated by UMA. + */ + return (item_domain(item)); +} #endif #if defined(INVARIANTS) || defined(DEBUG_MEMGUARD) || defined(WITNESS)