Index: sys/kern/kern_thread.c
===================================================================
--- sys/kern/kern_thread.c
+++ sys/kern/kern_thread.c
@@ -52,6 +52,7 @@
 #include
 #include
 #include
+#include <sys/kthread.h>
 #include
 #include
 #include
@@ -128,7 +129,22 @@
  */
 static uma_zone_t thread_zone;
 
-static __exclusive_cache_line struct thread *thread_zombies;
+static void thread_reap_worker(void *arg);
+
+struct thread_domain_data {
+	struct thread *tdd_zombies;
+} __aligned(CACHE_LINE_SIZE);
+
+static struct thread_domain_data thread_domain_data[MAXMEMDOM];
+
+static int
+thread_domain(struct thread *td)
+{
+
+	return (uma_item_domain(td));
+}
+
+static void thread_reap_all(void);
 
 static void thread_zombie(struct thread *);
 static int thread_unsuspend_one(struct thread *td, struct proc *p,
@@ -159,30 +175,45 @@
 EVENTHANDLER_LIST_DEFINE(thread_fini);
 
 static bool
-thread_count_inc(void)
+thread_count_inc_try(void)
 {
-	static struct timeval lastfail;
-	static int curfail;
 	int nthreads_new;
 
-	thread_reap();
-
 	nthreads_new = atomic_fetchadd_int(&nthreads, 1) + 1;
 	if (nthreads_new >= maxthread - 100) {
 		if (priv_check_cred(curthread->td_ucred, PRIV_MAXPROC) != 0 ||
 		    nthreads_new >= maxthread) {
 			atomic_subtract_int(&nthreads, 1);
-			if (ppsratecheck(&lastfail, &curfail, 1)) {
-				printf("maxthread limit exceeded by uid %u "
-				    "(pid %d); consider increasing kern.maxthread\n",
-				    curthread->td_ucred->cr_ruid, curproc->p_pid);
-			}
 			return (false);
 		}
 	}
 	return (true);
 }
 
+static bool
+thread_count_inc(void)
+{
+	static struct timeval lastfail;
+	static int curfail;
+
+	thread_reap();
+	if (thread_count_inc_try()) {
+		return (true);
+	}
+
+	thread_reap_all();
+	if (thread_count_inc_try()) {
+		return (true);
+	}
+
+	if (ppsratecheck(&lastfail, &curfail, 1)) {
+		printf("maxthread limit exceeded by uid %u "
+		    "(pid %d); consider increasing kern.maxthread\n",
+		    curthread->td_ucred->cr_ruid, curproc->p_pid);
+	}
+	return (false);
+}
+
 static void
 thread_count_sub(int n)
 {
@@ -452,18 +483,40 @@
 		rw_init(&tidhashtbl_lock[i], "tidhash");
 }
 
+/*
+ * Spawn per-domain reaper threads.
+ *
+ * threadinit() runs before kthread_add() can be used, hence the separate routine.
+ */
+static void
+threadinit_reaper(void *arg __unused)
+{
+	int i, error;
+
+	for (i = 0; i < vm_ndomains; i++) {
+		error = kthread_add(thread_reap_worker,
+		    (void *)(uintptr_t)i, NULL, NULL, 0, 0, "tdreap%d", i);
+		if (error != 0)
+			panic("%s: could not start reaper thread %d: %d\n",
+			    __func__, i, error);
+	}
+}
+SYSINIT(threadreapper, SI_SUB_SMP, SI_ORDER_ANY, threadinit_reaper, NULL);
+
 /*
  * Place an unused thread on the zombie list.
  */
 void
 thread_zombie(struct thread *td)
 {
+	struct thread_domain_data *tdd;
 	struct thread *ztd;
 
-	ztd = atomic_load_ptr(&thread_zombies);
+	tdd = &thread_domain_data[thread_domain(td)];
+	ztd = atomic_load_ptr(&tdd->tdd_zombies);
 	for (;;) {
 		td->td_zombie = ztd;
-		if (atomic_fcmpset_rel_ptr((uintptr_t *)&thread_zombies,
+		if (atomic_fcmpset_rel_ptr((uintptr_t *)&tdd->tdd_zombies,
 		    (uintptr_t *)&ztd, (uintptr_t)td))
 			break;
 		continue;
@@ -481,10 +534,10 @@
 }
 
 /*
- * Reap zombie threads.
+ * Reap zombies from the passed domain.
  */
-void
-thread_reap(void)
+static void
+thread_reap_domain(struct thread_domain_data *tdd)
 {
 	struct thread *itd, *ntd;
 	lwpid_t tidbatch[16];
@@ -494,11 +547,14 @@
 	 * Reading upfront is pessimal if followed by concurrent atomic_swap,
 	 * but most of the time the list is empty.
 	 */
-	if (thread_zombies == NULL)
+	if (tdd->tdd_zombies == NULL)
 		return;
 
-	itd = (struct thread *)atomic_swap_ptr((uintptr_t *)&thread_zombies,
+	itd = (struct thread *)atomic_swap_ptr((uintptr_t *)&tdd->tdd_zombies,
 	    (uintptr_t)NULL);
+	if (itd == NULL)
+		return;
+
 	tidbatchn = 0;
 	while (itd != NULL) {
 		ntd = itd->td_zombie;
@@ -520,6 +576,56 @@
 	}
 }
 
+/*
+ * Reap zombies from all domains.
+ */
+static void
+thread_reap_all(void)
+{
+	struct thread_domain_data *tdd;
+	int i, domain;
+
+	domain = PCPU_GET(domain);
+	for (i = 0; i < vm_ndomains; i++) {
+		tdd = &thread_domain_data[(i + domain) % vm_ndomains];
+		thread_reap_domain(tdd);
+	}
+}
+
+/*
+ * Reap zombies from the local domain.
+ */
+void
+thread_reap(void)
+{
+	struct thread_domain_data *tdd;
+	int domain;
+
+	domain = PCPU_GET(domain);
+	tdd = &thread_domain_data[domain];
+
+	thread_reap_domain(tdd);
+}
+
+/*
+ * Per-domain reaper workers.
+ */
+static void
+thread_reap_worker(void *arg)
+{
+	struct thread_domain_data *tdd;
+	int domain;
+
+	domain = (uintptr_t)arg;
+	tdd = &thread_domain_data[domain];
+
+	for (;;) {
+		kthread_suspend_check();
+		thread_reap_domain(tdd);
+		pause("threadreap", hz * 5);
+	}
+}
+
 /*
  * Allocate a thread.
  */
Index: sys/vm/uma_core.c
===================================================================
--- sys/vm/uma_core.c
+++ sys/vm/uma_core.c
@@ -3259,6 +3259,16 @@
 	    ("%s: unknown domain for item %p", __func__, item));
 	return (domain);
 }
+
+int
+uma_item_domain(void *item)
+{
+
+	/*
+	 * XXX assert that the passed object was allocated by UMA.
+	 */
+	return (item_domain(item));
+}
 #endif
 
 #if defined(INVARIANTS) || defined(DEBUG_MEMGUARD) || defined(WITNESS)
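For reference, below is a minimal, self-contained sketch of the lock-free list pattern the patch relies on: thread_zombie() pushes an exited thread onto a per-domain LIFO with a release compare-and-swap loop, and thread_reap_domain() detaches the whole list with one atomic swap before walking it without further synchronization. The sketch models the kernel's atomic(9) primitives with C11 atomics so it builds in userland; the node type and function names are illustrative only and are not part of the patch.

/* Sketch of the push/drain pattern; names are hypothetical. */
#include <stdatomic.h>
#include <stddef.h>

struct node {
	struct node *next;
};

static _Atomic(struct node *) zombie_head;

/* Producer side, analogous to thread_zombie(). */
static void
zombie_push(struct node *n)
{
	struct node *head;

	head = atomic_load_explicit(&zombie_head, memory_order_relaxed);
	for (;;) {
		n->next = head;
		/* Release so the reaper sees a fully initialized node. */
		if (atomic_compare_exchange_weak_explicit(&zombie_head,
		    &head, n, memory_order_release, memory_order_relaxed))
			break;
		/* The failed CAS reloaded 'head'; retry with the new value. */
	}
}

/* Consumer side, analogous to thread_reap_domain(). */
static void
zombie_reap(void (*free_one)(struct node *))
{
	struct node *n, *next;

	/* Cheap early exit; most of the time the list is empty. */
	if (atomic_load_explicit(&zombie_head, memory_order_relaxed) == NULL)
		return;

	/* Detach the entire list in a single step. */
	n = atomic_exchange_explicit(&zombie_head, NULL, memory_order_acquire);
	while (n != NULL) {
		next = n->next;
		free_one(n);
		n = next;
	}
}

Keeping one such head per memory domain (as the patch does with thread_domain_data[]) removes cross-domain cache-line contention on the head pointer; the drain side only needs the single swap, so no lock is required anywhere on this path.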