Index: sys/kern/kern_thread.c
===================================================================
--- sys/kern/kern_thread.c
+++ sys/kern/kern_thread.c
@@ -128,7 +128,22 @@
  */
 static uma_zone_t thread_zone;
 
-static __exclusive_cache_line struct thread *thread_zombies;
+struct thread_domain_data {
+	struct thread *tdd_zombies;
+	int tdd_localticks;
+	int tdd_remoteticks;
+} __aligned(CACHE_LINE_SIZE);
+
+static struct thread_domain_data thread_domain_data[MAXMEMDOM];
+
+static int
+thread_domain(struct thread *td)
+{
+
+	return (uma_item_domain(td));
+}
+
+static void thread_reap_all(void);
 
 static void thread_zombie(struct thread *);
 static int thread_unsuspend_one(struct thread *td, struct proc *p,
@@ -159,13 +174,13 @@
 EVENTHANDLER_LIST_DEFINE(thread_fini);
 
 static bool
-thread_count_inc(void)
+thread_count_inc_hard(void)
 {
 	static struct timeval lastfail;
 	static int curfail;
 	int nthreads_new;
 
-	thread_reap();
+	thread_reap_all();
 
 	nthreads_new = atomic_fetchadd_int(&nthreads, 1) + 1;
 	if (nthreads_new >= maxthread - 100) {
@@ -183,6 +198,24 @@
 	return (true);
 }
 
+static bool
+thread_count_inc(void)
+{
+	int nthreads_new;
+
+	thread_reap();
+
+	nthreads_new = atomic_fetchadd_int(&nthreads, 1) + 1;
+	if (nthreads_new >= maxthread - 100) {
+		if (priv_check_cred(curthread->td_ucred, PRIV_MAXPROC) != 0 ||
+		    nthreads_new >= maxthread) {
+			atomic_subtract_int(&nthreads, 1);
+			return (thread_count_inc_hard());
+		}
+	}
+	return (true);
+}
+
 static void
 thread_count_sub(int n)
 {
@@ -458,12 +491,14 @@
 void
 thread_zombie(struct thread *td)
 {
+	struct thread_domain_data *tdd;
 	struct thread *ztd;
 
-	ztd = atomic_load_ptr(&thread_zombies);
+	tdd = &thread_domain_data[thread_domain(td)];
+	ztd = atomic_load_ptr(&tdd->tdd_zombies);
 	for (;;) {
 		td->td_zombie = ztd;
-		if (atomic_fcmpset_rel_ptr((uintptr_t *)&thread_zombies,
+		if (atomic_fcmpset_rel_ptr((uintptr_t *)&tdd->tdd_zombies,
 		    (uintptr_t *)&ztd, (uintptr_t)td))
 			break;
 		continue;
@@ -481,10 +516,19 @@
 }
 
 /*
- * Reap zombie threads.
+ * Zombie reaping support.
+ *
+ * Each domain has its own zombie list and a local ticks counter noting
+ * the last time reaping took place there. However, a domain may see no
+ * reaping-inducing activity, leaving its zombie threads to linger. To
+ * combat the problem, a remote ticks counter is also kept, letting CPUs
+ * decide whether to take a peek at the other domains and reap them.
+ *
+ * Note that with no activity at all every list can hold lingering
+ * threads, but that is fine as there is nothing to do.
  */
-void
-thread_reap(void)
+static void
+thread_reap_domain(struct thread_domain_data *tdd)
 {
 	struct thread *itd, *ntd;
 	lwpid_t tidbatch[16];
@@ -494,11 +538,20 @@
 	 * Reading upfront is pessimal if followed by concurrent atomic_swap,
 	 * but most of the time the list is empty.
 	 */
-	if (thread_zombies == NULL)
+	if (tdd->tdd_zombies == NULL)
 		return;
 
-	itd = (struct thread *)atomic_swap_ptr((uintptr_t *)&thread_zombies,
+	itd = (struct thread *)atomic_swap_ptr((uintptr_t *)&tdd->tdd_zombies,
 	    (uintptr_t)NULL);
+	if (itd == NULL)
+		return;
+
+	/*
+	 * Multiple CPUs can get here; the race is fine as ticks is only
+	 * advisory.
+	 */
+	tdd->tdd_localticks = ticks;
+
 	tidbatchn = 0;
 	while (itd != NULL) {
 		ntd = itd->td_zombie;
@@ -520,6 +573,61 @@
 	}
 }
 
+/*
+ * Reap zombies from all domains.
+ */
+static void
+thread_reap_all(void)
+{
+	struct thread_domain_data *tdd;
+	int i, domain;
+
+	domain = PCPU_GET(domain);
+	for (i = 0; i < vm_ndomains; i++) {
+		tdd = &thread_domain_data[(i + domain) % vm_ndomains];
+		thread_reap_domain(tdd);
+	}
+}
+
+/*
+ * Reap zombies from other domains if they linger too long.
+ */
+static void
+thread_reap_rest_cond(int domain)
+{
+	struct thread_domain_data *tdd;
+	int cticks, lticks, i;
+
+	domain++;
+	cticks = atomic_load_int(&ticks);
+	for (i = 0; i < vm_ndomains - 1; i++) {
+		tdd = &thread_domain_data[(i + domain) % vm_ndomains];
+		lticks = atomic_load_int(&tdd->tdd_localticks);
+		if (lticks > cticks || lticks + 1000 < cticks) {
+			thread_reap_domain(tdd);
+		}
+	}
+}
+
+void
+thread_reap(void)
+{
+	struct thread_domain_data *tdd;
+	int domain, cticks, rticks;
+
+	domain = PCPU_GET(domain);
+	tdd = &thread_domain_data[domain];
+
+	thread_reap_domain(tdd);
+	cticks = atomic_load_int(&ticks);
+	rticks = atomic_load_int(&tdd->tdd_remoteticks);
+	if (rticks > cticks || rticks + 1000 < cticks) {
+		if (atomic_cmpset_int(&tdd->tdd_remoteticks, rticks, cticks)) {
+			thread_reap_rest_cond(domain);
+		}
+	}
+}
+
 /*
  * Allocate a thread.
  */
Index: sys/vm/uma.h
===================================================================
--- sys/vm/uma.h
+++ sys/vm/uma.h
@@ -300,6 +300,20 @@
 
 #define	UMA_ANYDOMAIN	-1	/* Special value for domain search. */
 
+/*
+ * Returns the NUMA domain backing the passed pointer.
+ */
+#ifdef NUMA
+int uma_item_domain(void *);
+#else
+static inline int
+uma_item_domain(void *item __unused)
+{
+
+	return (0);
+}
+#endif
+
 /*
  * Destroys an empty uma zone.  If the zone is not empty uma complains loudly.
  *
Index: sys/vm/uma_core.c
===================================================================
--- sys/vm/uma_core.c
+++ sys/vm/uma_core.c
@@ -3259,6 +3259,16 @@
 	    ("%s: unknown domain for item %p", __func__, item));
 	return (domain);
 }
+
+int
+uma_item_domain(void *item)
+{
+
+	/*
+	 * XXX: assert that the passed object was allocated by UMA.
+	 */
+	return (item_domain(item));
+}
 #endif
 
 #if defined(INVARIANTS) || defined(DEBUG_MEMGUARD) || defined(WITNESS)
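
As a reading aid (not part of the patch), below is a minimal userspace sketch of the lock-free list handling: producers push onto a singly-linked list with a CAS loop, as thread_zombie() does, and the reaper detaches the whole list with one atomic exchange, as thread_reap_domain() does. C11 atomics stand in for the kernel's atomic(9) primitives; struct node, push_zombie() and reap_zombies() are names invented for the illustration only.

/*
 * Standalone model of the push/drain pattern used by the patch.
 * Illustrative sketch only, not kernel code.
 */
#include <stdatomic.h>
#include <stdio.h>
#include <stdlib.h>

struct node {
	struct node *next;
	int id;
};

static _Atomic(struct node *) zombies;

/* Lock-free LIFO push, analogous to thread_zombie(). */
static void
push_zombie(struct node *n)
{
	struct node *head;

	head = atomic_load_explicit(&zombies, memory_order_relaxed);
	for (;;) {
		n->next = head;
		/* Release pairs with the acquire in the reaper. */
		if (atomic_compare_exchange_weak_explicit(&zombies, &head,
		    n, memory_order_release, memory_order_relaxed))
			break;
	}
}

/* Drain everything in one shot, analogous to thread_reap_domain(). */
static int
reap_zombies(void)
{
	struct node *n, *next;
	int reaped;

	/* Cheap early exit; racing with a concurrent push is fine. */
	if (atomic_load_explicit(&zombies, memory_order_relaxed) == NULL)
		return (0);
	n = atomic_exchange_explicit(&zombies, NULL, memory_order_acquire);
	reaped = 0;
	while (n != NULL) {
		next = n->next;
		free(n);
		reaped++;
		n = next;
	}
	return (reaped);
}

int
main(void)
{
	struct node *n;
	int i;

	for (i = 0; i < 4; i++) {
		n = malloc(sizeof(*n));
		if (n == NULL)
			abort();
		n->id = i;
		push_zombie(n);
	}
	printf("reaped %d zombies\n", reap_zombies());
	return (0);
}

In the sketch the release on a successful compare-and-swap pairs with the acquire on the exchange, so the draining side always observes fully linked nodes; the per-domain split in the patch simply gives each memory domain its own such list head.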
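Similarly, the advisory tick gate that thread_reap() and thread_reap_rest_cond() open-code can be read in isolation. The helper below is not part of the patch (both functions inline the test); reap_is_due() is a hypothetical name used only for this sketch.

#include <stdbool.h>
#include <stdio.h>

/*
 * Restatement of the open-coded test "t > cticks || t + 1000 < cticks".
 * Illustration only; the patch does not introduce this helper.
 */
static bool
reap_is_due(int last, int now)
{

	/*
	 * "last > now" catches a recorded value that appears to lie in
	 * the future (for instance after the counter wrapped); otherwise
	 * insist on at least 1000 ticks since the last visit.
	 */
	return (last > now || last + 1000 < now);
}

int
main(void)
{

	printf("%d %d %d\n",
	    reap_is_due(0, 500),	/* too recent: 0 */
	    reap_is_due(0, 1501),	/* stale: 1 */
	    reap_is_due(1000, 5));	/* "future" value: 1 */
	return (0);
}

With the default hz of 1000 the threshold amounts to roughly one second per domain; since the counters are only advisory, the worst case is an occasional extra or skipped sweep, which is harmless.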