Changeset View
Standalone View
sys/kern/kern_thread.c
Show First 20 Lines • Show All 46 Lines • ▼ Show 20 Lines | |||||
#include <sys/sdt.h> | #include <sys/sdt.h> | ||||
#include <sys/smp.h> | #include <sys/smp.h> | ||||
#include <sys/sched.h> | #include <sys/sched.h> | ||||
#include <sys/sleepqueue.h> | #include <sys/sleepqueue.h> | ||||
#include <sys/selinfo.h> | #include <sys/selinfo.h> | ||||
#include <sys/syscallsubr.h> | #include <sys/syscallsubr.h> | ||||
#include <sys/sysent.h> | #include <sys/sysent.h> | ||||
#include <sys/turnstile.h> | #include <sys/turnstile.h> | ||||
#include <sys/ktr.h> | #include <sys/ktr.h> | ||||
markj: taskqueue.h sorts before turnstile.h | |||||
#include <sys/rwlock.h> | #include <sys/rwlock.h> | ||||
#include <sys/umtx.h> | #include <sys/umtx.h> | ||||
#include <sys/vmmeter.h> | #include <sys/vmmeter.h> | ||||
#include <sys/cpuset.h> | #include <sys/cpuset.h> | ||||
#ifdef HWPMC_HOOKS | #ifdef HWPMC_HOOKS | ||||
#include <sys/pmckern.h> | #include <sys/pmckern.h> | ||||
#endif | #endif | ||||
#include <sys/priv.h> | #include <sys/priv.h> | ||||
▲ Show 20 Lines • Show All 59 Lines • ▼ Show 20 Lines | |||||
SDT_PROVIDER_DECLARE(proc); | SDT_PROVIDER_DECLARE(proc); | ||||
SDT_PROBE_DEFINE(proc, , , lwp__exit); | SDT_PROBE_DEFINE(proc, , , lwp__exit); | ||||
/* | /* | ||||
* thread related storage. | * thread related storage. | ||||
*/ | */ | ||||
static uma_zone_t thread_zone; | static uma_zone_t thread_zone; | ||||
static __exclusive_cache_line struct thread *thread_zombies; | struct thread_domain_data { | ||||
struct thread *tdd_zombies; | |||||
int tdd_localticks; | |||||
int tdd_remoteticks; | |||||
} __aligned(CACHE_LINE_SIZE); | |||||
struct thread_domain_data thread_domain_data[MAXMEMDOM]; | |||||
markjUnsubmitted Done Inline Actionsstatic markj: `static` | |||||
/*
 * Return the index of the memory domain that backs the storage of the
 * given thread structure.  This says nothing about where the thread
 * runs; it only identifies which per-domain zombie list td belongs on.
 */
static int
thread_domain(struct thread *td)
{

	return (uma_item_domain(td));
}
static void thread_reap_all(void); | |||||
static void thread_zombie(struct thread *); | static void thread_zombie(struct thread *); | ||||
static int thread_unsuspend_one(struct thread *td, struct proc *p, | static int thread_unsuspend_one(struct thread *td, struct proc *p, | ||||
bool boundary); | bool boundary); | ||||
static void thread_free_batched(struct thread *td); | static void thread_free_batched(struct thread *td); | ||||
static struct mtx tid_lock; | static struct mtx tid_lock; | ||||
static bitstr_t *tid_bitmap; | static bitstr_t *tid_bitmap; | ||||
Show All 13 Lines | |||||
#define TIDHASHLOCK(tid) (&tidhashtbl_lock[(tid) & tidhashlock]) | #define TIDHASHLOCK(tid) (&tidhashtbl_lock[(tid) & tidhashlock]) | ||||
EVENTHANDLER_LIST_DEFINE(thread_ctor); | EVENTHANDLER_LIST_DEFINE(thread_ctor); | ||||
EVENTHANDLER_LIST_DEFINE(thread_dtor); | EVENTHANDLER_LIST_DEFINE(thread_dtor); | ||||
EVENTHANDLER_LIST_DEFINE(thread_init); | EVENTHANDLER_LIST_DEFINE(thread_init); | ||||
EVENTHANDLER_LIST_DEFINE(thread_fini); | EVENTHANDLER_LIST_DEFINE(thread_fini); | ||||
static bool | static bool | ||||
thread_count_inc(void) | thread_count_inc_hard(void) | ||||
{ | { | ||||
static struct timeval lastfail; | static struct timeval lastfail; | ||||
static int curfail; | static int curfail; | ||||
int nthreads_new; | int nthreads_new; | ||||
thread_reap(); | thread_reap_all(); | ||||
nthreads_new = atomic_fetchadd_int(&nthreads, 1) + 1; | nthreads_new = atomic_fetchadd_int(&nthreads, 1) + 1; | ||||
if (nthreads_new >= maxthread - 100) { | if (nthreads_new >= maxthread - 100) { | ||||
if (priv_check_cred(curthread->td_ucred, PRIV_MAXPROC) != 0 || | if (priv_check_cred(curthread->td_ucred, PRIV_MAXPROC) != 0 || | ||||
nthreads_new >= maxthread) { | nthreads_new >= maxthread) { | ||||
atomic_subtract_int(&nthreads, 1); | atomic_subtract_int(&nthreads, 1); | ||||
markjUnsubmitted Done Inline ActionsCan we avoid duplicating these five lines of code? markj: Can we avoid duplicating these five lines of code? | |||||
mjgAuthorUnsubmitted Done Inline Actionsi don't see a handy way to do it and I don't think matters mjg: i don't see a handy way to do it and I don't think matters | |||||
markjUnsubmitted Done Inline ActionsDefine _thread_count_inc() or so which does it and returns the result, in thread_count_inc(), use the result to decide whether to reap and try again. markj: Define `_thread_count_inc()` or so which does it and returns the result, in `thread_count_inc… | |||||
mjgAuthorUnsubmitted Done Inline ActionsI was thinking thread_count_inc_try mjg: I was thinking thread_count_inc_try | |||||
if (ppsratecheck(&lastfail, &curfail, 1)) { | if (ppsratecheck(&lastfail, &curfail, 1)) { | ||||
printf("maxthread limit exceeded by uid %u " | printf("maxthread limit exceeded by uid %u " | ||||
"(pid %d); consider increasing kern.maxthread\n", | "(pid %d); consider increasing kern.maxthread\n", | ||||
curthread->td_ucred->cr_ruid, curproc->p_pid); | curthread->td_ucred->cr_ruid, curproc->p_pid); | ||||
} | } | ||||
return (false); | return (false); | ||||
} | } | ||||
} | } | ||||
return (true); | return (true); | ||||
} | } | ||||
static bool | |||||
thread_count_inc(void) | |||||
{ | |||||
int nthreads_new; | |||||
thread_reap(); | |||||
nthreads_new = atomic_fetchadd_int(&nthreads, 1) + 1; | |||||
if (nthreads_new >= maxthread - 100) { | |||||
if (priv_check_cred(curthread->td_ucred, PRIV_MAXPROC) != 0 || | |||||
nthreads_new >= maxthread) { | |||||
atomic_subtract_int(&nthreads, 1); | |||||
return (thread_count_inc_hard()); | |||||
} | |||||
} | |||||
return (true); | |||||
} | |||||
static void | static void | ||||
thread_count_sub(int n) | thread_count_sub(int n) | ||||
{ | { | ||||
atomic_subtract_int(&nthreads, n); | atomic_subtract_int(&nthreads, n); | ||||
} | } | ||||
static void | static void | ||||
▲ Show 20 Lines • Show All 259 Lines • ▼ Show 20 Lines | |||||
} | } | ||||
/* | /* | ||||
* Place an unused thread on the zombie list. | * Place an unused thread on the zombie list. | ||||
*/ | */ | ||||
void | void | ||||
thread_zombie(struct thread *td) | thread_zombie(struct thread *td) | ||||
{ | { | ||||
struct thread_domain_data *tdd; | |||||
struct thread *ztd; | struct thread *ztd; | ||||
ztd = atomic_load_ptr(&thread_zombies); | tdd = &thread_domain_data[thread_domain(td)]; | ||||
ztd = atomic_load_ptr(&tdd->tdd_zombies); | |||||
Not Done Inline ActionsI wrote D27207 to try and address the need for uma_item_domain(). With that you'd write tdd = &thread_domain_data[vm_phys_domain(vtophys(td))]; and only need to include <vm/pmap.h> and <vm/vm_phys.h>, which I think is reasonable. markj: I wrote D27207 to try and address the need for uma_item_domain(). With that you'd write
```… | |||||
Done Inline ActionsI really think this should be combined to vtodomain() or similar. For example the kernel can start handing out VAs which encode the target domain or there may be some other optimization which elides the need to grab the physical address. mjg: I really think this should be combined to vtodomain() or similar. For example the kernel can… | |||||
Not Done Inline Actionsvtodomain() would just expand to what I wrote above, and I'm not sure yet where it should go since it depends on both the pmap and vm_phys modules. Sure, additional optimizations are possible in some cases, but first I want the existing KPIs to be a bit cleaner. markj: vtodomain() would just expand to what I wrote above, and I'm not sure yet where it should go… | |||||
Done Inline ActionsThat's fine, the point is that should anything change here down the road consumers will only need to be recompiled to take advantage of it. mjg: That's fine, the point is that should anything change here down the road consumers will only… | |||||
Not Done Inline ActionsAssuming that generic optimizations are sufficient, yes. If you start doing things like encoding a domain ID in the VA, then you'd want more specialized interfaces anyway. I'm not sure how that should look, so I prefer to punt on it for now. markj: Assuming that generic optimizations are sufficient, yes. If you start doing things like… | |||||
Done Inline Actionsagain, it was just an example, general point being to hide the detail of translations from the consumer. I don't understand where the resistance to a vtodomain (or whatever other name) is coming from, but I'm not going to insist. Just provide something to call and I'll use it. mjg: again, it was just an example, general point being to hide the detail of translations from the… | |||||
for (;;) { | for (;;) { | ||||
td->td_zombie = ztd; | td->td_zombie = ztd; | ||||
Done Inline ActionsIf you pass &thread_domain_data[i] instead of i, this gets rid of cast gymnastic and one more line in thread_reap_worker. kib: If you pass &thread_domain_data[i] instead of i, this gets rid of cast gymnastic and one more… | |||||
if (atomic_fcmpset_rel_ptr((uintptr_t *)&thread_zombies, | if (atomic_fcmpset_rel_ptr((uintptr_t *)&tdd->tdd_zombies, | ||||
(uintptr_t *)&ztd, (uintptr_t)td)) | (uintptr_t *)&ztd, (uintptr_t)td)) | ||||
break; | break; | ||||
continue; | continue; | ||||
} | } | ||||
} | } | ||||
/* | /* | ||||
* Release a thread that has exited after cpu_throw(). | * Release a thread that has exited after cpu_throw(). | ||||
*/ | */ | ||||
void | void | ||||
thread_stash(struct thread *td) | thread_stash(struct thread *td) | ||||
{ | { | ||||
atomic_subtract_rel_int(&td->td_proc->p_exitthreads, 1); | atomic_subtract_rel_int(&td->td_proc->p_exitthreads, 1); | ||||
thread_zombie(td); | thread_zombie(td); | ||||
} | } | ||||
/* | /* | ||||
* Reap zombie threads. | * Zombie reaping support. | ||||
* | |||||
* Each domain has its own zombie list and a local ticks counter signifying | |||||
* the last time reaping took place. However, there may be no reaping-inducing | |||||
* activity on the domain making threads linger. In order to combat the problem | |||||
* remote tick counter is also added, letting CPUs decide whether to take a peek | |||||
* at the other domains and reap them. | |||||
* | |||||
* Note if there is 0 activity all caches can have lingering threads, but that's | |||||
* fine as there is nothing to do. | |||||
*/ | */ | ||||
void | static void | ||||
thread_reap(void) | thread_reap_domain(struct thread_domain_data *tdd) | ||||
{ | { | ||||
struct thread *itd, *ntd; | struct thread *itd, *ntd; | ||||
lwpid_t tidbatch[16]; | lwpid_t tidbatch[16]; | ||||
int tidbatchn; | int tidbatchn; | ||||
/* | /* | ||||
* Reading upfront is pessimal if followed by concurrent atomic_swap, | * Reading upfront is pessimal if followed by concurrent atomic_swap, | ||||
* but most of the time the list is empty. | * but most of the time the list is empty. | ||||
*/ | */ | ||||
if (thread_zombies == NULL) | if (tdd->tdd_zombies == NULL) | ||||
return; | return; | ||||
itd = (struct thread *)atomic_swap_ptr((uintptr_t *)&thread_zombies, | itd = (struct thread *)atomic_swap_ptr((uintptr_t *)&tdd->tdd_zombies, | ||||
(uintptr_t)NULL); | (uintptr_t)NULL); | ||||
if (itd == NULL) | |||||
return; | |||||
/* | |||||
* Multiple CPUs can get here, the race is fine as ticks is only | |||||
* advisory. | |||||
*/ | |||||
tdd->tdd_localticks = ticks; | |||||
tidbatchn = 0; | tidbatchn = 0; | ||||
while (itd != NULL) { | while (itd != NULL) { | ||||
ntd = itd->td_zombie; | ntd = itd->td_zombie; | ||||
tidbatch[tidbatchn] = itd->td_tid; | tidbatch[tidbatchn] = itd->td_tid; | ||||
tidbatchn++; | tidbatchn++; | ||||
thread_cow_free(itd); | thread_cow_free(itd); | ||||
thread_free_batched(itd); | thread_free_batched(itd); | ||||
if (tidbatchn == nitems(tidbatch)) { | if (tidbatchn == nitems(tidbatch)) { | ||||
tid_free_batch(tidbatch, tidbatchn); | tid_free_batch(tidbatch, tidbatchn); | ||||
thread_count_sub(tidbatchn); | thread_count_sub(tidbatchn); | ||||
tidbatchn = 0; | tidbatchn = 0; | ||||
} | } | ||||
itd = ntd; | itd = ntd; | ||||
} | } | ||||
if (tidbatchn != 0) { | if (tidbatchn != 0) { | ||||
tid_free_batch(tidbatch, tidbatchn); | tid_free_batch(tidbatch, tidbatchn); | ||||
thread_count_sub(tidbatchn); | thread_count_sub(tidbatchn); | ||||
} | |||||
} | |||||
/* | |||||
* Reap zombies from all domains. | |||||
*/ | |||||
static void | |||||
thread_reap_all(void) | |||||
{ | |||||
struct thread_domain_data *tdd; | |||||
int i, domain; | |||||
domain = PCPU_GET(domain); | |||||
for (i = 0; i < vm_ndomains; i++) { | |||||
tdd = &thread_domain_data[(i + domain) % vm_ndomains]; | |||||
thread_reap_domain(tdd); | |||||
} | |||||
} | |||||
/* | |||||
* Reap zombies from other domains if they linger too long. | |||||
*/ | |||||
static void | |||||
thread_reap_rest_cond(int domain) | |||||
{ | |||||
struct thread_domain_data *tdd; | |||||
int cticks, lticks, i; | |||||
domain++; | |||||
cticks = atomic_load_int(&ticks); | |||||
for (i = 0; i < vm_ndomains - 1; i++) { | |||||
tdd = &thread_domain_data[(i + domain) % vm_ndomains]; | |||||
lticks = atomic_load_int(&tdd->tdd_localticks); | |||||
if (lticks > cticks || lticks + 1000 < cticks) { | |||||
markjUnsubmitted Done Inline Actions1000 should instead be an expression that scales with hz. If the intent is to only reap once a second, I suggest rewriting this as (u_int)(ticks - lticks) >= hz. markj: `1000` should instead be an expression that scales with `hz`.
If the intent is to only reap… | |||||
mjgAuthorUnsubmitted Done Inline Actionsit was smaller, but will dedup to the above later. mjg: it was smaller, but will dedup to the above later. | |||||
thread_reap_domain(tdd); | |||||
} | |||||
} | |||||
} | |||||
void | |||||
thread_reap(void) | |||||
{ | |||||
struct thread_domain_data *tdd; | |||||
int domain, cticks, rticks; | |||||
domain = PCPU_GET(domain); | |||||
tdd = &thread_domain_data[domain]; | |||||
thread_reap_domain(tdd); | |||||
cticks = atomic_load_int(&ticks); | |||||
Done Inline ActionsI think this can cause shutdown to hang by up to 5s per domain. Instead of dedicated threads, you could use a self-arming callout to poll the per-domain queues and schedule a taskqueue thread to drain them. markj: I think this can cause shutdown to hang by up to 5s per domain.
Instead of dedicated threads… | |||||
rticks = atomic_load_int(&tdd->tdd_remoteticks); | |||||
if (rticks > cticks || rticks + 1000 < cticks) { | |||||
markjUnsubmitted Done Inline ActionsDitto. markj: Ditto. | |||||
if (atomic_cmpset_int(&tdd->tdd_remoteticks, rticks, cticks)) { | |||||
thread_reap_rest_cond(domain); | |||||
} | |||||
} | } | ||||
} | } | ||||
/* | /* | ||||
* Allocate a thread. | * Allocate a thread. | ||||
*/ | */ | ||||
struct thread * | struct thread * | ||||
thread_alloc(int pages) | thread_alloc(int pages) | ||||
Not Done Inline ActionsWe have taskqueue_enqueue_timeout(9) that hides this machinery. kib: We have taskqueue_enqueue_timeout(9) that hides this machinery. | |||||
Done Inline ActionsI have seen it. The taskqueue callback is only there just in case and it looks like it is more expensive to execute than mere callout, but I'm not going to insist one way or the other. mjg: I have seen it. The taskqueue callback is only there just in case and it looks like it is more… | |||||
{ | { | ||||
struct thread *td; | struct thread *td; | ||||
lwpid_t tid; | lwpid_t tid; | ||||
if (!thread_count_inc()) { | if (!thread_count_inc()) { | ||||
return (NULL); | return (NULL); | ||||
} | } | ||||
▲ Show 20 Lines • Show All 986 Lines • Show Last 20 Lines |
taskqueue.h sorts before turnstile.h