diff --git a/sys/compat/linuxkpi/common/src/linux_current.c b/sys/compat/linuxkpi/common/src/linux_current.c --- a/sys/compat/linuxkpi/common/src/linux_current.c +++ b/sys/compat/linuxkpi/common/src/linux_current.c @@ -45,7 +45,6 @@ static eventhandler_tag linuxkpi_thread_dtor_tag; -static atomic_t linux_current_allocs; static uma_zone_t linux_current_zone; static uma_zone_t linux_mm_zone; @@ -147,10 +146,6 @@ /* free mm_struct pointer, if any */ uma_zfree(linux_mm_zone, mm); - /* keep track of number of allocations */ - if (atomic_add_return(1, &linux_current_allocs) == INT_MAX) - panic("linux_alloc_current: Refcount too high!"); - return (0); } @@ -178,10 +173,6 @@ { mmput(ts->mm); uma_zfree(linux_current_zone, ts); - - /* keep track of number of allocations */ - if (atomic_sub_return(1, &linux_current_allocs) < 0) - panic("linux_free_current: Negative refcount!"); } static void @@ -306,9 +297,9 @@ atomic_thread_fence_seq_cst(); - lkpi_alloc_current = linux_alloc_current; linuxkpi_thread_dtor_tag = EVENTHANDLER_REGISTER(thread_dtor, linuxkpi_thread_dtor, NULL, EVENTHANDLER_PRI_ANY); + lkpi_alloc_current = linux_alloc_current; } SYSINIT(linux_current, SI_SUB_EVENTHANDLER, SI_ORDER_SECOND, linux_current_init, NULL); @@ -337,17 +328,10 @@ } sx_sunlock(&allproc_lock); - /* - * There is a window where threads are removed from the - * process list and where the thread destructor is invoked. - * Catch that window by waiting for all task_struct - * allocations to be returned before freeing the UMA zone. - */ - while (atomic_read(&linux_current_allocs) != 0) - pause("W", 1); + thread_reap_barrier(); EVENTHANDLER_DEREGISTER(thread_dtor, linuxkpi_thread_dtor_tag); - + uma_zdestroy(linux_current_zone); uma_zdestroy(linux_mm_zone); } diff --git a/sys/kern/kern_thread.c b/sys/kern/kern_thread.c --- a/sys/kern/kern_thread.c +++ b/sys/kern/kern_thread.c @@ -541,7 +541,8 @@ TASK_INIT(&thread_reap_task, 0, thread_reap_task_cb, NULL); callout_init(&thread_reap_callout, 1); - callout_reset(&thread_reap_callout, 5 * hz, thread_reap_callout_cb, NULL); + callout_reset(&thread_reap_callout, 5 * hz, + thread_reap_callout_cb, NULL); } /* @@ -704,7 +705,51 @@ if (wantreap) taskqueue_enqueue(taskqueue_thread, &thread_reap_task); - callout_reset(&thread_reap_callout, 5 * hz, thread_reap_callout_cb, NULL); + callout_reset(&thread_reap_callout, 5 * hz, + thread_reap_callout_cb, NULL); +} + +static void +thread_reap_barrier_task_cb(void *arg, int pending) +{ + struct task *t; + + thread_reap_all(); + t = arg; + atomic_store_rel_ptr((uintptr_t *)&t->ta_context, 0); +} + +/* + * Calling this function guarantees that any thread that exited before + * the call is reaped when the function returns. By 'exited' we mean + * a thread removed from the process linkage with thread_unlink(). + * Practically this means that caller must lock/unlock corresponding + * process lock before the call, to synchronize with thread_exit(). + */ +void +thread_reap_barrier(void) +{ + struct thread *td; + struct task *t; + + td = curthread; + + /* + * First do context switches to each CPU to ensure that all + * PCPU pc_deadthreads are moved to zombie list. + */ + quiesce_all_cpus("", PDROP); + + /* + * Second, fire the task in the same thread as normal + * thread_reap() is done, to serialize reaping. + */ + t = malloc(sizeof(*t), M_TEMP, M_WAITOK); + TASK_INIT(t, 0, thread_reap_barrier_task_cb, t); + taskqueue_enqueue(taskqueue_thread, t); + while (atomic_load_acq_ptr((uintptr_t *)&t->ta_context) != 0) + tsleep(t, 0, "thrrbb", 1); + free(t, M_TEMP); } /* diff --git a/sys/kern/subr_smp.c b/sys/kern/subr_smp.c --- a/sys/kern/subr_smp.c +++ b/sys/kern/subr_smp.c @@ -943,25 +943,31 @@ } /* + * If (prio & PDROP) == 0: * Wait for specified idle threads to switch once. This ensures that even * preempted threads have cycled through the switch function once, * exiting their codepaths. This allows us to change global pointers * with no other synchronization. + * If (prio & PDROP) != 0: + * Force the specified CPUs to switch context at least once. */ int quiesce_cpus(cpuset_t map, const char *wmesg, int prio) { struct pcpu *pcpu; - u_int gen[MAXCPU]; + u_int *gen; int error; int cpu; error = 0; - for (cpu = 0; cpu <= mp_maxid; cpu++) { - if (!CPU_ISSET(cpu, &map) || CPU_ABSENT(cpu)) - continue; - pcpu = pcpu_find(cpu); - gen[cpu] = pcpu->pc_idlethread->td_generation; + if ((prio & PDROP) == 0) { + gen = malloc(sizeof(u_int) * MAXCPU, M_TEMP, M_WAITOK); + for (cpu = 0; cpu <= mp_maxid; cpu++) { + if (!CPU_ISSET(cpu, &map) || CPU_ABSENT(cpu)) + continue; + pcpu = pcpu_find(cpu); + gen[cpu] = pcpu->pc_idlethread->td_generation; + } } for (cpu = 0; cpu <= mp_maxid; cpu++) { if (!CPU_ISSET(cpu, &map) || CPU_ABSENT(cpu)) @@ -970,8 +976,10 @@ thread_lock(curthread); sched_bind(curthread, cpu); thread_unlock(curthread); + if ((prio & PDROP) != 0) + continue; while (gen[cpu] == pcpu->pc_idlethread->td_generation) { - error = tsleep(quiesce_cpus, prio, wmesg, 1); + error = tsleep(quiesce_cpus, prio & ~PDROP, wmesg, 1); if (error != EWOULDBLOCK) goto out; error = 0; @@ -981,6 +989,8 @@ thread_lock(curthread); sched_unbind(curthread); thread_unlock(curthread); + if ((prio & PDROP) == 0) + free(gen, M_TEMP); return (error); } diff --git a/sys/sys/proc.h b/sys/sys/proc.h --- a/sys/sys/proc.h +++ b/sys/sys/proc.h @@ -1188,6 +1188,7 @@ void thread_exit(void) __dead2; void thread_free(struct thread *td); void thread_link(struct thread *td, struct proc *p); +void thread_reap_barrier(void); int thread_single(struct proc *p, int how); void thread_single_end(struct proc *p, int how); void thread_stash(struct thread *td);