diff --git a/share/man/man9/unr.9 b/share/man/man9/unr.9 --- a/share/man/man9/unr.9 +++ b/share/man/man9/unr.9 @@ -24,7 +24,7 @@ .\" .\" $FreeBSD$ .\" -.Dd October 4, 2017 +.Dd April 21, 2022 .Dt UNR 9 .Os .Sh NAME @@ -72,6 +72,9 @@ is not .Dv NULL , it is used for locking when allocating and freeing units. +If the passed value is the token +.Dv UNR_NO_MTX , +then no locking is applied internally. Otherwise, internal mutex is used. .It Fn clear_unrhdr uh Clear all units from the specified unit number allocator entity. diff --git a/sys/kern/kern_proc.c b/sys/kern/kern_proc.c --- a/sys/kern/kern_proc.c +++ b/sys/kern/kern_proc.c @@ -3396,6 +3396,28 @@ CTLFLAG_ANYBODY | CTLFLAG_MPSAFE, sysctl_kern_proc_vm_layout, "Process virtual address space layout info"); +static struct thread *stop_all_proc_blocker; + +void +stop_all_proc_block(void) +{ + sx_xlock(&allproc_lock); + while (stop_all_proc_blocker != NULL) + sx_sleep(&stop_all_proc_blocker, &allproc_lock, 0, "sapblk", 0); + stop_all_proc_blocker = curthread; + sx_xunlock(&allproc_lock); +} + +void +stop_all_proc_unblock(void) +{ + sx_xlock(&allproc_lock); + MPASS(stop_all_proc_blocker == curthread); + stop_all_proc_blocker = NULL; + wakeup(&stop_all_proc_blocker); + sx_xunlock(&allproc_lock); +} + int allproc_gen; /* @@ -3411,6 +3433,8 @@ int r, gen; bool restart, seen_stopped, seen_exiting, stopped_some; + stop_all_proc_block(); + cp = curproc; allproc_loop: sx_xlock(&allproc_lock); @@ -3502,6 +3526,8 @@ goto again; } sx_xunlock(&allproc_lock); + + stop_all_proc_unblock(); } /* #define TOTAL_STOP_DEBUG 1 */ diff --git a/sys/kern/kern_procctl.c b/sys/kern/kern_procctl.c --- a/sys/kern/kern_procctl.c +++ b/sys/kern/kern_procctl.c @@ -31,6 +31,7 @@ __FBSDID("$FreeBSD$"); #include +#include #include #include #include @@ -243,22 +244,94 @@ } static void +reap_kill_proc_relock(struct proc *p, int xlocked) +{ + PROC_UNLOCK(p); + if (xlocked) + sx_xlock(&proctree_lock); + else + sx_slock(&proctree_lock); + PROC_LOCK(p); +} + 
+static bool +reap_kill_proc_locked(struct thread *td, struct proc *p2, + ksiginfo_t *ksi, struct procctl_reaper_kill *rk, int *error) +{ + int error1, r, xlocked; + bool need_stop; + + PROC_LOCK_ASSERT(p2, MA_OWNED); + PROC_ASSERT_HELD(p2); + + error1 = p_cansignal(td, p2, rk->rk_sig); + if (error1 != 0) { + if (*error == ESRCH) { + rk->rk_fpid = p2->p_pid; + *error = error1; + } + return (true); + } + + /* + * The need_stop indicates if the target process needs to be + * suspended before being signalled. This is needed when we + * guarantee that all processes in subtree are signalled, + * avoiding the race with some process not yet fully linked + * into all structures during fork, ignored by iterator, and + * then escaping signalling. + * + * If need_stop is true, then reap_kill_proc() returns true if + * the process was successfully stopped and signalled, and + * false if stopping failed and the signal was not sent. + * + * The thread cannot usefully stop itself anyway, and if other + * thread of the current process forks while the current + * thread signals the whole subtree, it is an application + * race. 
+ */ + need_stop = p2 != td->td_proc && + (p2->p_flag & (P_KPROC | P_SYSTEM)) == 0 && + (rk->rk_flags & REAPER_KILL_CHILDREN) == 0; + + if (need_stop) { + if (P_SHOULDSTOP(p2) == P_STOPPED_SINGLE) + return (false); /* retry later */ + xlocked = sx_xlocked(&proctree_lock); + sx_unlock(&proctree_lock); + r = thread_single(p2, SINGLE_ALLPROC); + if (r != 0) { + reap_kill_proc_relock(p2, xlocked); + return (false); + } + } + + pksignal(p2, rk->rk_sig, ksi); + rk->rk_killed++; + *error = error1; + + if (need_stop) { + reap_kill_proc_relock(p2, xlocked); + thread_single_end(p2, SINGLE_ALLPROC); + } + return (true); +} + +static bool reap_kill_proc(struct thread *td, struct proc *p2, ksiginfo_t *ksi, struct procctl_reaper_kill *rk, int *error) { - int error1; + bool res; + res = true; PROC_LOCK(p2); - error1 = p_cansignal(td, p2, rk->rk_sig); - if (error1 == 0) { - pksignal(p2, rk->rk_sig, ksi); - rk->rk_killed++; - *error = error1; - } else if (*error == ESRCH) { - rk->rk_fpid = p2->p_pid; - *error = error1; + if ((p2->p_flag & P_WEXIT) == 0) { + _PHOLD_LITE(p2); + res = reap_kill_proc_locked(td, p2, ksi, rk, error); + _PRELE(p2); } PROC_UNLOCK(p2); + return (res); } struct reap_kill_tracker { @@ -278,13 +351,80 @@ TAILQ_INSERT_TAIL(tracker, t, link); } +static void +reap_kill_children(struct thread *td, struct proc *reaper, + struct procctl_reaper_kill *rk, ksiginfo_t *ksi, int *error) +{ + struct proc *p2; + + LIST_FOREACH(p2, &reaper->p_children, p_sibling) { + (void)reap_kill_proc(td, p2, ksi, rk, error); + /* + * Do not end the loop on error, signal everything we + * can. 
+ */ + } +} + +static bool +reap_kill_subtree_once(struct thread *td, struct proc *p, struct proc *reaper, + struct procctl_reaper_kill *rk, ksiginfo_t *ksi, int *error, + struct unrhdr *pids) +{ + struct reap_kill_tracker_head tracker; + struct reap_kill_tracker *t; + struct proc *p2; + bool res; + + res = false; + TAILQ_INIT(&tracker); + reap_kill_sched(&tracker, reaper); + while ((t = TAILQ_FIRST(&tracker)) != NULL) { + MPASS((t->parent->p_treeflag & P_TREE_REAPER) != 0); + TAILQ_REMOVE(&tracker, t, link); + LIST_FOREACH(p2, &t->parent->p_reaplist, p_reapsibling) { + if (t->parent == reaper && + (rk->rk_flags & REAPER_KILL_SUBTREE) != 0 && + p2->p_reapsubtree != rk->rk_subtree) + continue; + if ((p2->p_treeflag & P_TREE_REAPER) != 0) + reap_kill_sched(&tracker, p2); + if (alloc_unr_specific(pids, p2->p_pid) != p2->p_pid) + continue; + if (!reap_kill_proc(td, p2, ksi, rk, error)) + free_unr(pids, p2->p_pid); + res = true; + } + free(t, M_TEMP); + } + return (res); +} + +static void +reap_kill_subtree(struct thread *td, struct proc *p, struct proc *reaper, + struct procctl_reaper_kill *rk, ksiginfo_t *ksi, int *error) +{ + struct unrhdr pids; + + /* + * pids records processes which were already signalled, to + * avoid doubling signals to them if iteration needs to be + * repeated. + */ + init_unrhdr(&pids, 1, PID_MAX, UNR_NO_MTX); + stop_all_proc_block(); + while (reap_kill_subtree_once(td, p, reaper, rk, ksi, error, &pids)) + ; + stop_all_proc_unblock(); + clean_unrhdr(&pids); + clear_unrhdr(&pids); +} + static int reap_kill(struct thread *td, struct proc *p, void *data) { - struct proc *reap, *p2; + struct proc *reaper; ksiginfo_t ksi; - struct reap_kill_tracker_head tracker; - struct reap_kill_tracker *t; struct procctl_reaper_kill *rk; int error; @@ -299,7 +439,7 @@ (REAPER_KILL_CHILDREN | REAPER_KILL_SUBTREE)) return (EINVAL); PROC_UNLOCK(p); - reap = (p->p_treeflag & P_TREE_REAPER) == 0 ? p->p_reaper : p; + reaper = (p->p_treeflag & P_TREE_REAPER) == 0 ? 
p->p_reaper : p; ksiginfo_init(&ksi); ksi.ksi_signo = rk->rk_sig; ksi.ksi_code = SI_USER; @@ -309,32 +449,9 @@ rk->rk_killed = 0; rk->rk_fpid = -1; if ((rk->rk_flags & REAPER_KILL_CHILDREN) != 0) { - for (p2 = LIST_FIRST(&reap->p_children); p2 != NULL; - p2 = LIST_NEXT(p2, p_sibling)) { - reap_kill_proc(td, p2, &ksi, rk, &error); - /* - * Do not end the loop on error, signal - * everything we can. - */ - } + reap_kill_children(td, reaper, rk, &ksi, &error); } else { - TAILQ_INIT(&tracker); - reap_kill_sched(&tracker, reap); - while ((t = TAILQ_FIRST(&tracker)) != NULL) { - MPASS((t->parent->p_treeflag & P_TREE_REAPER) != 0); - TAILQ_REMOVE(&tracker, t, link); - for (p2 = LIST_FIRST(&t->parent->p_reaplist); p2 != NULL; - p2 = LIST_NEXT(p2, p_reapsibling)) { - if (t->parent == reap && - (rk->rk_flags & REAPER_KILL_SUBTREE) != 0 && - p2->p_reapsubtree != rk->rk_subtree) - continue; - if ((p2->p_treeflag & P_TREE_REAPER) != 0) - reap_kill_sched(&tracker, p2); - reap_kill_proc(td, p2, &ksi, rk, &error); - } - free(t, M_TEMP); - } + reap_kill_subtree(td, p, reaper, rk, &ksi, &error); } PROC_LOCK(p); return (error); diff --git a/sys/kern/subr_unit.c b/sys/kern/subr_unit.c --- a/sys/kern/subr_unit.c +++ b/sys/kern/subr_unit.c @@ -312,12 +312,15 @@ { struct unr *up; - mtx_assert(uh->mtx, MA_OWNED); + if (uh->mtx != NULL) + mtx_assert(uh->mtx, MA_OWNED); while ((up = TAILQ_FIRST(&uh->ppfree)) != NULL) { TAILQ_REMOVE(&uh->ppfree, up, list); - mtx_unlock(uh->mtx); + if (uh->mtx != NULL) + mtx_unlock(uh->mtx); Free(up); - mtx_lock(uh->mtx); + if (uh->mtx != NULL) + mtx_lock(uh->mtx); } } @@ -326,9 +329,11 @@ clean_unrhdr(struct unrhdr *uh) { - mtx_lock(uh->mtx); + if (uh->mtx != NULL) + mtx_lock(uh->mtx); clean_unrhdrl(uh); - mtx_unlock(uh->mtx); + if (uh->mtx != NULL) + mtx_unlock(uh->mtx); } void @@ -337,7 +342,9 @@ KASSERT(low >= 0 && low <= high, ("UNR: use error: new_unrhdr(%d, %d)", low, high)); - if (mutex != NULL) + if (mutex == UNR_NO_MTX) + uh->mtx = NULL; + else if 
(mutex != NULL) uh->mtx = mutex; else uh->mtx = &unitmtx; @@ -347,6 +354,8 @@ uh->high = high; uh->first = 0; uh->last = 1 + (high - low); + uh->busy = 0; + uh->alloc = 0; check_unrhdr(uh, __LINE__); } @@ -606,7 +615,8 @@ u_int x; int y; - mtx_assert(uh->mtx, MA_OWNED); + if (uh->mtx != NULL) + mtx_assert(uh->mtx, MA_OWNED); check_unrhdr(uh, __LINE__); x = uh->low + uh->first; @@ -651,10 +661,12 @@ { int i; - mtx_lock(uh->mtx); + if (uh->mtx != NULL) + mtx_lock(uh->mtx); i = alloc_unrl(uh); clean_unrhdrl(uh); - mtx_unlock(uh->mtx); + if (uh->mtx != NULL) + mtx_unlock(uh->mtx); return (i); } @@ -665,7 +677,8 @@ struct unrb *ub; u_int i, last, tl; - mtx_assert(uh->mtx, MA_OWNED); + if (uh->mtx != NULL) + mtx_assert(uh->mtx, MA_OWNED); if (item < uh->low + uh->first || item > uh->high) return (-1); @@ -771,9 +784,11 @@ p1 = Malloc(sizeof(struct unr)); p2 = Malloc(sizeof(struct unr)); - mtx_lock(uh->mtx); + if (uh->mtx != NULL) + mtx_lock(uh->mtx); i = alloc_unr_specificl(uh, item, &p1, &p2); - mtx_unlock(uh->mtx); + if (uh->mtx != NULL) + mtx_unlock(uh->mtx); if (p1 != NULL) Free(p1); @@ -904,10 +919,12 @@ WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL, "free_unr"); p1 = Malloc(sizeof(struct unr)); p2 = Malloc(sizeof(struct unr)); - mtx_lock(uh->mtx); + if (uh->mtx != NULL) + mtx_lock(uh->mtx); free_unrl(uh, item, &p1, &p2); clean_unrhdrl(uh); - mtx_unlock(uh->mtx); + if (uh->mtx != NULL) + mtx_unlock(uh->mtx); if (p1 != NULL) Free(p1); if (p2 != NULL) diff --git a/sys/sys/proc.h b/sys/sys/proc.h --- a/sys/sys/proc.h +++ b/sys/sys/proc.h @@ -1232,6 +1232,8 @@ void thread_unsuspend(struct proc *p); void thread_wait(struct proc *p); +void stop_all_proc_block(void); +void stop_all_proc_unblock(void); void stop_all_proc(void); void resume_all_proc(void); diff --git a/sys/sys/systm.h b/sys/sys/systm.h --- a/sys/sys/systm.h +++ b/sys/sys/systm.h @@ -498,6 +498,7 @@ * Unit number allocation API. 
(kern/subr_unit.c) */ struct unrhdr; +#define UNR_NO_MTX ((void *)(uintptr_t)-1) struct unrhdr *new_unrhdr(int low, int high, struct mtx *mutex); void init_unrhdr(struct unrhdr *uh, int low, int high, struct mtx *mutex); void delete_unrhdr(struct unrhdr *uh);