diff --git a/sys/kern/init_main.c b/sys/kern/init_main.c --- a/sys/kern/init_main.c +++ b/sys/kern/init_main.c @@ -495,6 +495,7 @@ LIST_INSERT_HEAD(&allproc, p, p_list); LIST_INSERT_HEAD(PIDHASH(0), p, p_hash); mtx_init(&pgrp0.pg_mtx, "process group", NULL, MTX_DEF | MTX_DUPOK); + sx_init(&pgrp0.pg_killsx, "killpg racer"); p->p_pgrp = &pgrp0; LIST_INSERT_HEAD(PGRPHASH(0), &pgrp0, pg_hash); LIST_INIT(&pgrp0.pg_members); diff --git a/sys/kern/kern_exit.c b/sys/kern/kern_exit.c --- a/sys/kern/kern_exit.c +++ b/sys/kern/kern_exit.c @@ -220,13 +220,19 @@ p->p_flag2 |= P2_WEXIT; } +void +exit1(struct thread *td, int rval, int signo) +{ + exit2(td, rval, signo, false); +} + /* * Exit: deallocate address space and other resources, change proc state to * zombie, and unlink proc from allproc and parent's lists. Save exit status * and rusage for wait(). Check for child processes and orphan them. */ void -exit1(struct thread *td, int rval, int signo) +exit2(struct thread *td, int rval, int signo, bool dec_killpg_cnt) { struct proc *p, *nq, *q, *t; struct thread *tdt; @@ -304,6 +310,11 @@ ("exit1: proc %p exiting with %d threads", p, p->p_numthreads)); racct_sub(p, RACCT_NTHR, 1); + if (dec_killpg_cnt) { + MPASS(atomic_load_int(&p->p_killpg_cnt) > 0); + atomic_add_int(&p->p_killpg_cnt, -1); + } + /* Let event handler change exit status */ p->p_xexit = rval; p->p_xsig = signo; diff --git a/sys/kern/kern_fork.c b/sys/kern/kern_fork.c --- a/sys/kern/kern_fork.c +++ b/sys/kern/kern_fork.c @@ -856,11 +856,13 @@ struct vmspace *vm2; struct ucred *cred; struct file *fp_procdesc; + struct pgrp *pg; vm_ooffset_t mem_charged; int error, nprocs_new; static int curfail; static struct timeval lastfail; int flags, pages; + bool killsx_locked; flags = fr->fr_flags; pages = fr->fr_pages; @@ -917,6 +919,7 @@ fp_procdesc = NULL; newproc = NULL; vm2 = NULL; + killsx_locked = false; /* * Increment the nprocs resource before allocations occur. 
@@ -946,6 +949,29 @@ } } + /* + * Atomically check for signals and block threads from sending + * a signal to our process group until the child is visible. + */ + pg = p1->p_pgrp; + if (sx_slock_sig(&pg->pg_killsx) != 0) { + error = ERESTART; + goto fail2; + } else if (__predict_false(p1->p_pgrp != pg || sig_intr() != 0 || + atomic_load_int(&p1->p_killpg_cnt) != 0)) { + /* + * Either the process was moved to another process + * group, or there is a pending signal. sx_slock_sig() + * does not check for signals if not sleeping for the + * lock. + */ + sx_sunlock(&pg->pg_killsx); + error = ERESTART; + goto fail2; + } else { + killsx_locked = true; + } + /* * If required, create a process descriptor in the parent first; we * will abandon it if something goes wrong. We don't finit() until @@ -1037,6 +1063,7 @@ } do_fork(td, fr, newproc, td2, vm2, fp_procdesc); + sx_sunlock(&pg->pg_killsx); return (0); fail0: error = EAGAIN; @@ -1055,6 +1082,8 @@ fdrop(fp_procdesc, td); } atomic_add_int(&nprocs, -1); + if (killsx_locked) + sx_sunlock(&pg->pg_killsx); pause("fork", hz / 2); return (error); } diff --git a/sys/kern/kern_proc.c b/sys/kern/kern_proc.c --- a/sys/kern/kern_proc.c +++ b/sys/kern/kern_proc.c @@ -310,6 +310,7 @@ pg = mem; mtx_init(&pg->pg_mtx, "process group", NULL, MTX_DEF | MTX_DUPOK); + sx_init(&pg->pg_killsx, "killpg racer"); return (0); } @@ -573,6 +574,7 @@ int enterpgrp(struct proc *p, pid_t pgid, struct pgrp *pgrp, struct session *sess) { + struct pgrp *old_pgrp; sx_assert(&proctree_lock, SX_XLOCKED); @@ -584,6 +586,11 @@ KASSERT(!SESS_LEADER(p), ("enterpgrp: session leader attempted setpgrp")); + old_pgrp = p->p_pgrp; + if (!sx_try_xlock(&old_pgrp->pg_killsx)) + return (ERESTART); + MPASS(old_pgrp == p->p_pgrp); + if (sess != NULL) { /* * new session @@ -625,6 +632,7 @@ doenterpgrp(p, pgrp); + sx_xunlock(&old_pgrp->pg_killsx); return (0); } @@ -634,6 +642,7 @@ int enterthispgrp(struct proc *p, struct pgrp *pgrp) { + struct pgrp *old_pgrp; 
sx_assert(&proctree_lock, SX_XLOCKED); PROC_LOCK_ASSERT(p, MA_NOTOWNED); @@ -646,8 +655,19 @@ KASSERT(pgrp != p->p_pgrp, ("%s: p %p belongs to pgrp %p", __func__, p, pgrp)); + old_pgrp = p->p_pgrp; + if (!sx_try_xlock(&old_pgrp->pg_killsx)) + return (ERESTART); + MPASS(old_pgrp == p->p_pgrp); + if (!sx_try_xlock(&pgrp->pg_killsx)) { + sx_xunlock(&old_pgrp->pg_killsx); + return (ERESTART); + } + doenterpgrp(p, pgrp); + sx_xunlock(&pgrp->pg_killsx); + sx_xunlock(&old_pgrp->pg_killsx); return (0); } diff --git a/sys/kern/kern_prot.c b/sys/kern/kern_prot.c --- a/sys/kern/kern_prot.c +++ b/sys/kern/kern_prot.c @@ -332,12 +332,13 @@ struct pgrp *newpgrp; struct session *newsess; - error = 0; pgrp = NULL; newpgrp = uma_zalloc(pgrp_zone, M_WAITOK); newsess = malloc(sizeof(struct session), M_SESSION, M_WAITOK | M_ZERO); +again: + error = 0; sx_xlock(&proctree_lock); if (p->p_pgid == p->p_pid || (pgrp = pgfind(p->p_pid)) != NULL) { @@ -345,7 +346,12 @@ PGRP_UNLOCK(pgrp); error = EPERM; } else { - (void)enterpgrp(p, p->p_pid, newpgrp, newsess); + error = enterpgrp(p, p->p_pid, newpgrp, newsess); + if (error == ERESTART) { + sx_xunlock(&proctree_lock); + goto again; + } + MPASS(error == 0); td->td_retval[0] = p->p_pid; newpgrp = NULL; newsess = NULL; @@ -391,10 +397,11 @@ if (uap->pgid < 0) return (EINVAL); - error = 0; - newpgrp = uma_zalloc(pgrp_zone, M_WAITOK); +again: + error = 0; + sx_xlock(&proctree_lock); if (uap->pid != 0 && uap->pid != curp->p_pid) { if ((targp = pfind(uap->pid)) == NULL) { @@ -456,6 +463,8 @@ sx_xunlock(&proctree_lock); KASSERT((error == 0) || (newpgrp != NULL), ("setpgid failed and newpgrp is NULL")); + if (error == ERESTART) + goto again; uma_zfree(pgrp_zone, newpgrp); return (error); } diff --git a/sys/kern/kern_sig.c b/sys/kern/kern_sig.c --- a/sys/kern/kern_sig.c +++ b/sys/kern/kern_sig.c @@ -121,6 +121,7 @@ static struct thread *sigtd(struct proc *p, int sig, bool fast_sigblock); static void sigqueue_start(void); static void 
sigfastblock_setpend(struct thread *td, bool resched); +static void sigexit1(struct thread *td, int sig, ksiginfo_t *ksi) __dead2; static uma_zone_t ksiginfo_zone = NULL; struct filterops sig_filtops = { @@ -1458,7 +1459,7 @@ #endif if (sig == SIGKILL) { proc_td_siginfo_capture(td, &ksi->ksi_info); - sigexit(td, sig); + sigexit1(td, sig, ksi); } } PROC_UNLOCK(p); @@ -1825,7 +1826,7 @@ /* * Common code for kill process group/broadcast kill. - * cp is calling process. + * td is the calling thread, as usual. */ static int killpg1(struct thread *td, int sig, int pgid, int all, ksiginfo_t *ksi) @@ -1847,6 +1848,7 @@ prison_proc_iterate(td->td_ucred->cr_prison, kill_processes_prison_cb, &arg); } else { +again: sx_slock(&proctree_lock); if (pgid == 0) { /* @@ -1862,10 +1864,17 @@ } } sx_sunlock(&proctree_lock); + if (!sx_try_xlock(&pgrp->pg_killsx)) { + PGRP_UNLOCK(pgrp); + sx_xlock(&pgrp->pg_killsx); + sx_xunlock(&pgrp->pg_killsx); + goto again; + } LIST_FOREACH(p, &pgrp->pg_members, p_pglist) { killpg1_sendsig(p, false, &arg); } PGRP_UNLOCK(pgrp); + sx_xunlock(&pgrp->pg_killsx); } MPASS(arg.ret != 0 || arg.found || !arg.sent); if (arg.ret == 0 && !arg.sent) @@ -1928,8 +1937,10 @@ case -1: /* broadcast signal */ return (killpg1(td, signum, 0, 1, &ksi)); case 0: /* signal own process group */ + ksi.ksi_flags |= KSI_KILLPG; return (killpg1(td, signum, 0, 0, &ksi)); default: /* negative explicit process group */ + ksi.ksi_flags |= KSI_KILLPG; return (killpg1(td, signum, -pid, 0, &ksi)); } /* NOTREACHED */ @@ -1980,6 +1991,7 @@ ksi.ksi_code = SI_USER; ksi.ksi_pid = td->td_proc->p_pid; ksi.ksi_uid = td->td_ucred->cr_ruid; + ksi.ksi_flags |= KSI_KILLPG; return (killpg1(td, uap->signum, uap->pgid, 0, &ksi)); } #endif /* COMPAT_43 */ @@ -2367,6 +2379,10 @@ ret = sigqueue_add(sigqueue, sig, ksi); if (ret != 0) return (ret); + if ((ksi->ksi_flags & KSI_KILLPG) != 0) { + sx_assert(&p->p_pgrp->pg_killsx, SX_XLOCKED); + atomic_add_int(&p->p_killpg_cnt, 1); + } signotify(td); /* * 
Defer further processing for signals which are held, @@ -3085,6 +3101,15 @@ } } +static void +sig_handle_killpg(struct proc *p, ksiginfo_t *ksi) +{ + if ((ksi->ksi_flags & KSI_KILLPG) != 0) { + MPASS(atomic_load_int(&p->p_killpg_cnt) > 0); + atomic_add_int(&p->p_killpg_cnt, -1); + } +} + enum sigstatus { SIGSTATUS_HANDLE, SIGSTATUS_HANDLED, @@ -3164,8 +3189,10 @@ * Keep looking if the debugger discarded or * replaced the signal. */ - if (sig == 0) + if (sig == 0) { + sig_handle_killpg(p, &ksi); return (SIGSTATUS_HANDLED); + } /* * If the signal became masked, re-queue it. @@ -3209,6 +3236,7 @@ printf("Process (pid %lu) got signal %d\n", (u_long)p->p_pid, sig); #endif + sig_handle_killpg(p, &ksi); return (SIGSTATUS_IGNORE); } @@ -3226,6 +3254,7 @@ P_SINGLE_EXIT)) != 0 || ((p->p_pgrp-> pg_flags & PGRP_ORPHANED) != 0 && (prop & SIGPROP_TTYSTOP) != 0)) { + sig_handle_killpg(p, &ksi); mtx_lock(&ps->ps_mtx); return (SIGSTATUS_IGNORE); } @@ -3237,6 +3266,7 @@ } WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, &p->p_mtx.lock_object, "Catching SIGSTOP"); + sig_handle_killpg(p, &ksi); sigqueue_delete(&td->td_sigqueue, sig); sigqueue_delete(&p->p_sigqueue, sig); p->p_flag |= P_STOPPED_SIG; @@ -3253,16 +3283,19 @@ * Default action is to ignore; drop it if * not in kern_sigtimedwait(). 
*/ + sig_handle_killpg(p, &ksi); return (SIGSTATUS_IGNORE); } else { return (SIGSTATUS_HANDLE); } case (intptr_t)SIG_IGN: - if ((td->td_flags & TDF_SIGWAIT) == 0) + if ((td->td_flags & TDF_SIGWAIT) == 0) { + sig_handle_killpg(p, &ksi); return (SIGSTATUS_IGNORE); - else + } else { return (SIGSTATUS_HANDLE); + } default: /* @@ -3417,7 +3450,7 @@ */ mtx_unlock(&ps->ps_mtx); proc_td_siginfo_capture(td, &ksi.ksi_info); - sigexit(td, sig); + sigexit1(td, sig, &ksi); /* NOTREACHED */ } else { /* @@ -3447,6 +3480,7 @@ } (*p->p_sysent->sv_sendsig)(action, &ksi, &returnmask); postsig_done(sig, td, ps); + sig_handle_killpg(p, &ksi); } return (1); } @@ -3602,8 +3636,8 @@ * If dumping core, save the signal number for the debugger. Calls exit and * does not return. */ -void -sigexit(struct thread *td, int sig) +static void +sigexit1(struct thread *td, int sig, ksiginfo_t *ksi) { struct proc *p = td->td_proc; @@ -3642,10 +3676,16 @@ sig & WCOREFLAG ? " (core dumped)" : ""); } else PROC_UNLOCK(p); - exit1(td, 0, sig); + exit2(td, 0, sig, ksi != NULL && (ksi->ksi_flags & KSI_KILLPG) != 0); /* NOTREACHED */ } +void +sigexit(struct thread *td, int sig) +{ + sigexit1(td, sig, NULL); +} + /* * Send queued SIGCHLD to parent when child process's state * is changed. diff --git a/sys/kern/kern_thread.c b/sys/kern/kern_thread.c --- a/sys/kern/kern_thread.c +++ b/sys/kern/kern_thread.c @@ -99,7 +99,7 @@ "struct proc KBI p_pid"); _Static_assert(offsetof(struct proc, p_filemon) == 0x3c8, "struct proc KBI p_filemon"); -_Static_assert(offsetof(struct proc, p_comm) == 0x3e0, +_Static_assert(offsetof(struct proc, p_comm) == 0x3e4, "struct proc KBI p_comm"); _Static_assert(offsetof(struct proc, p_emuldata) == 0x4d0, "struct proc KBI p_emuldata"); diff --git a/sys/sys/proc.h b/sys/sys/proc.h --- a/sys/sys/proc.h +++ b/sys/sys/proc.h @@ -113,6 +113,8 @@ pid_t pg_id; /* (c) Process group id. 
*/ struct mtx pg_mtx; /* Mutex to protect members */ int pg_flags; /* (m) PGRP_ flags */ + struct sx pg_killsx; /* Mutual exclusion between group member + * fork() and killpg() */ }; #define PGRP_ORPHANED 0x00000001 /* Group is orphaned */ @@ -720,6 +722,7 @@ int p_pendingexits; /* (c) Count of pending thread exits. */ struct filemon *p_filemon; /* (c) filemon-specific data. */ int p_pdeathsig; /* (c) Signal from parent on exit. */ + int p_killpg_cnt; /* End area that is zeroed on creation. */ #define p_endzero p_magic @@ -1234,6 +1237,7 @@ void cpu_exit(struct thread *); void exit1(struct thread *, int, int) __dead2; +void exit2(struct thread *, int, int, bool) __dead2; void cpu_copy_thread(struct thread *td, struct thread *td0); bool cpu_exec_vmspace_reuse(struct proc *p, struct vm_map *map); int cpu_fetch_syscall_args(struct thread *td); diff --git a/sys/sys/signalvar.h b/sys/sys/signalvar.h --- a/sys/sys/signalvar.h +++ b/sys/sys/signalvar.h @@ -240,7 +240,8 @@ #define KSI_SIGQ 0x08 /* Generated by sigqueue, might ret EAGAIN. */ #define KSI_HEAD 0x10 /* Insert into head, not tail. */ #define KSI_PTRACE 0x20 /* Generated by ptrace. */ -#define KSI_COPYMASK (KSI_TRAP | KSI_SIGQ | KSI_PTRACE) +#define KSI_KILLPG 0x40 /* killpg - update p_killpg_cnt */ +#define KSI_COPYMASK (KSI_TRAP | KSI_SIGQ | KSI_PTRACE | KSI_KILLPG) #define KSI_ONQ(ksi) ((ksi)->ksi_sigq != NULL)