diff --git a/kern/kern_jail.c b/kern/kern_jail.c --- a/kern/kern_jail.c +++ b/kern/kern_jail.c @@ -141,12 +141,13 @@ static int do_jail_attach(struct thread *td, struct prison *pr, int drflags); static void prison_complete(void *context, int pending); static void prison_deref(struct prison *pr, int flags); +static void prison_deref_kill(struct prison *pr, struct prisonlist *freeprison); static int prison_lock_xlock(struct prison *pr, int flags); static void prison_free_not_last(struct prison *pr); +static void prison_proc_free_not_last(struct prison *pr); static void prison_set_allow_locked(struct prison *pr, unsigned flag, int enable); static char *prison_path(struct prison *pr1, struct prison *pr2); -static void prison_remove_one(struct prison *pr); #ifdef RACCT static void prison_racct_attach(struct prison *pr); static void prison_racct_modify(struct prison *pr); @@ -156,9 +157,10 @@ /* Flags for prison_deref */ #define PD_DEREF 0x01 /* Decrement pr_ref */ #define PD_DEUREF 0x02 /* Decrement pr_uref */ -#define PD_LOCKED 0x04 /* pr_mtx is held */ -#define PD_LIST_SLOCKED 0x08 /* allprison_lock is held shared */ -#define PD_LIST_XLOCKED 0x10 /* allprison_lock is held exclusive */ +#define PD_KILL 0x04 /* Remove jail, kill processes, etc */ +#define PD_LOCKED 0x10 /* pr_mtx is held */ +#define PD_LIST_SLOCKED 0x20 /* allprison_lock is held shared */ +#define PD_LIST_XLOCKED 0x40 /* allprison_lock is held exclusive */ /* * Parameter names corresponding to PR_* flag values. Size values are for kvm @@ -1758,6 +1760,12 @@ pr->pr_flags = (pr->pr_flags & ~ch_flags) | pr_flags; mtx_unlock(&pr->pr_mtx); drflags &= ~PD_LOCKED; + /* + * Any errors past this point will need to de-persist newly created + * prisons, as well as call remove methods. 
+ */ + if (created) + drflags |= PD_KILL; #ifdef RACCT if (racct_enable && created) @@ -1818,14 +1826,11 @@ if (born) { error = osd_jail_call(pr, PR_METHOD_CREATE, opts); if (error) - goto done_remove; + goto done_deref; } error = osd_jail_call(pr, PR_METHOD_SET, opts); - if (error) { - if (born) - goto done_remove; + if (error) goto done_deref; - } /* New prisons are now ready to be seen. */ if (created) { @@ -1841,8 +1846,6 @@ drflags &= ~(PD_LOCKED | PD_LIST_XLOCKED); if (error) { vfs_opterror(opts, "attach failed"); - if (born) - goto done_remove; goto done_deref; } } @@ -1857,28 +1860,9 @@ } #endif + drflags &= ~PD_KILL; td->td_retval[0] = pr->pr_id; - goto done_deref; - done_remove: - /* - * prison_deref will call the remove method when alive prisons die; - * otherwise it needs to be called now. - */ - if (!(drflags & (PD_LIST_SLOCKED | PD_LIST_XLOCKED))) { - sx_xlock(&allprison_lock); - drflags |= PD_LIST_XLOCKED; - } - if (pr->pr_state != PRISON_STATE_ALIVE) - (void)osd_jail_call(pr, PR_METHOD_REMOVE, NULL); - /* Remove the persist flag from new (or resurrected) prisons. */ - drflags = prison_lock_xlock(pr, drflags); - if (pr->pr_flags & PR_PERSIST) { - pr->pr_flags &= ~PR_PERSIST; - drflags |= PD_DEUREF; - prison_free_not_last(pr); - } - done_deref: /* Release any temporary prison holds and/or locks. */ if (pr != NULL) @@ -2302,8 +2286,8 @@ int sys_jail_remove(struct thread *td, struct jail_remove_args *uap) { - struct prison *pr, *cpr, *lpr; - int descend, error; + struct prison *pr; + int error; error = priv_check(td, PRIV_JAIL_REMOVE); if (error) @@ -2315,80 +2299,16 @@ sx_xunlock(&allprison_lock); return (EINVAL); } - - /* Remove all descendants of this prison, then remove this prison. */ - prison_hold(pr); - if (!LIST_EMPTY(&pr->pr_children)) { + if (!prison_isalive(pr)) { + /* Silently ignore already-dying prisons. 
*/ mtx_unlock(&pr->pr_mtx); - lpr = NULL; - FOREACH_PRISON_DESCENDANT(pr, cpr, descend) { - KASSERT(prison_isvalid(cpr), - ("Found invalid prison %p", cpr)); - prison_hold(cpr); - if (lpr != NULL) { - mtx_lock(&lpr->pr_mtx); - prison_remove_one(lpr); - sx_xlock(&allprison_lock); - } - lpr = cpr; - } - if (lpr != NULL) { - mtx_lock(&lpr->pr_mtx); - prison_remove_one(lpr); - sx_xlock(&allprison_lock); - } - mtx_lock(&pr->pr_mtx); + sx_xunlock(&allprison_lock); + return (0); } - prison_remove_one(pr); + prison_deref(pr, PD_KILL | PD_LOCKED | PD_LIST_XLOCKED); return (0); } -static void -prison_remove_one(struct prison *pr) -{ - struct proc *p; - int drflags; - - drflags = PD_DEREF | PD_LOCKED | PD_LIST_XLOCKED; - - /* If the prison was persistent, it is not anymore. */ - if (pr->pr_flags & PR_PERSIST) { - prison_free_not_last(pr); - drflags |= PD_DEUREF; - pr->pr_flags &= ~PR_PERSIST; - } - - /* - * jail_remove added a reference. If that's the only one, remove - * the prison now. refcount(9) doesn't guarantee the cache coherence - * of non-zero counters, so force it here. - */ - KASSERT(refcount_load(&pr->pr_ref) > 0, - ("prison_remove_one removing a dead prison (jid=%d)", pr->pr_id)); - if (atomic_load_acq_int(&pr->pr_ref) == 1) { - prison_deref(pr, drflags); - return; - } - - mtx_unlock(&pr->pr_mtx); - sx_xunlock(&allprison_lock); - drflags &= ~(PD_LOCKED | PD_LIST_XLOCKED); - /* - * Kill all processes unfortunate enough to be attached to this prison. - */ - sx_slock(&allproc_lock); - FOREACH_PROC_IN_SYSTEM(p) { - PROC_LOCK(p); - if (p->p_state != PRS_NEW && p->p_ucred && - p->p_ucred->cr_prison == pr) - kern_psignal(p, SIGKILL); - PROC_UNLOCK(p); - } - sx_sunlock(&allproc_lock); - /* Remove the temporary reference added by jail_remove. 
*/ - prison_deref(pr, drflags); -} - /* * struct jail_attach_args { * int jid; @@ -2735,6 +2655,24 @@ } } +static void +prison_proc_free_not_last(struct prison *pr) +{ +#ifdef INVARIANTS + int lastref; + + KASSERT(refcount_load(&pr->pr_uref) > 0, + ("Trying to free dead prison %p (jid=%d).", + pr, pr->pr_id)); + lastref = refcount_release(&pr->pr_uref); + KASSERT(!lastref, + ("prison_proc_free_not_last freed last uref on prison %p (jid=%d).", + pr, pr->pr_id)); +#else + refcount_release(&pr->pr_uref); +#endif +} + /* * Complete a call to either prison_free or prison_proc_free. */ @@ -2770,14 +2708,45 @@ prison_deref(struct prison *pr, int flags) { struct prisonlist freeprison; - struct prison *ppr, *rpr, *tpr; + struct prison *killpr, *ppr, *rpr, *tpr; + struct proc *p; + killpr = NULL; TAILQ_INIT(&freeprison); /* * Release this prison as requested, which may cause its parent to be * released, and then maybe its grandparent, etc. */ for (;;) { + if (flags & PD_KILL) { + /* Kill the prison and its descendants. */ + flags &= ~PD_KILL; + if (!(flags & PD_DEREF)) { + prison_hold(pr); + flags |= PD_DEREF; + } + flags = prison_lock_xlock(pr, flags); + if (pr->pr_state != PRISON_STATE_DYING) { + /* + * The prison might currently be invalid, + * but call it alive now so PD_DEUREF will + * notice it later. + */ + pr->pr_state = PRISON_STATE_ALIVE; + if (!(flags & PD_DEUREF)) { + refcount_acquire(&pr->pr_uref); + flags |= PD_DEUREF; + } + prison_deref_kill(pr, &freeprison); + } + /* + * Any remaining user references are probably processes + * that need to be killed, either in this prison or its + * descendants. + */ + if (atomic_load_acq_int(&pr->pr_uref) > 1) + killpr = pr; + } if (flags & PD_DEUREF) { /* Drop a user reference. 
*/ KASSERT(refcount_load(&pr->pr_uref) > 0, @@ -2858,6 +2827,28 @@ sx_sunlock(&allprison_lock); else if (flags & PD_LIST_XLOCKED) sx_xunlock(&allprison_lock); + + if (killpr != NULL) { + /* + * Find and kill any processes attached to the killed prison + * or its descendants. + */ + sx_slock(&allproc_lock); + FOREACH_PROC_IN_SYSTEM(p) { + PROC_LOCK(p); + if (p->p_state != PRS_NEW && p->p_ucred != NULL) { + for (ppr = p->p_ucred->cr_prison; + ppr != &prison0; + ppr = ppr->pr_parent) + if (ppr == killpr) { + kern_psignal(p, SIGKILL); + break; + } + } + PROC_UNLOCK(p); + } + sx_sunlock(&allproc_lock); + } /* * Finish removing any unreferenced prisons, which couldn't happen @@ -2887,6 +2878,71 @@ TAILQ_REMOVE(&freeprison, rpr, pr_list); free(rpr, M_PRISON); } +} + +/* + * Kill the prison and its descendants. Mark them as dying, clear the + * persist flag, and call module remove methods. + */ +static void +prison_deref_kill(struct prison *pr, struct prisonlist *freeprison) +{ + struct prison *cpr, *ppr; + bool descend; + + /* + * The operation on each descendant is similar to what prison_deref() + * does when losing the last references, plus clearing PR_PERSIST. 
+ */ + mtx_unlock(&pr->pr_mtx); + FOREACH_PRISON_DESCENDANT_PRE_POST(pr, cpr, descend) { + if (!prison_isalive(cpr)) + continue; + if (descend) { + prison_hold(cpr); + prison_proc_hold(cpr); + continue; + } + mtx_lock(&cpr->pr_mtx); + if (cpr->pr_flags & PR_PERSIST) { + cpr->pr_flags &= ~PR_PERSIST; + prison_proc_free_not_last(cpr); + prison_free_not_last(cpr); + } + if (refcount_release(&cpr->pr_uref)) { + cpr->pr_state = PRISON_STATE_DYING; + mtx_unlock(&cpr->pr_mtx); + (void)osd_jail_call(cpr, PR_METHOD_REMOVE, NULL); + mtx_lock(&cpr->pr_mtx); + } + if (refcount_release(&cpr->pr_ref)) { + cpr->pr_state = PRISON_STATE_INVALID; + TAILQ_REMOVE(&allprison, cpr, pr_list); + TAILQ_INSERT_TAIL(freeprison, cpr, pr_list); + mtx_unlock(&cpr->pr_mtx); + ppr = cpr->pr_parent; + prison_proc_free_not_last(ppr); + prison_free_not_last(ppr); + for (; ppr != NULL; ppr = ppr->pr_parent) + ppr->pr_childcount--; + } + else + mtx_unlock(&cpr->pr_mtx); + } + + mtx_lock(&pr->pr_mtx); + if (pr->pr_flags & PR_PERSIST) { + pr->pr_flags &= ~PR_PERSIST; + prison_proc_free_not_last(pr); + prison_free_not_last(pr); + } + + /* + * Disconnect unreferenced descendants from their parents, + * which couldn't easily be done mid-loop. + */ + TAILQ_FOREACH(cpr, freeprison, pr_list) + LIST_REMOVE(cpr, pr_sibling); } /* diff --git a/sys/jail.h b/sys/jail.h --- a/sys/jail.h +++ b/sys/jail.h @@ -341,6 +341,19 @@ else /* + * Traverse a prison's descendants, visiting both preorder and postorder. + */ +#define FOREACH_PRISON_DESCENDANT_PRE_POST(ppr, cpr, descend) \ + for ((cpr) = (ppr), (descend) = 1; \ + ((cpr) = (descend) \ + ? ((descend) = !LIST_EMPTY(&(cpr)->pr_children)) \ + ? LIST_FIRST(&(cpr)->pr_children) \ + : (cpr) \ + : ((descend) = LIST_NEXT(cpr, pr_sibling) != NULL) \ + ? LIST_NEXT(cpr, pr_sibling) \ + : (cpr)->pr_parent) != (ppr);) + +/* * Attributes of the physical system, and the root of the jail tree. */ extern struct prison prison0;