diff --git a/lib/libc/sys/jail.2 b/lib/libc/sys/jail.2 --- a/lib/libc/sys/jail.2 +++ b/lib/libc/sys/jail.2 @@ -25,7 +25,7 @@ .\" .\" $FreeBSD$ .\" -.Dd February 8, 2012 +.Dd January 25, 2021 .Dt JAIL 2 .Os .Sh NAME @@ -187,7 +187,12 @@ .Fn jail_attach system call. .It Dv JAIL_DYING -Allow setting a jail that is in the process of being removed. +This is deprecated and has no effect. +It used to allow setting a jail that is in the process of being removed. +Now such jails are always replaced when a new jail is created with the same +.Va jid +or +.Va name . .El .Pp The diff --git a/sys/kern/kern_fork.c b/sys/kern/kern_fork.c --- a/sys/kern/kern_fork.c +++ b/sys/kern/kern_fork.c @@ -710,6 +710,16 @@ */ knote_fork(p1->p_klist, p2->p_pid); + /* + * See if the containing prison died while the process was still new. + */ + if (!prison_isalive(p2->p_ucred->cr_prison)) { + /* Follow the prison into death. */ + PROC_LOCK(p2); + kern_psignal(p2, SIGKILL); + PROC_UNLOCK(p2); + } + /* * Now can be swapped. 
*/ diff --git a/sys/kern/kern_jail.c b/sys/kern/kern_jail.c --- a/sys/kern/kern_jail.c +++ b/sys/kern/kern_jail.c @@ -106,6 +106,7 @@ .pr_path = "/", .pr_securelevel = -1, .pr_devfs_rsnum = 0, + .pr_state = PRISON_STATE_ALIVE, .pr_childmax = JAIL_MAX, .pr_hostuuid = DEFAULT_HOSTUUID, .pr_children = LIST_HEAD_INITIALIZER(prison0.pr_children), @@ -140,10 +141,16 @@ static int do_jail_attach(struct thread *td, struct prison *pr); static void prison_complete(void *context, int pending); static void prison_deref(struct prison *pr, int flags); +static int prison_deref_lock(struct prison *pr, int flags); +static int prison_deref_kill(struct prison *pr, struct prisonlist *freeprison); +static void prison_deref_kill_descend(struct prison *pr); +static int prison_deref_kill_ascend(struct prison *pr, + struct prisonlist *freeprison); +static void prison_free_not_last(struct prison *pr); +static void prison_proc_free_not_last(struct prison *pr); static void prison_set_allow_locked(struct prison *pr, unsigned flag, int enable); static char *prison_path(struct prison *pr1, struct prison *pr2); -static void prison_remove_one(struct prison *pr); #ifdef RACCT static void prison_racct_attach(struct prison *pr); static void prison_racct_modify(struct prison *pr); @@ -153,9 +160,10 @@ /* Flags for prison_deref */ #define PD_DEREF 0x01 /* Decrement pr_ref */ #define PD_DEUREF 0x02 /* Decrement pr_uref */ -#define PD_LOCKED 0x04 /* pr_mtx is held */ -#define PD_LIST_SLOCKED 0x08 /* allprison_lock is held shared */ -#define PD_LIST_XLOCKED 0x10 /* allprison_lock is held exclusive */ +#define PD_KILL 0x04 /* Remove jail, kill processes, etc */ +#define PD_LOCKED 0x08 /* pr_mtx is held */ +#define PD_LIST_SLOCKED 0x10 /* allprison_lock is held shared */ +#define PD_LIST_XLOCKED 0x20 /* allprison_lock is held exclusive */ /* * Parameter names corresponding to PR_* flag values. 
Size values are for kvm @@ -526,10 +534,10 @@ #endif unsigned long hid; size_t namelen, onamelen, pnamelen; - int born, created, cuflags, descend, drflags, enforce; + int created, cuflags, descend, drflags, enforce; int error, errmsg_len, errmsg_pos; int gotchildmax, gotenforce, gothid, gotrsnum, gotslevel; - int jid, jsys, len, level; + int jid, jsys, len, level, tjid; int childmax, osreldt, rsnum, slevel; #if defined(INET) || defined(INET6) int ii, ij; @@ -540,9 +548,8 @@ #ifdef INET6 int ip6s, redo_ip6; #endif - uint64_t pr_allow, ch_allow, pr_flags, ch_flags; + uint64_t pr_allow, ch_allow, pr_flags, ch_flags, tallow; uint64_t pr_allow_diff; - unsigned tallow; char numbuf[12]; error = priv_check(td, PRIV_JAIL_SET); @@ -550,9 +557,6 @@ error = priv_check(td, PRIV_JAIL_ATTACH); if (error) return (error); - mypr = td->td_ucred->cr_prison; - if ((flags & JAIL_CREATE) && mypr->pr_childmax == 0) - return (EPERM); if (flags & ~JAIL_SET_MASK) return (EINVAL); @@ -661,12 +665,6 @@ } ch_flags |= jsf->new | jsf->disable; } - if ((flags & (JAIL_CREATE | JAIL_UPDATE | JAIL_ATTACH)) == JAIL_CREATE - && !(pr_flags & PR_PERSIST)) { - error = EINVAL; - vfs_opterror(opts, "new jail must persist or attach"); - goto done_errmsg; - } #ifdef VIMAGE if ((flags & JAIL_UPDATE) && (ch_flags & PR_VNET)) { error = EINVAL; @@ -984,9 +982,10 @@ * Find the specified jail, or at least its parent. * This abuses the file error codes ENOENT and EEXIST. */ + ppr = mypr = td->td_ucred->cr_prison; pr = NULL; - ppr = mypr; inspr = NULL; + deadpr = NULL; if (cuflags == JAIL_CREATE && jid == 0 && name != NULL) { namelc = strrchr(name, '.'); jid = strtoul(namelc != NULL ? namelc + 1 : name, &p, 10); @@ -1006,68 +1005,45 @@ * where it can be inserted later. 
*/ TAILQ_FOREACH(inspr, &allprison, pr_list) { - if (inspr->pr_id == jid) { - mtx_lock(&inspr->pr_mtx); - if (prison_isvalid(inspr)) { - pr = inspr; - drflags |= PD_LOCKED; - inspr = NULL; - } else - mtx_unlock(&inspr->pr_mtx); - break; - } + if (inspr->pr_id < jid) + continue; if (inspr->pr_id > jid) break; - } - if (pr != NULL) { - ppr = pr->pr_parent; - /* Create: jid must not exist. */ - if (cuflags == JAIL_CREATE) { - /* - * Even creators that cannot see the jail will - * get EEXIST. - */ - error = EEXIST; - vfs_opterror(opts, "jail %d already exists", - jid); - goto done_deref; - } - if (!prison_ischild(mypr, pr)) { - /* - * Updaters get ENOENT if they cannot see the - * jail. This is true even for CREATE | UPDATE, - * which normally cannot give this error. - */ - error = ENOENT; - vfs_opterror(opts, "jail %d not found", jid); - goto done_deref; - } else if (!prison_isalive(pr)) { - if (!(flags & JAIL_DYING)) { - error = ENOENT; - vfs_opterror(opts, "jail %d is dying", - jid); - goto done_deref; - } else if ((flags & JAIL_ATTACH) || - (pr_flags & PR_PERSIST)) { - /* - * A dying jail might be resurrected - * (via attach or persist), but first - * it must determine if another jail - * has claimed its name. Accomplish - * this by implicitly re-setting the - * name. - */ - if (name == NULL) - name = prison_name(mypr, pr); - } - } - } else { - /* Update: jid must exist. */ - if (cuflags == JAIL_UPDATE) { - error = ENOENT; - vfs_opterror(opts, "jail %d not found", jid); - goto done_deref; + if (inspr->pr_state != PRISON_STATE_DYING) { + /* The jail exists. */ + pr = inspr; + mtx_lock(&pr->pr_mtx); + drflags |= PD_LOCKED; + ppr = pr->pr_parent; + } else { + /* Note a dying jail to handle later. */ + deadpr = inspr; } + inspr = NULL; + break; + } + + if (cuflags == JAIL_CREATE && pr != NULL) { + /* + * Creators get EEXIST if the jail already exists, + * even if they cannot see it. 
+ */ + error = EEXIST; + vfs_opterror(opts, "jail %d already exists", jid); + goto done_deref; + } + if ((pr == NULL) + ? cuflags == JAIL_UPDATE + : !prison_isalive(pr) || !prison_ischild(mypr, pr)) { + /* + * Updaters get ENOENT for nonexistent jails, + * or if the jail exists but they cannot see it. + * The latter case is true even for CREATE | UPDATE, + * which normally cannot give this error. + */ + error = ENOENT; + vfs_opterror(opts, "jail %d not found", jid); + goto done_deref; } } /* @@ -1099,7 +1075,9 @@ } else { *namelc = '\0'; ppr = prison_find_name(mypr, name); - if (ppr == NULL) { + if (ppr == NULL || !prison_isalive(ppr)) { + if (ppr != NULL) + mtx_unlock(&ppr->pr_mtx); error = ENOENT; vfs_opterror(opts, "jail \"%s\" not found", name); @@ -1113,61 +1091,34 @@ if (namelc[0] != '\0') { pnamelen = (ppr == &prison0) ? 0 : strlen(ppr->pr_name) + 1; - name_again: - deadpr = NULL; FOREACH_PRISON_CHILD(ppr, tpr) { - if (tpr != pr && - !strcmp(tpr->pr_name + pnamelen, namelc)) { - mtx_lock(&tpr->pr_mtx); - if (prison_isalive(tpr)) { - if (pr == NULL && - cuflags != JAIL_CREATE) { - /* - * Use this jail - * for updates. - */ - pr = tpr; - drflags |= PD_LOCKED; - break; - } - /* - * Create, or update(jid): - * name must not exist in an - * active sibling jail. - */ - error = EEXIST; - mtx_unlock(&tpr->pr_mtx); - vfs_opterror(opts, - "jail \"%s\" already exists", - name); - goto done_deref; - } - if (pr == NULL && - cuflags != JAIL_CREATE && - prison_isvalid(tpr)) - deadpr = tpr; - mtx_unlock(&tpr->pr_mtx); - } - } - /* If no active jail is found, use a dying one. 
*/ - if (deadpr != NULL && pr == NULL) { - if (flags & JAIL_DYING) { - mtx_lock(&deadpr->pr_mtx); - if (!prison_isvalid(deadpr)) { - mtx_unlock(&deadpr->pr_mtx); - goto name_again; - } - pr = deadpr; - drflags |= PD_LOCKED; - } else if (cuflags == JAIL_UPDATE) { - error = ENOENT; + if (tpr == pr || + tpr->pr_state == PRISON_STATE_DYING || + strcmp(tpr->pr_name + pnamelen, namelc)) + continue; + if (cuflags == JAIL_CREATE || pr != NULL) { + /* + * Create, or update(jid): name must + * not exist in an active sibling jail. + */ + error = EEXIST; vfs_opterror(opts, - "jail \"%s\" is dying", name); + "jail \"%s\" already exists", name); goto done_deref; } + /* Use this jail for updates. */ + pr = tpr; + mtx_lock(&pr->pr_mtx); + drflags |= PD_LOCKED; + break; } - /* Update: name must exist if no jid. */ - else if (cuflags == JAIL_UPDATE && pr == NULL) { + /* + * Update: name must exist if no jid. As with the jid + * case, the jail must be currently visible, or else + * even CREATE | UPDATE will get an error. + */ + if ((pr == NULL) + ? cuflags == JAIL_UPDATE : !prison_isalive(pr)) { error = ENOENT; vfs_opterror(opts, "jail \"%s\" not found", name); @@ -1182,7 +1133,7 @@ goto done_deref; } - /* If there's no prison to update, create a new one and link it in. */ + /* If there's no prison to update, create a new one. 
*/ created = pr == NULL; if (created) { for (tpr = mypr; tpr != NULL; tpr = tpr->pr_parent) @@ -1191,36 +1142,75 @@ vfs_opterror(opts, "prison limit exceeded"); goto done_deref; } - mtx_lock(&ppr->pr_mtx); - if (!prison_isvalid(ppr)) { - mtx_unlock(&ppr->pr_mtx); + if (!prison_isalive(ppr)) { error = ENOENT; vfs_opterror(opts, "jail \"%s\" not found", prison_name(mypr, ppr)); goto done_deref; } prison_hold(ppr); - refcount_acquire(&ppr->pr_uref); - mtx_unlock(&ppr->pr_mtx); - - if (jid == 0 && (jid = get_next_prid(&inspr)) == 0) { - error = EAGAIN; - vfs_opterror(opts, "no available jail IDs"); - pr = ppr; - drflags |= PD_DEREF | PD_DEUREF; - goto done_deref; + prison_proc_hold(ppr); + + /* + * If no jid was explicitly given, or if a dying jail is being + * replaced, find free ID. + */ + if (jid > 0 && deadpr == NULL) + tjid = jid; + else { + tjid = get_next_prid(&inspr); + if (tjid == 0) { + error = EAGAIN; + vfs_opterror(opts, "no available jail IDs"); + pr = ppr; + drflags |= PD_DEREF | PD_DEUREF; + goto done_deref; + } } + /* + * Start the prison with a reference, matching the one added + * to existing prisons. + */ pr = malloc(sizeof(*pr), M_PRISON, M_WAITOK | M_ZERO); + pr->pr_state = PRISON_STATE_INVALID; + refcount_init(&pr->pr_ref, 1); + refcount_init(&pr->pr_uref, 1); + drflags |= PD_DEREF | PD_DEUREF; LIST_INIT(&pr->pr_children); mtx_init(&pr->pr_mtx, "jail mutex", NULL, MTX_DEF | MTX_DUPOK); TASK_INIT(&pr->pr_task, 0, prison_complete, pr); - pr->pr_id = jid; + if (deadpr == NULL) { + pr->pr_id = jid = tjid; + tpr = pr; + } else { + pr->pr_id = jid; + /* + * The prison being created has the same ID as a dying + * one. Handle this by swapping the new prison with + * the dying one, and then re-adding the dying jail + * with the new ID. This may cause some confusion to + * user space, but only to those listing dying jails. 
+ */ + TAILQ_INSERT_BEFORE(deadpr, pr, pr_list); + TAILQ_REMOVE(&allprison, deadpr, pr_list); + if (inspr == deadpr) + inspr = pr; + mtx_lock(&deadpr->pr_mtx); + deadpr->pr_id = tjid; + mtx_unlock(&deadpr->pr_mtx); + tpr = deadpr; + } + + /* + * Link the prison into the allprison list in ID order, + * and into its parent's child list in no particular order. + */ if (inspr != NULL) - TAILQ_INSERT_BEFORE(inspr, pr, pr_list); + TAILQ_INSERT_BEFORE(inspr, tpr, pr_list); else - TAILQ_INSERT_TAIL(&allprison, pr, pr_list); + TAILQ_INSERT_TAIL(&allprison, tpr, pr_list); pr->pr_parent = ppr; LIST_INSERT_HEAD(&ppr->pr_children, pr, pr_sibling); @@ -1305,18 +1295,14 @@ mtx_lock(&pr->pr_mtx); drflags |= PD_LOCKED; - /* - * New prisons do not yet have a reference, because we do not - * want others to see the incomplete prison once the - * allprison_lock is downgraded. - */ } else { /* * Grab a reference for existing prisons, to ensure they * continue to exist for the duration of the call. */ prison_hold(pr); - drflags |= PD_DEREF; + prison_proc_hold(pr); + drflags |= PD_DEREF | PD_DEUREF; #if defined(VIMAGE) && (defined(INET) || defined(INET6)) if ((pr->pr_flags & PR_VNET) && (ch_flags & (PR_IP4_USER | PR_IP6_USER))) { @@ -1434,7 +1420,7 @@ #ifdef VIMAGE (tpr != tppr && (tpr->pr_flags & PR_VNET)) || #endif - refcount_load(&tpr->pr_uref) == 0) { + tpr->pr_state == PRISON_STATE_DYING) { descend = 0; continue; } @@ -1502,7 +1488,7 @@ #ifdef VIMAGE (tpr != tppr && (tpr->pr_flags & PR_VNET)) || #endif - refcount_load(&tpr->pr_uref) == 0) { + tpr->pr_state == PRISON_STATE_DYING) { descend = 0; continue; } @@ -1731,23 +1717,26 @@ prison_set_allow_locked(pr, tallow, 0); /* * Persistent prisons get an extra reference, and prisons losing their - * persist flag lose that reference. Only do this for existing prisons - * for now, so new ones will remain unseen until after the module - * handlers have completed. + * persist flag lose that reference. 
*/ - born = !prison_isalive(pr); - if (!created && (ch_flags & PR_PERSIST & (pr_flags ^ pr->pr_flags))) { + if (ch_flags & PR_PERSIST & (pr_flags ^ pr->pr_flags)) { if (pr_flags & PR_PERSIST) { prison_hold(pr); - refcount_acquire(&pr->pr_uref); + prison_proc_hold(pr); } else { - refcount_release(&pr->pr_ref); - drflags |= PD_DEUREF; + prison_proc_free_not_last(pr); + prison_free_not_last(pr); } } pr->pr_flags = (pr->pr_flags & ~ch_flags) | pr_flags; mtx_unlock(&pr->pr_mtx); drflags &= ~PD_LOCKED; + /* + * Any errors past this point will need to de-persist newly created + * prisons, as well as call remove methods. + */ + if (created) + drflags |= PD_KILL; #ifdef RACCT if (racct_enable && created) @@ -1807,19 +1796,14 @@ /* Let the modules do their work. */ sx_downgrade(&allprison_lock); drflags = (drflags & ~PD_LIST_XLOCKED) | PD_LIST_SLOCKED; - if (born) { + if (created) { error = osd_jail_call(pr, PR_METHOD_CREATE, opts); - if (error) { - (void)osd_jail_call(pr, PR_METHOD_REMOVE, NULL); + if (error) goto done_deref; - } } error = osd_jail_call(pr, PR_METHOD_SET, opts); - if (error) { - if (born) - (void)osd_jail_call(pr, PR_METHOD_REMOVE, NULL); + if (error) goto done_deref; - } /* Attach this process to the prison if requested. */ if (flags & JAIL_ATTACH) { @@ -1827,10 +1811,6 @@ error = do_jail_attach(td, pr); drflags &= ~PD_LIST_SLOCKED; if (error) { - if (created) { - /* do_jail_attach has removed the prison. */ - pr = NULL; - } vfs_opterror(opts, "attach failed"); goto done_deref; } @@ -1846,25 +1826,39 @@ } #endif - td->td_retval[0] = pr->pr_id; - + /* + * Now that everything is done, a newly created prison should be alive, + * either from persistence, attaching, or perhaps a module parameter. + */ if (created) { + drflags = prison_deref_lock(pr, drflags); + if (!prison_isalive(ppr)) { + /* + * The parent prison died while this one was being + * created. 
+ */ + error = ENOENT; + vfs_opterror(opts, "jail \"%s\" not found", + prison_name(mypr, ppr)); + goto done_deref; + } /* - * Add a reference to newly created persistent prisons - * (which was not done earlier so that the prison would - * not be publicly visible). + * We are holding one temporary user reference, so there + * must be more than that for the prison to continue to + * exist. That usually comes from persistence or attaching, + * though modules may also add a reference. */ - if (pr_flags & PR_PERSIST) { - mtx_lock(&pr->pr_mtx); - drflags |= PD_LOCKED; - refcount_acquire(&pr->pr_ref); - refcount_acquire(&pr->pr_uref); - } else { - /* Non-persistent jails need no further changes. */ - pr = NULL; + if (atomic_load_acq_int(&pr->pr_uref) <= 1) { + error = EINVAL; + vfs_opterror(opts, "new jail must persist or attach"); + goto done_deref; } + pr->pr_state = PRISON_STATE_ALIVE; + drflags &= ~PD_KILL; } + td->td_retval[0] = pr->pr_id; + done_deref: /* Release any temporary prison holds and/or locks. */ if (pr != NULL) @@ -1933,13 +1927,8 @@ TAILQ_FOREACH(inspr, &allprison, pr_list) { if (inspr->pr_id < jid) continue; - if (inspr->pr_id > jid || - refcount_load(&inspr->pr_ref) == 0) { - /* - * Found an opening. This may be a gap - * in the list, or a dead jail with the - * same ID. - */ + if (inspr->pr_id > jid) { + /* Found an opening. */ maxid = 0; break; } @@ -2028,15 +2017,13 @@ error = vfs_copyopt(opts, "lastjid", &jid, sizeof(jid)); if (error == 0) { TAILQ_FOREACH(pr, &allprison, pr_list) { - if (pr->pr_id > jid && prison_ischild(mypr, pr)) { - mtx_lock(&pr->pr_mtx); - if ((flags & JAIL_DYING) - ? prison_isvalid(pr) : prison_isalive(pr)) + if (pr->pr_id > jid && prison_ischild(mypr, pr) && + ((flags & JAIL_DYING) + ? 
prison_isvalid(pr) : prison_isalive(pr))) break; - mtx_unlock(&pr->pr_mtx); - } } if (pr != NULL) { + mtx_lock(&pr->pr_mtx); drflags |= PD_LOCKED; goto found_prison; } @@ -2052,8 +2039,8 @@ pr = prison_find_child(mypr, jid); if (pr != NULL) { drflags |= PD_LOCKED; - if (!(prison_isalive(pr) || - (flags & JAIL_DYING))) { + if (!((flags & JAIL_DYING) || + prison_isalive(pr))) { error = ENOENT; vfs_opterror(opts, "jail %d is dying", jid); @@ -2077,7 +2064,7 @@ pr = prison_find_name(mypr, name); if (pr != NULL) { drflags |= PD_LOCKED; - if (!(prison_isalive(pr) || (flags & JAIL_DYING))) { + if (!((flags & JAIL_DYING) || prison_isalive(pr))) { error = ENOENT; vfs_opterror(opts, "jail \"%s\" is dying", name); @@ -2295,8 +2282,8 @@ int sys_jail_remove(struct thread *td, struct jail_remove_args *uap) { - struct prison *pr, *cpr, *lpr, *tpr; - int descend, error; + struct prison *pr; + int error; error = priv_check(td, PRIV_JAIL_REMOVE); if (error) @@ -2308,86 +2295,16 @@ sx_xunlock(&allprison_lock); return (EINVAL); } - - /* Remove all descendants of this prison, then remove this prison. */ - prison_hold(pr); - if (!LIST_EMPTY(&pr->pr_children)) { + if (!prison_isalive(pr)) { + /* Silently ignore already-dying prisons. */ mtx_unlock(&pr->pr_mtx); - lpr = NULL; - FOREACH_PRISON_DESCENDANT(pr, cpr, descend) { - mtx_lock(&cpr->pr_mtx); - if (prison_isvalid(cpr)) { - tpr = cpr; - prison_hold(cpr); - } else { - /* Already removed - do not do it again. 
*/ - tpr = NULL; - } - mtx_unlock(&cpr->pr_mtx); - if (lpr != NULL) { - mtx_lock(&lpr->pr_mtx); - prison_remove_one(lpr); - sx_xlock(&allprison_lock); - } - lpr = tpr; - } - if (lpr != NULL) { - mtx_lock(&lpr->pr_mtx); - prison_remove_one(lpr); - sx_xlock(&allprison_lock); - } - mtx_lock(&pr->pr_mtx); + sx_xunlock(&allprison_lock); + return (0); } - prison_remove_one(pr); + prison_deref(pr, PD_KILL | PD_LOCKED | PD_LIST_XLOCKED); return (0); } -static void -prison_remove_one(struct prison *pr) -{ - struct proc *p; - int drflags; - - drflags = PD_DEREF | PD_LOCKED | PD_LIST_XLOCKED; - - /* If the prison was persistent, it is not anymore. */ - if (pr->pr_flags & PR_PERSIST) { - refcount_release(&pr->pr_ref); - drflags |= PD_DEUREF; - pr->pr_flags &= ~PR_PERSIST; - } - - /* - * jail_remove added a reference. If that's the only one, remove - * the prison now. refcount(9) doesn't guarantee the cache coherence - * of non-zero counters, so force it here. - */ - KASSERT(refcount_load(&pr->pr_ref) > 0, - ("prison_remove_one removing a dead prison (jid=%d)", pr->pr_id)); - if (atomic_load_acq_int(&pr->pr_ref) == 1) { - prison_deref(pr, drflags); - return; - } - - mtx_unlock(&pr->pr_mtx); - sx_xunlock(&allprison_lock); - drflags &= ~(PD_LOCKED | PD_LIST_XLOCKED); - /* - * Kill all processes unfortunate enough to be attached to this prison. - */ - sx_slock(&allproc_lock); - FOREACH_PROC_IN_SYSTEM(p) { - PROC_LOCK(p); - if (p->p_state != PRS_NEW && p->p_ucred && - p->p_ucred->cr_prison == pr) - kern_psignal(p, SIGKILL); - PROC_UNLOCK(p); - } - sx_sunlock(&allproc_lock); - /* Remove the temporary reference added by jail_remove. */ - prison_deref(pr, drflags); -} - /* * struct jail_attach_args { * int jid; @@ -2403,14 +2320,7 @@ if (error) return (error); - /* - * Start with exclusive hold on allprison_lock to ensure that a possible - * PR_METHOD_REMOVE call isn't concurrent with jail_set or jail_remove. 
- * But then immediately downgrade it since we don't need to stop - * readers. - */ - sx_xlock(&allprison_lock); - sx_downgrade(&allprison_lock); + sx_slock(&allprison_lock); pr = prison_find_child(td->td_ucred->cr_prison, uap->jid); if (pr == NULL) { sx_sunlock(&allprison_lock); @@ -2442,8 +2352,8 @@ * a process root from one prison, but attached to the jail * of another. */ - refcount_acquire(&pr->pr_ref); - refcount_acquire(&pr->pr_uref); + prison_hold(pr); + prison_proc_hold(pr); mtx_unlock(&pr->pr_mtx); /* Let modules do whatever they need to prepare for attaching. */ @@ -2490,6 +2400,18 @@ #endif prison_deref(oldcred->cr_prison, PD_DEREF | PD_DEUREF); crfree(oldcred); + + /* + * See if the target prison died between unlocking the prison + * and changing the credentials. + */ + if (pr->pr_state == PRISON_STATE_DYING) { + /* Follow the prison into death. */ + PROC_LOCK(p); + kern_psignal(p, SIGKILL); + PROC_UNLOCK(p); + } + return (0); e_unlock: @@ -2513,14 +2435,14 @@ sx_assert(&allprison_lock, SX_LOCKED); TAILQ_FOREACH(pr, &allprison, pr_list) { if (pr->pr_id == prid) { - mtx_lock(&pr->pr_mtx); - if (prison_isvalid(pr)) + if (prison_isvalid(pr)) { + mtx_lock(&pr->pr_mtx); return (pr); + } /* * Any active prison with the same ID would have * been inserted before a dead one. */ - mtx_unlock(&pr->pr_mtx); break; } if (pr->pr_id > prid) @@ -2540,11 +2462,9 @@ sx_assert(&allprison_lock, SX_LOCKED); FOREACH_PRISON_DESCENDANT(mypr, pr, descend) { - if (pr->pr_id == prid) { + if (pr->pr_id == prid && prison_isvalid(pr)) { mtx_lock(&pr->pr_mtx); - if (prison_isvalid(pr)) - return (pr); - mtx_unlock(&pr->pr_mtx); + return (pr); } } return (NULL); @@ -2562,26 +2482,20 @@ sx_assert(&allprison_lock, SX_LOCKED); mylen = (mypr == &prison0) ? 
0 : strlen(mypr->pr_name) + 1; - again: deadpr = NULL; FOREACH_PRISON_DESCENDANT(mypr, pr, descend) { if (!strcmp(pr->pr_name + mylen, name)) { - mtx_lock(&pr->pr_mtx); - if (prison_isalive(pr)) + if (prison_isalive(pr)) { + mtx_lock(&pr->pr_mtx); return (pr); + } if (prison_isvalid(pr)) deadpr = pr; - mtx_unlock(&pr->pr_mtx); } } /* There was no valid prison - perhaps there was a dying one. */ - if (deadpr != NULL) { + if (deadpr != NULL) mtx_lock(&deadpr->pr_mtx); - if (!prison_isvalid(deadpr)) { - mtx_unlock(&deadpr->pr_mtx); - goto again; - } - } return (deadpr); } @@ -2624,8 +2538,9 @@ prison_hold(struct prison *pr) { #ifdef INVARIANTS - int was_valid = refcount_acquire_if_not_zero(&pr->pr_ref); + int was_valid; + was_valid = refcount_acquire_if_not_zero(&pr->pr_ref); KASSERT(was_valid, ("Trying to hold dead prison %p (jid=%d).", pr, pr->pr_id)); #else @@ -2635,45 +2550,52 @@ /* * Remove a prison reference. If that was the last reference, the - * prison will be removed (at a later time). Return with the prison - * unlocked. + * prison will be removed (at a later time). */ void prison_free_locked(struct prison *pr) { - int lastref; - mtx_assert(&pr->pr_mtx, MA_OWNED); + /* + * Locking is no longer required, but unlock because the caller + * expects it. + */ + mtx_unlock(&pr->pr_mtx); + prison_free(pr); +} + +void +prison_free(struct prison *pr) +{ + KASSERT(refcount_load(&pr->pr_ref) > 0, ("Trying to free dead prison %p (jid=%d).", pr, pr->pr_id)); - lastref = refcount_release(&pr->pr_ref); - mtx_unlock(&pr->pr_mtx); - if (lastref) { + if (!refcount_release_if_not_last(&pr->pr_ref)) { /* - * Don't remove the prison itself in this context, + * Don't remove the last reference in this context, * in case there are locks held. 
*/ taskqueue_enqueue(taskqueue_thread, &pr->pr_task); } } -void -prison_free(struct prison *pr) +static void +prison_free_not_last(struct prison *pr) { +#ifdef INVARIANTS + int lastref; - /* - * Locking is only required when releasing the last reference. - * This allows assurance that a locked prison will remain valid - * until it is unlocked. - */ KASSERT(refcount_load(&pr->pr_ref) > 0, ("Trying to free dead prison %p (jid=%d).", pr, pr->pr_id)); - if (refcount_release_if_not_last(&pr->pr_ref)) - return; - mtx_lock(&pr->pr_mtx); - prison_free_locked(pr); + lastref = refcount_release(&pr->pr_ref); + KASSERT(!lastref, + ("prison_free_not_last freed last ref on prison %p (jid=%d).", + pr, pr->pr_id)); +#else + refcount_release(&pr->pr_ref); +#endif } /* @@ -2682,15 +2604,17 @@ * user-visible, except through the the jail system calls. It is also * an error to hold an invalid prison. A prison record will remain * alive as long as it has at least one user reference, and will not - * be set to the dying state was long as the prison mutex is held. + * be set to the dying state as long as either the prison mutex or + * the allprison lock is held (allprison_lock may be shared). */ void prison_proc_hold(struct prison *pr) { #ifdef INVARIANTS - int was_alive = refcount_acquire_if_not_zero(&pr->pr_uref); + int was_alive; - KASSERT(was_alive, + was_alive = refcount_acquire_if_not_zero(&pr->pr_uref); + KASSERT(was_alive && refcount_load(&pr->pr_ref) > 0, ("Cannot add a process to a non-alive prison (jid=%d)", pr->pr_id)); #else refcount_acquire(&pr->pr_uref); @@ -2706,13 +2630,8 @@ prison_proc_free(struct prison *pr) { - /* - * Locking is only required when releasing the last reference. - * This allows assurance that a locked prison will remain alive - * until it is unlocked. 
- */ KASSERT(refcount_load(&pr->pr_uref) > 0, - ("Trying to kill a process in a dead prison (jid=%d)", pr->pr_id)); + ("Trying to kill a process in a dying prison (jid=%d)", pr->pr_id)); if (!refcount_release_if_not_last(&pr->pr_uref)) { /* * Don't remove the last user reference in this context, @@ -2720,11 +2639,28 @@ * but also half dead. Add a reference so any calls to * prison_free() won't re-submit the task. */ - refcount_acquire(&pr->pr_ref); + prison_hold(pr); taskqueue_enqueue(taskqueue_thread, &pr->pr_task); } } +static void +prison_proc_free_not_last(struct prison *pr) +{ +#ifdef INVARIANTS + int lastref; + + KASSERT(refcount_load(&pr->pr_uref) > 0, + ("Trying to kill a process in a dead prison (jid=%d)", pr->pr_id)); + lastref = refcount_release(&pr->pr_uref); + KASSERT(!lastref, + ("prison_proc_free_not_last freed last uref on prison %p (jid=%d).", + pr, pr->pr_id)); +#else + refcount_release(&pr->pr_uref); +#endif +} + /* * Complete a call to either prison_free or prison_proc_free. */ @@ -2732,129 +2668,350 @@ prison_complete(void *context, int pending) { struct prison *pr = context; + int flags; - sx_xlock(&allprison_lock); - mtx_lock(&pr->pr_mtx); /* - * If this is completing a call to prison_proc_free, there will still - * be a user reference held; clear that as well as the reference that - * was added. No references are expected if this is completing a call - * to prison_free, but prison_deref is still called for the cleanup. + * This could be called to release the last reference, or the + * last user reference; the existence of a user reference implies + * the latter. There will always be a reference to remove, as + * prison_proc_free adds one. */ - prison_deref(pr, refcount_load(&pr->pr_uref) > 0 - ? 
PD_DEREF | PD_DEUREF | PD_LOCKED | PD_LIST_XLOCKED - : PD_LOCKED | PD_LIST_XLOCKED); + flags = prison_deref_lock(pr, PD_DEREF); + if (refcount_load(&pr->pr_uref) > 0) + flags |= PD_DEUREF; + prison_deref(pr, flags); } /* - * Remove a prison reference and/or user reference (usually). + * Remove a prison reference and/or user reference (usually), which + * may or may not end up removing the prison itself, or putting it into + * a "dying" state while it cleans itself up. Optionally forcibly + * remove a prison and its descendents, including killing all associated + * processes. + * * This assumes context that allows sleeping (for allprison_lock), * with no non-sleeping locks held, except perhaps the prison itself. - * If there are no more references, release and delist the prison. * On completion, the prison lock and the allprison lock are both * unlocked. */ static void prison_deref(struct prison *pr, int flags) { - struct prison *ppr, *tpr; - int lastref, lasturef; + struct prisonlist freeprison; + struct prison *killpr, *rpr, *ppr, *tpr; + struct proc *p; + int killflags; - if (!(flags & PD_LOCKED)) - mtx_lock(&pr->pr_mtx); + killpr = NULL; + TAILQ_INIT(&freeprison); + /* + * Release this prison as requested, which may cause its parent to be + * released, and then maybe its grandparent, etc. + */ for (;;) { + killflags = 0; + if (flags & PD_KILL) { + /* Kill the prison and its descendents. */ + flags &= ~PD_KILL; + flags = prison_deref_lock(pr, flags); + killflags = prison_deref_kill(pr, &freeprison); + if (killflags & PD_KILL) + killpr = pr; + } if (flags & PD_DEUREF) { + /* Drop a user reference. 
*/ + flags &= ~PD_DEUREF; KASSERT(refcount_load(&pr->pr_uref) > 0, - ("prison_deref PD_DEUREF on a dead prison (jid=%d)", + ("prison_deref PD_DEUREF on a dying prison (jid=%d)", pr->pr_id)); - lasturef = refcount_release(&pr->pr_uref); - if (lasturef) - refcount_acquire(&pr->pr_ref); - KASSERT(refcount_load(&prison0.pr_uref) > 0, - ("prison0 pr_uref=0")); - } else - lasturef = 0; + if (!refcount_release_if_not_last(&pr->pr_uref)) { + flags = prison_deref_lock(pr, flags); + if (refcount_release(&pr->pr_uref) && + pr->pr_state != PRISON_STATE_DYING) { + /* + * When the last user reference goes, + * this becomes a DYING prison (unless + * it was one already). + */ + KASSERT( + refcount_load(&prison0.pr_ref) != 0, + ("prison0 pr_ref=0")); + if (pr->pr_state == PRISON_STATE_ALIVE) + { + prison_hold(pr); + mtx_unlock(&pr->pr_mtx); + (void)osd_jail_call(pr, + PR_METHOD_REMOVE, NULL); + mtx_lock(&pr->pr_mtx); + if (!(flags & PD_DEREF)) + flags |= PD_DEREF; + else + prison_free_not_last( + pr); + } + pr->pr_state = PRISON_STATE_DYING; + for (ppr = pr->pr_parent; + ppr != NULL; + ppr = ppr->pr_parent) + ppr->pr_childcount--; + /* This now refers to the parent. */ + flags |= PD_DEUREF; + } + } + } if (flags & PD_DEREF) { + /* Drop a reference. */ + flags &= ~PD_DEREF; KASSERT(refcount_load(&pr->pr_ref) > 0, ("prison_deref PD_DEREF on a dead prison (jid=%d)", pr->pr_id)); - lastref = refcount_release(&pr->pr_ref); - } - else - lastref = refcount_load(&pr->pr_ref) == 0; - mtx_unlock(&pr->pr_mtx); - - /* - * Tell the modules if the last user reference was removed - * (even it sticks around in dying state). 
- */ - if (lasturef) { - if (!(flags & (PD_LIST_SLOCKED | PD_LIST_XLOCKED))) { - if (atomic_load_acq_int(&pr->pr_ref) > 1) { - sx_slock(&allprison_lock); - flags |= PD_LIST_SLOCKED; - } else { - sx_xlock(&allprison_lock); - flags |= PD_LIST_XLOCKED; + if (!refcount_release_if_not_last(&pr->pr_ref)) { + flags = prison_deref_lock(pr, flags); + if (refcount_release(&pr->pr_ref)) { + /* + * When the last reference goes, + * prepare to remove the prison. + */ + KASSERT( + refcount_load(&pr->pr_uref) == 0, + ("prison_deref: last ref, " + "but still has %d urefs (jid=%d)", + pr->pr_uref, pr->pr_id)); + KASSERT( + refcount_load(&prison0.pr_ref) != 0, + ("prison0 pr_ref=0")); + TAILQ_REMOVE(&allprison, pr, pr_list); + TAILQ_INSERT_TAIL(&freeprison, pr, + pr_list); + LIST_REMOVE(pr, pr_sibling); + /* This now refers to the parent. */ + flags |= PD_DEREF; } } - (void)osd_jail_call(pr, PR_METHOD_REMOVE, NULL); - mtx_lock(&pr->pr_mtx); - lastref = refcount_release(&pr->pr_ref); + } + flags |= killflags & (PD_DEUREF | PD_DEREF); + if (flags & (PD_DEUREF | PD_DEREF)) { + /* + * A prison was marked as dying or removed, which + * means its parent now drops a reference. + */ mtx_unlock(&pr->pr_mtx); + pr = pr->pr_parent; + mtx_lock(&pr->pr_mtx); + continue; } + break; + } - /* If the prison still has references, nothing else to do. */ - if (!lastref) { - if (flags & PD_LIST_SLOCKED) - sx_sunlock(&allprison_lock); - else if (flags & PD_LIST_XLOCKED) - sx_xunlock(&allprison_lock); - return; - } + /* Release all the prison locks. */ + if (flags & PD_LOCKED) + mtx_unlock(&pr->pr_mtx); + if (flags & PD_LIST_SLOCKED) + sx_sunlock(&allprison_lock); + else if (flags & PD_LIST_XLOCKED) + sx_xunlock(&allprison_lock); - if (flags & PD_LIST_SLOCKED) { - if (!sx_try_upgrade(&allprison_lock)) { - sx_sunlock(&allprison_lock); - sx_xlock(&allprison_lock); + if (killpr != NULL) { + /* + * The killed prison or descendants still had some user + * references, which are likely attached processes. 
+ * So find and kill any such processes. + */ + sx_slock(&allproc_lock); + FOREACH_PROC_IN_SYSTEM(p) { + PROC_LOCK(p); + if (p->p_state != PRS_NEW && p->p_ucred != NULL) { + for (ppr = p->p_ucred->cr_prison; + ppr != &prison0; + ppr = ppr->pr_parent) + if (ppr == killpr) { + kern_psignal(p, SIGKILL); + break; + } } - } else if (!(flags & PD_LIST_XLOCKED)) - sx_xlock(&allprison_lock); - - TAILQ_REMOVE(&allprison, pr, pr_list); - LIST_REMOVE(pr, pr_sibling); - ppr = pr->pr_parent; - for (tpr = ppr; tpr != NULL; tpr = tpr->pr_parent) - tpr->pr_childcount--; - sx_xunlock(&allprison_lock); + PROC_UNLOCK(p); + } + sx_sunlock(&allproc_lock); + } + TAILQ_FOREACH_SAFE(rpr, &freeprison, pr_list, tpr) { + /* + * Finish removing unreferenced prisons, which couldn't happen + * while allprison_lock was held (to avoid a LOR on vrele). + */ #ifdef VIMAGE - if (pr->pr_vnet != ppr->pr_vnet) - vnet_destroy(pr->pr_vnet); + if (rpr->pr_vnet != rpr->pr_parent->pr_vnet) + vnet_destroy(rpr->pr_vnet); #endif - if (pr->pr_root != NULL) - vrele(pr->pr_root); - mtx_destroy(&pr->pr_mtx); + if (rpr->pr_root != NULL) + vrele(rpr->pr_root); + mtx_destroy(&rpr->pr_mtx); #ifdef INET - free(pr->pr_ip4, M_PRISON); + free(rpr->pr_ip4, M_PRISON); #endif #ifdef INET6 - free(pr->pr_ip6, M_PRISON); + free(rpr->pr_ip6, M_PRISON); #endif - if (pr->pr_cpuset != NULL) - cpuset_rel(pr->pr_cpuset); - osd_jail_exit(pr); + if (rpr->pr_cpuset != NULL) + cpuset_rel(rpr->pr_cpuset); + osd_jail_exit(rpr); #ifdef RACCT if (racct_enable) - prison_racct_detach(pr); + prison_racct_detach(rpr); #endif - free(pr, M_PRISON); + TAILQ_REMOVE(&freeprison, rpr, pr_list); + free(rpr, M_PRISON); + } +} + +/* + * Make sure allprison_lock is held exclusive, and the prison is locked. + * Return the new deref flags. + */ +static int +prison_deref_lock(struct prison *pr, int flags) +{ - /* Removing a prison frees a reference on its parent. 
*/ - pr = ppr; + if (!(flags & PD_LIST_XLOCKED)) { + /* + * Get allprison_lock, which may be an upgrade, + * and may require unlocking the prison. + */ + if (flags & PD_LOCKED) { + mtx_unlock(&pr->pr_mtx); + flags &= ~PD_LOCKED; + } + if (flags & PD_LIST_SLOCKED) { + if (!sx_try_upgrade(&allprison_lock)) { + sx_sunlock(&allprison_lock); + sx_xlock(&allprison_lock); + } + flags &= ~PD_LIST_SLOCKED; + } else + sx_xlock(&allprison_lock); + flags |= PD_LIST_XLOCKED; + } + if (!(flags & PD_LOCKED)) { + /* Lock the prison mutex. */ mtx_lock(&pr->pr_mtx); - flags = PD_DEREF | PD_DEUREF; + flags |= PD_LOCKED; } + return flags; +} + +/* + * Kill the prison and its descendants. Mark them as dying, clear the + * persist flag, and call module remove methods. Return flags + * indicating further action required. + */ +static int +prison_deref_kill(struct prison *pr, struct prisonlist *freeprison) +{ + struct prison *cpr, *ppr; + int flags, cflags, killed; + bool descend; + + /* If the prison is already dying, there's nothing to kill. */ + if (pr->pr_state == PRISON_STATE_DYING) + return 0; + /* + * The operation on the prison and each descendant is similar to what + * prison_deref() does when losing the last user or system reference, + * plus extra work to clear PR_PERSIST. + */ + flags = 0; + killed = 1; + prison_deref_kill_descend(pr); + + FOREACH_PRISON_DESCENDANT_PRE_POST(pr, cpr, descend) { + if (descend) { + if (cpr->pr_state != PRISON_STATE_ALIVE) { + /* + * Only kill alive descendants, as dying + * prisons don't need killing, and new + * prisons will kill themselves later. + */ + descend = false; + continue; + } + killed++; + mtx_lock(&cpr->pr_mtx); + prison_deref_kill_descend(cpr); + } else { + /* + * PR_REMOVE should only be set within this function, + * with its exclusive hold on allprison_lock, so we + * don't need to worry about other threads' caches.
+ */ + if (!(cpr->pr_flags & PR_REMOVE)) + continue; + cflags = prison_deref_kill_ascend(cpr, freeprison); + flags |= (cflags & PD_KILL); + mtx_unlock(&cpr->pr_mtx); + if (!refcount_release_if_not_last( + &cpr->pr_parent->pr_uref)) { + mtx_lock(&cpr->pr_parent->pr_mtx); + (void)refcount_release( + &cpr->pr_parent->pr_uref); + mtx_unlock(&cpr->pr_parent->pr_mtx); + } + if (cflags & PD_DEREF) + prison_free_not_last(cpr->pr_parent); + } + } + + flags |= prison_deref_kill_ascend(pr, freeprison); + for (ppr = pr->pr_parent; + ppr != NULL; + ppr = ppr->pr_parent) + ppr->pr_childcount -= killed; + + /* + * Disconnect unreferenced descendant prisons from their parents, + * which couldn't easily be done mid-loop. + */ + TAILQ_FOREACH(cpr, freeprison, pr_list) + LIST_REMOVE(cpr, pr_sibling); + return flags; +} + +static void +prison_deref_kill_descend(struct prison *pr) +{ + + pr->pr_state = PRISON_STATE_DYING; + pr->pr_flags |= PR_REMOVE; + prison_hold(pr); + mtx_unlock(&pr->pr_mtx); +} + +static int +prison_deref_kill_ascend(struct prison *pr, struct prisonlist *freeprison) +{ + int flags; + + flags = PD_DEUREF; + (void)osd_jail_call(pr, PR_METHOD_REMOVE, NULL); + mtx_lock(&pr->pr_mtx); + pr->pr_flags &= ~PR_REMOVE; + if (pr->pr_flags & PR_PERSIST) { + pr->pr_flags &= ~PR_PERSIST; + if (!refcount_release(&pr->pr_uref)) + flags |= PD_KILL; + prison_free_not_last(pr); + } else if (refcount_load(&pr->pr_uref) > 0) + flags |= PD_KILL; + if (refcount_release(&pr->pr_ref)) { + KASSERT(refcount_load(&pr->pr_uref) == 0, + ("prison_deref_kill: last ref, " + "but still has %d urefs (jid=%d)", + pr->pr_uref, pr->pr_id)); + TAILQ_REMOVE(&allprison, pr, pr_list); + TAILQ_INSERT_TAIL(freeprison, pr, pr_list); + flags |= PD_DEREF; + } + pr->pr_childcount = 0; + return flags; } /* @@ -3019,32 +3176,34 @@ } /* - * Return true if the prison is currently alive. A prison is alive if it is - * valid and it holds user references. + * Return true if the prison is currently alive. 
*/ bool prison_isalive(struct prison *pr) { - mtx_assert(&pr->pr_mtx, MA_OWNED); - if (__predict_false(refcount_load(&pr->pr_ref) == 0)) - return (false); - if (__predict_false(refcount_load(&pr->pr_uref) == 0)) + if (__predict_false(pr->pr_state != PRISON_STATE_ALIVE)) return (false); return (true); } /* - * Return true if the prison is currently valid. A prison is valid if it has - * been fully created, and is not being destroyed. Note that dying prisons - * are still considered valid. + * Return true if the prison is currently valid, i.e. it has been fully + * created. Note that dying prisons are still considered valid. */ bool prison_isvalid(struct prison *pr) { - mtx_assert(&pr->pr_mtx, MA_OWNED); - if (__predict_false(refcount_load(&pr->pr_ref) == 0)) + /* + * A prison is also invalid if it has no references, but that should + * never be the case when the right locks are held (prison mutex, or + * allprison_lock at least shared). + */ + KASSERT(refcount_load(&pr->pr_ref) > 0, + ("prison_invalid checking dead prison %p (jid=%d).", + pr, pr->pr_id)); + if (__predict_false(pr->pr_state == PRISON_STATE_INVALID)) return (false); return (true); } @@ -3678,6 +3837,8 @@ #if defined(INET) || defined(INET6) again: #endif + if (!prison_isvalid(cpr)) + continue; mtx_lock(&cpr->pr_mtx); #ifdef INET if (cpr->pr_ip4s > 0) { @@ -3705,15 +3866,10 @@ cpr->pr_ip6s * sizeof(struct in6_addr)); } #endif - if (!prison_isvalid(cpr)) { - mtx_unlock(&cpr->pr_mtx); - continue; - } bzero(xp, sizeof(*xp)); xp->pr_version = XPRISON_VERSION; xp->pr_id = cpr->pr_id; - xp->pr_state = prison_isalive(cpr) - ?
PRISON_STATE_ALIVE : PRISON_STATE_DYING; + xp->pr_state = cpr->pr_state; strlcpy(xp->pr_path, prison_path(pr, cpr), sizeof(xp->pr_path)); strlcpy(xp->pr_host, cpr->pr_hostname, sizeof(xp->pr_host)); strlcpy(xp->pr_name, prison_name(pr, cpr), sizeof(xp->pr_name)); @@ -4364,6 +4520,7 @@ db_printf(" parent = %p\n", pr->pr_parent); db_printf(" ref = %d\n", pr->pr_ref); db_printf(" uref = %d\n", pr->pr_uref); + db_printf(" state = %d\n", pr->pr_state); db_printf(" path = %s\n", pr->pr_path); db_printf(" cpuset = %d\n", pr->pr_cpuset ? pr->pr_cpuset->cs_id : -1); diff --git a/sys/sys/jail.h b/sys/sys/jail.h --- a/sys/sys/jail.h +++ b/sys/sys/jail.h @@ -88,9 +88,11 @@ }; #define XPRISON_VERSION 3 -#define PRISON_STATE_INVALID 0 -#define PRISON_STATE_ALIVE 1 -#define PRISON_STATE_DYING 2 +enum prison_state { + PRISON_STATE_INVALID = 0, /* New prison, not ready to be seen */ + PRISON_STATE_ALIVE, /* Current prison, visible to all */ + PRISON_STATE_DYING /* Removed, but holding resources, */ +}; /* optionally visible. */ /* * Flags for jail_set and jail_get. 
@@ -99,7 +101,7 @@ #define JAIL_UPDATE 0x02 /* Update parameters of existing jail */ #define JAIL_ATTACH 0x04 /* Attach to jail upon creation */ #define JAIL_DYING 0x08 /* Allow getting a dying jail */ -#define JAIL_SET_MASK 0x0f +#define JAIL_SET_MASK 0x0f /* JAIL_DYING is deprecated/ignored here */ #define JAIL_GET_MASK 0x08 #define JAIL_SYS_DISABLE 0 @@ -155,7 +157,9 @@ * (m) locked by pr_mtx * (p) locked by pr_mtx, and also at least shared allprison_lock required * to update - * (r) atomic via refcount(9), pr_mtx required to decrement to zero + * (q) locked both pr_mtx and allprison_lock + * (r) atomic via refcount(9), pr_mtx and allprison_lock required to + * decrement to zero */ struct prison { TAILQ_ENTRY(prison) pr_list; /* (a) all prisons */ @@ -179,12 +183,13 @@ struct prison_racct *pr_prison_racct; /* (c) racct jail proxy */ void *pr_sparep[3]; int pr_childcount; /* (a) number of child jails */ - int pr_childmax; /* (p) maximum child jails */ + int pr_childmax; /* (a) maximum child jails */ unsigned pr_allow; /* (p) PR_ALLOW_* flags */ int pr_securelevel; /* (p) securelevel */ int pr_enforce_statfs; /* (p) statfs permission */ int pr_devfs_rsnum; /* (p) devfs ruleset */ - int pr_spare[3]; + enum prison_state pr_state; /* (q) state in life cycle */ + int pr_spare[2]; int pr_osreldate; /* (c) kern.osreldate value */ unsigned long pr_hostid; /* (p) jail hostid */ char pr_name[MAXHOSTNAMELEN]; /* (p) admin jail name */ @@ -216,6 +221,7 @@ /* primary jail address. */ /* Internal flag bits */ +#define PR_REMOVE 0x01000000 /* In process of being removed */ #define PR_IP4 0x02000000 /* IPv4 restricted or disabled */ /* by this jail or an ancestor */ #define PR_IP6 0x04000000 /* IPv6 restricted or disabled */ @@ -333,6 +339,19 @@ ; \ else +/* + * As FOREACH_PRISON_DESCENDANT, but visit both preorder and postorder. + */ +#define FOREACH_PRISON_DESCENDANT_PRE_POST(ppr, cpr, descend) \ + for ((cpr) = (ppr), (descend) = 1; \ + ((cpr) = (descend) \ + ? 
((descend) = !LIST_EMPTY(&(cpr)->pr_children)) \ + ? LIST_FIRST(&(cpr)->pr_children) \ + : (cpr) \ + : ((descend) = LIST_NEXT(cpr, pr_sibling) != NULL) \ + ? LIST_NEXT(cpr, pr_sibling) \ + : cpr->pr_parent) != (ppr);) + /* * Attributes of the physical system, and the root of the jail tree. */ diff --git a/usr.sbin/jail/jail.8 b/usr.sbin/jail/jail.8 --- a/usr.sbin/jail/jail.8 +++ b/usr.sbin/jail/jail.8 @@ -25,7 +25,7 @@ .\" .\" $FreeBSD$ .\" -.Dd November 18, 2020 +.Dd January 25, 2021 .Dt JAIL 8 .Os .Sh NAME @@ -136,10 +136,6 @@ .Pp Other available options are: .Bl -tag -width indent -.It Fl d -Allow making changes to a dying jail, equivalent to the -.Va allow.dying -parameter. .It Fl f Ar conf_file Use configuration file .Ar conf_file @@ -207,6 +203,17 @@ .It Fl v Print a message on every operation, such as running commands and mounting filesystems. +.It Fl d +This is deprecated and is equivalent to the +.Va allow.dying +parameter, which is also deprecated. +It used to allow making changes to a +.Va dying +jail. +Now such jails are always replaced when a new jail is created with the same +.Va jid +or +.Va name . .El .Pp If no arguments are given after the options, the operation (except @@ -903,9 +910,14 @@ .Pa /proc directory. .It Va allow.dying -Allow making changes to a +This is deprecated and has no effect. +It used to allow making changes to a .Va dying jail. +Now such jails are always replaced when a new jail is created with the same +.Va jid +or +.Va name . .It Va depend Specify a jail (or jails) that this jail depends on. When this jail is to be created, any jail(s) it depends on must already exist.
diff --git a/usr.sbin/jail/jail.c b/usr.sbin/jail/jail.c --- a/usr.sbin/jail/jail.c +++ b/usr.sbin/jail/jail.c @@ -65,7 +65,7 @@ static void clear_persist(struct cfjail *j); static int update_jail(struct cfjail *j); static int rdtun_params(struct cfjail *j, int dofail); -static void running_jid(struct cfjail *j, int dflag); +static void running_jid(struct cfjail *j); static void jail_quoted_warnx(const struct cfjail *j, const char *name_msg, const char *noname_msg); static int jailparam_set_note(const struct cfjail *j, struct jailparam *jp, @@ -140,7 +140,7 @@ char *JidFile; size_t sysvallen; unsigned op, pi; - int ch, docf, error, i, oldcl, sysval; + int ch, docf, error, i, oldcl, sysval, dying_warned; int dflag, Rflag; #if defined(INET) || defined(INET6) char *cs, *ncs; @@ -377,6 +377,7 @@ * operation on it. When that is done, the jail may be finished, * or it may go back for the next step. */ + dying_warned = 0; while ((j = next_jail())) { if (j->flags & JF_FAILED) { @@ -397,11 +398,13 @@ import_params(j) < 0) continue; } + if (j->intparams[IP_ALLOW_DYING] && !dying_warned) { + warnx("%s", "the 'allow.dying' parameter and '-d' flag " + "are deprecated and have no effect."); + dying_warned = 1; + } if (!j->jid) - running_jid(j, - (j->flags & (JF_SET | JF_DEPEND)) == JF_SET - ?
dflag || bool_param(j->intparams[IP_ALLOW_DYING]) - : 0); + running_jid(j); if (finish_command(j)) continue; @@ -613,11 +616,10 @@ int create_jail(struct cfjail *j) { - struct iovec jiov[4]; struct stat st; - struct jailparam *jp, *setparams, *setparams2, *sjp; + struct jailparam *jp, *setparams, *sjp; const char *path; - int dopersist, ns, jid, dying, didfail; + int dopersist, ns; /* * Check the jail's path, with a better error message than jail_set @@ -657,57 +659,8 @@ *sjp++ = *jp; ns = sjp - setparams; - didfail = 0; j->jid = jailparam_set_note(j, setparams, ns, JAIL_CREATE); - if (j->jid < 0 && errno == EEXIST && - bool_param(j->intparams[IP_ALLOW_DYING]) && - int_param(j->intparams[KP_JID], &jid) && jid != 0) { - /* - * The jail already exists, but may be dying. - * Make sure it is, in which case an update is appropriate. - */ - jiov[0].iov_base = __DECONST(char *, "jid"); - jiov[0].iov_len = sizeof("jid"); - jiov[1].iov_base = &jid; - jiov[1].iov_len = sizeof(jid); - jiov[2].iov_base = __DECONST(char *, "dying"); - jiov[2].iov_len = sizeof("dying"); - jiov[3].iov_base = &dying; - jiov[3].iov_len = sizeof(dying); - if (jail_get(jiov, 4, JAIL_DYING) < 0) { - /* - * It could be that the jail just barely finished - * dying, or it could be that the jid never existed - * but the name does. In either case, another try - * at creating the jail should do the right thing. - */ - if (errno == ENOENT) - j->jid = jailparam_set_note(j, setparams, ns, - JAIL_CREATE); - } else if (dying) { - j->jid = jid; - if (rdtun_params(j, 1) < 0) { - j->jid = -1; - didfail = 1; - } else { - sjp = setparams2 = alloca((j->njp + dopersist) * - sizeof(struct jailparam)); - for (jp = setparams; jp < setparams + ns; jp++) - if (!JP_RDTUN(jp) || - !strcmp(jp->jp_name, "jid")) - *sjp++ = *jp; - j->jid = jailparam_set_note(j, setparams2, - sjp - setparams2, JAIL_UPDATE | JAIL_DYING); - /* - * Again, perhaps the jail just finished dying. 
- */ - if (j->jid < 0 && errno == ENOENT) - j->jid = jailparam_set_note(j, - setparams, ns, JAIL_CREATE); - } - } - } - if (j->jid < 0 && !didfail) { + if (j->jid < 0) { jail_warnx(j, "%s", jail_errmsg); failed(j); } @@ -772,9 +725,7 @@ if (!JP_RDTUN(jp)) *++sjp = *jp; - jid = jailparam_set_note(j, setparams, ns, - bool_param(j->intparams[IP_ALLOW_DYING]) - ? JAIL_UPDATE | JAIL_DYING : JAIL_UPDATE); + jid = jailparam_set_note(j, setparams, ns, JAIL_UPDATE); if (jid < 0) { jail_warnx(j, "%s", jail_errmsg); failed(j); @@ -813,8 +764,7 @@ rtjp->jp_value = NULL; } rval = 0; - if (jailparam_get(rtparams, nrt, - bool_param(j->intparams[IP_ALLOW_DYING]) ? JAIL_DYING : 0) > 0) { + if (jailparam_get(rtparams, nrt, 0) > 0) { rtjp = rtparams + 1; for (jp = j->jp; rtjp < rtparams + nrt; jp++) { if (JP_RDTUN(jp) && strcmp(jp->jp_name, "jid")) { @@ -851,7 +801,7 @@ * Get the jail's jid if it is running. */ static void -running_jid(struct cfjail *j, int dflag) +running_jid(struct cfjail *j) { struct iovec jiov[2]; const char *pval; @@ -877,7 +827,7 @@ j->jid = -1; return; } - j->jid = jail_get(jiov, 2, dflag ? JAIL_DYING : 0); + j->jid = jail_get(jiov, 2, 0); } static void @@ -906,10 +856,9 @@ jid = jailparam_set(jp, njp, flags); if (verbose > 0) { - jail_note(j, "jail_set(%s%s)", + jail_note(j, "jail_set(%s)", (flags & (JAIL_CREATE | JAIL_UPDATE)) == JAIL_CREATE - ? "JAIL_CREATE" : "JAIL_UPDATE", - (flags & JAIL_DYING) ? " | JAIL_DYING" : ""); + ? "JAIL_CREATE" : "JAIL_UPDATE"); for (i = 0; i < njp; i++) { printf(" %s", jp[i].jp_name); if (jp[i].jp_value == NULL)