Page MenuHomeFreeBSD

D28150.id82895.diff
No OneTemporary

D28150.id82895.diff

diff --git a/lib/libc/sys/jail.2 b/lib/libc/sys/jail.2
--- a/lib/libc/sys/jail.2
+++ b/lib/libc/sys/jail.2
@@ -25,7 +25,7 @@
.\"
.\" $FreeBSD$
.\"
-.Dd February 8, 2012
+.Dd January 25, 2021
.Dt JAIL 2
.Os
.Sh NAME
@@ -187,7 +187,12 @@
.Fn jail_attach
system call.
.It Dv JAIL_DYING
-Allow setting a jail that is in the process of being removed.
+This is deprecated and has no effect.
+It used to allow setting a jail that is in the process of being removed.
+Now such jails are always replaced when a new jail is created with the same
+.Va jid
+or
+.Va name .
.El
.Pp
The
diff --git a/sys/kern/kern_fork.c b/sys/kern/kern_fork.c
--- a/sys/kern/kern_fork.c
+++ b/sys/kern/kern_fork.c
@@ -710,6 +710,16 @@
*/
knote_fork(p1->p_klist, p2->p_pid);
+ /*
+ * See if the containing prison died while the process was still new.
+ */
+ if (!prison_isalive(p2->p_ucred->cr_prison)) {
+ /* Follow the prison into death. */
+ PROC_LOCK(p2);
+ kern_psignal(p2, SIGKILL);
+ PROC_UNLOCK(p2);
+ }
+
/*
* Now can be swapped.
*/
diff --git a/sys/kern/kern_jail.c b/sys/kern/kern_jail.c
--- a/sys/kern/kern_jail.c
+++ b/sys/kern/kern_jail.c
@@ -106,6 +106,7 @@
.pr_path = "/",
.pr_securelevel = -1,
.pr_devfs_rsnum = 0,
+ .pr_state = PRISON_STATE_ALIVE,
.pr_childmax = JAIL_MAX,
.pr_hostuuid = DEFAULT_HOSTUUID,
.pr_children = LIST_HEAD_INITIALIZER(prison0.pr_children),
@@ -140,10 +141,16 @@
static int do_jail_attach(struct thread *td, struct prison *pr);
static void prison_complete(void *context, int pending);
static void prison_deref(struct prison *pr, int flags);
+static int prison_deref_lock(struct prison *pr, int flags);
+static int prison_deref_kill(struct prison *pr, struct prisonlist *freeprison);
+static void prison_deref_kill_descend(struct prison *pr);
+static int prison_deref_kill_ascend(struct prison *pr,
+ struct prisonlist *freeprison);
+static void prison_free_not_last(struct prison *pr);
+static void prison_proc_free_not_last(struct prison *pr);
static void prison_set_allow_locked(struct prison *pr, unsigned flag,
int enable);
static char *prison_path(struct prison *pr1, struct prison *pr2);
-static void prison_remove_one(struct prison *pr);
#ifdef RACCT
static void prison_racct_attach(struct prison *pr);
static void prison_racct_modify(struct prison *pr);
@@ -153,9 +160,10 @@
/* Flags for prison_deref */
#define PD_DEREF 0x01 /* Decrement pr_ref */
#define PD_DEUREF 0x02 /* Decrement pr_uref */
-#define PD_LOCKED 0x04 /* pr_mtx is held */
-#define PD_LIST_SLOCKED 0x08 /* allprison_lock is held shared */
-#define PD_LIST_XLOCKED 0x10 /* allprison_lock is held exclusive */
+#define PD_KILL 0x04 /* Remove jail, kill processes, etc */
+#define PD_LOCKED 0x08 /* pr_mtx is held */
+#define PD_LIST_SLOCKED 0x10 /* allprison_lock is held shared */
+#define PD_LIST_XLOCKED 0x20 /* allprison_lock is held exclusive */
/*
* Parameter names corresponding to PR_* flag values. Size values are for kvm
@@ -526,10 +534,10 @@
#endif
unsigned long hid;
size_t namelen, onamelen, pnamelen;
- int born, created, cuflags, descend, drflags, enforce;
+ int created, cuflags, descend, drflags, enforce;
int error, errmsg_len, errmsg_pos;
int gotchildmax, gotenforce, gothid, gotrsnum, gotslevel;
- int jid, jsys, len, level;
+ int jid, jsys, len, level, tjid;
int childmax, osreldt, rsnum, slevel;
#if defined(INET) || defined(INET6)
int ii, ij;
@@ -540,9 +548,8 @@
#ifdef INET6
int ip6s, redo_ip6;
#endif
- uint64_t pr_allow, ch_allow, pr_flags, ch_flags;
+ uint64_t pr_allow, ch_allow, pr_flags, ch_flags, tallow;
uint64_t pr_allow_diff;
- unsigned tallow;
char numbuf[12];
error = priv_check(td, PRIV_JAIL_SET);
@@ -550,9 +557,6 @@
error = priv_check(td, PRIV_JAIL_ATTACH);
if (error)
return (error);
- mypr = td->td_ucred->cr_prison;
- if ((flags & JAIL_CREATE) && mypr->pr_childmax == 0)
- return (EPERM);
if (flags & ~JAIL_SET_MASK)
return (EINVAL);
@@ -661,12 +665,6 @@
}
ch_flags |= jsf->new | jsf->disable;
}
- if ((flags & (JAIL_CREATE | JAIL_UPDATE | JAIL_ATTACH)) == JAIL_CREATE
- && !(pr_flags & PR_PERSIST)) {
- error = EINVAL;
- vfs_opterror(opts, "new jail must persist or attach");
- goto done_errmsg;
- }
#ifdef VIMAGE
if ((flags & JAIL_UPDATE) && (ch_flags & PR_VNET)) {
error = EINVAL;
@@ -984,9 +982,10 @@
* Find the specified jail, or at least its parent.
* This abuses the file error codes ENOENT and EEXIST.
*/
+ ppr = mypr = td->td_ucred->cr_prison;
pr = NULL;
- ppr = mypr;
inspr = NULL;
+ deadpr = NULL;
if (cuflags == JAIL_CREATE && jid == 0 && name != NULL) {
namelc = strrchr(name, '.');
jid = strtoul(namelc != NULL ? namelc + 1 : name, &p, 10);
@@ -1006,68 +1005,45 @@
* where it can be inserted later.
*/
TAILQ_FOREACH(inspr, &allprison, pr_list) {
- if (inspr->pr_id == jid) {
- mtx_lock(&inspr->pr_mtx);
- if (prison_isvalid(inspr)) {
- pr = inspr;
- drflags |= PD_LOCKED;
- inspr = NULL;
- } else
- mtx_unlock(&inspr->pr_mtx);
- break;
- }
+ if (inspr->pr_id < jid)
+ continue;
if (inspr->pr_id > jid)
break;
- }
- if (pr != NULL) {
- ppr = pr->pr_parent;
- /* Create: jid must not exist. */
- if (cuflags == JAIL_CREATE) {
- /*
- * Even creators that cannot see the jail will
- * get EEXIST.
- */
- error = EEXIST;
- vfs_opterror(opts, "jail %d already exists",
- jid);
- goto done_deref;
- }
- if (!prison_ischild(mypr, pr)) {
- /*
- * Updaters get ENOENT if they cannot see the
- * jail. This is true even for CREATE | UPDATE,
- * which normally cannot give this error.
- */
- error = ENOENT;
- vfs_opterror(opts, "jail %d not found", jid);
- goto done_deref;
- } else if (!prison_isalive(pr)) {
- if (!(flags & JAIL_DYING)) {
- error = ENOENT;
- vfs_opterror(opts, "jail %d is dying",
- jid);
- goto done_deref;
- } else if ((flags & JAIL_ATTACH) ||
- (pr_flags & PR_PERSIST)) {
- /*
- * A dying jail might be resurrected
- * (via attach or persist), but first
- * it must determine if another jail
- * has claimed its name. Accomplish
- * this by implicitly re-setting the
- * name.
- */
- if (name == NULL)
- name = prison_name(mypr, pr);
- }
- }
- } else {
- /* Update: jid must exist. */
- if (cuflags == JAIL_UPDATE) {
- error = ENOENT;
- vfs_opterror(opts, "jail %d not found", jid);
- goto done_deref;
+ if (inspr->pr_state != PRISON_STATE_DYING) {
+ /* The jail exists. */
+ pr = inspr;
+ mtx_lock(&pr->pr_mtx);
+ drflags |= PD_LOCKED;
+ ppr = pr->pr_parent;
+ } else {
+ /* Note a dying jail to handle later. */
+ deadpr = inspr;
}
+ inspr = NULL;
+ break;
+ }
+
+ if (cuflags == JAIL_CREATE && pr != NULL) {
+ /*
+ * Creators get EEXIST if the jail already exists,
+ * even if they cannot see it.
+ */
+ error = EEXIST;
+ vfs_opterror(opts, "jail %d already exists", jid);
+ goto done_deref;
+ }
+ if ((pr == NULL)
+ ? cuflags == JAIL_UPDATE
+ : !prison_isalive(pr) || !prison_ischild(mypr, pr)) {
+ /*
+ * Updaters get ENOENT for nonexistent jails,
+ * or if the jail exists but they cannot see it.
+ * The latter case is true even for CREATE | UPDATE,
+ * which normally cannot give this error.
+ */
+ error = ENOENT;
+ vfs_opterror(opts, "jail %d not found", jid);
+ goto done_deref;
}
}
/*
@@ -1099,7 +1075,9 @@
} else {
*namelc = '\0';
ppr = prison_find_name(mypr, name);
- if (ppr == NULL) {
+ if (ppr == NULL || !prison_isalive(ppr)) {
+ if (ppr != NULL)
+ mtx_unlock(&ppr->pr_mtx);
error = ENOENT;
vfs_opterror(opts,
"jail \"%s\" not found", name);
@@ -1113,61 +1091,34 @@
if (namelc[0] != '\0') {
pnamelen =
(ppr == &prison0) ? 0 : strlen(ppr->pr_name) + 1;
- name_again:
- deadpr = NULL;
FOREACH_PRISON_CHILD(ppr, tpr) {
- if (tpr != pr &&
- !strcmp(tpr->pr_name + pnamelen, namelc)) {
- mtx_lock(&tpr->pr_mtx);
- if (prison_isalive(tpr)) {
- if (pr == NULL &&
- cuflags != JAIL_CREATE) {
- /*
- * Use this jail
- * for updates.
- */
- pr = tpr;
- drflags |= PD_LOCKED;
- break;
- }
- /*
- * Create, or update(jid):
- * name must not exist in an
- * active sibling jail.
- */
- error = EEXIST;
- mtx_unlock(&tpr->pr_mtx);
- vfs_opterror(opts,
- "jail \"%s\" already exists",
- name);
- goto done_deref;
- }
- if (pr == NULL &&
- cuflags != JAIL_CREATE &&
- prison_isvalid(tpr))
- deadpr = tpr;
- mtx_unlock(&tpr->pr_mtx);
- }
- }
- /* If no active jail is found, use a dying one. */
- if (deadpr != NULL && pr == NULL) {
- if (flags & JAIL_DYING) {
- mtx_lock(&deadpr->pr_mtx);
- if (!prison_isvalid(deadpr)) {
- mtx_unlock(&deadpr->pr_mtx);
- goto name_again;
- }
- pr = deadpr;
- drflags |= PD_LOCKED;
- } else if (cuflags == JAIL_UPDATE) {
- error = ENOENT;
+ if (tpr == pr ||
+ tpr->pr_state == PRISON_STATE_DYING ||
+ strcmp(tpr->pr_name + pnamelen, namelc))
+ continue;
+ if (cuflags == JAIL_CREATE || pr != NULL) {
+ /*
+ * Create, or update(jid): name must
+ * not exist in an active sibling jail.
+ */
+ error = EEXIST;
vfs_opterror(opts,
- "jail \"%s\" is dying", name);
+ "jail \"%s\" already exists", name);
goto done_deref;
}
+ /* Use this jail for updates. */
+ pr = tpr;
+ mtx_lock(&pr->pr_mtx);
+ drflags |= PD_LOCKED;
+ break;
}
- /* Update: name must exist if no jid. */
- else if (cuflags == JAIL_UPDATE && pr == NULL) {
+ /*
+ * Update: name must exist if no jid. As with the jid
+ * case, the jail must be currently visible, or else
+ * even CREATE | UPDATE will get an error.
+ */
+ if ((pr == NULL)
+ ? cuflags == JAIL_UPDATE : !prison_isalive(pr)) {
error = ENOENT;
vfs_opterror(opts, "jail \"%s\" not found",
name);
@@ -1182,7 +1133,7 @@
goto done_deref;
}
- /* If there's no prison to update, create a new one and link it in. */
+ /* If there's no prison to update, create a new one. */
created = pr == NULL;
if (created) {
for (tpr = mypr; tpr != NULL; tpr = tpr->pr_parent)
@@ -1191,36 +1142,75 @@
vfs_opterror(opts, "prison limit exceeded");
goto done_deref;
}
- mtx_lock(&ppr->pr_mtx);
- if (!prison_isvalid(ppr)) {
- mtx_unlock(&ppr->pr_mtx);
+ if (!prison_isalive(ppr)) {
error = ENOENT;
vfs_opterror(opts, "jail \"%s\" not found",
prison_name(mypr, ppr));
goto done_deref;
}
prison_hold(ppr);
- refcount_acquire(&ppr->pr_uref);
- mtx_unlock(&ppr->pr_mtx);
-
- if (jid == 0 && (jid = get_next_prid(&inspr)) == 0) {
- error = EAGAIN;
- vfs_opterror(opts, "no available jail IDs");
- pr = ppr;
- drflags |= PD_DEREF | PD_DEUREF;
- goto done_deref;
+ prison_proc_hold(ppr);
+
+ /*
+ * If no jid was explicitly given, or if a dying jail is being
+ * replaced, find free ID.
+ */
+ if (jid > 0 && deadpr == NULL)
+ tjid = jid;
+ else {
+ tjid = get_next_prid(&inspr);
+ if (tjid == 0) {
+ error = EAGAIN;
+ vfs_opterror(opts, "no available jail IDs");
+ pr = ppr;
+ drflags |= PD_DEREF | PD_DEUREF;
+ goto done_deref;
+ }
}
+ /*
+ * Start the prison with a reference, matching the one added
+ * to existing prisons.
+ */
pr = malloc(sizeof(*pr), M_PRISON, M_WAITOK | M_ZERO);
+ pr->pr_state = PRISON_STATE_INVALID;
+ refcount_init(&pr->pr_ref, 1);
+ refcount_init(&pr->pr_uref, 1);
+ drflags |= PD_DEREF | PD_DEUREF;
LIST_INIT(&pr->pr_children);
mtx_init(&pr->pr_mtx, "jail mutex", NULL, MTX_DEF | MTX_DUPOK);
TASK_INIT(&pr->pr_task, 0, prison_complete, pr);
- pr->pr_id = jid;
+ if (deadpr == NULL) {
+ pr->pr_id = jid = tjid;
+ tpr = pr;
+ } else {
+ pr->pr_id = jid;
+ /*
+ * The prison being created has the same ID as a dying
+ * one. Handle this by swapping the new prison with
+ * the dying one, and then re-adding the dying jail
+ * with the new ID. This may cause some confusion to
+ * user space, but only to those listing dying jails.
+ */
+ TAILQ_INSERT_BEFORE(deadpr, pr, pr_list);
+ TAILQ_REMOVE(&allprison, deadpr, pr_list);
+ if (inspr == deadpr)
+ inspr = pr;
+ mtx_lock(&deadpr->pr_mtx);
+ deadpr->pr_id = tjid;
+ mtx_unlock(&deadpr->pr_mtx);
+ tpr = deadpr;
+ }
+
+ /*
+ * Link the prison into the allprison list in ID order,
+ * and into its parent's child list in no particular order.
+ */
if (inspr != NULL)
- TAILQ_INSERT_BEFORE(inspr, pr, pr_list);
+ TAILQ_INSERT_BEFORE(inspr, tpr, pr_list);
else
- TAILQ_INSERT_TAIL(&allprison, pr, pr_list);
+ TAILQ_INSERT_TAIL(&allprison, tpr, pr_list);
pr->pr_parent = ppr;
LIST_INSERT_HEAD(&ppr->pr_children, pr, pr_sibling);
@@ -1305,18 +1295,14 @@
mtx_lock(&pr->pr_mtx);
drflags |= PD_LOCKED;
- /*
- * New prisons do not yet have a reference, because we do not
- * want others to see the incomplete prison once the
- * allprison_lock is downgraded.
- */
} else {
/*
* Grab a reference for existing prisons, to ensure they
* continue to exist for the duration of the call.
*/
prison_hold(pr);
- drflags |= PD_DEREF;
+ prison_proc_hold(pr);
+ drflags |= PD_DEREF | PD_DEUREF;
#if defined(VIMAGE) && (defined(INET) || defined(INET6))
if ((pr->pr_flags & PR_VNET) &&
(ch_flags & (PR_IP4_USER | PR_IP6_USER))) {
@@ -1434,7 +1420,7 @@
#ifdef VIMAGE
(tpr != tppr && (tpr->pr_flags & PR_VNET)) ||
#endif
- refcount_load(&tpr->pr_uref) == 0) {
+ tpr->pr_state == PRISON_STATE_DYING) {
descend = 0;
continue;
}
@@ -1502,7 +1488,7 @@
#ifdef VIMAGE
(tpr != tppr && (tpr->pr_flags & PR_VNET)) ||
#endif
- refcount_load(&tpr->pr_uref) == 0) {
+ tpr->pr_state == PRISON_STATE_DYING) {
descend = 0;
continue;
}
@@ -1731,23 +1717,26 @@
prison_set_allow_locked(pr, tallow, 0);
/*
* Persistent prisons get an extra reference, and prisons losing their
- * persist flag lose that reference. Only do this for existing prisons
- * for now, so new ones will remain unseen until after the module
- * handlers have completed.
+ * persist flag lose that reference.
*/
- born = !prison_isalive(pr);
- if (!created && (ch_flags & PR_PERSIST & (pr_flags ^ pr->pr_flags))) {
+ if (ch_flags & PR_PERSIST & (pr_flags ^ pr->pr_flags)) {
if (pr_flags & PR_PERSIST) {
prison_hold(pr);
- refcount_acquire(&pr->pr_uref);
+ prison_proc_hold(pr);
} else {
- refcount_release(&pr->pr_ref);
- drflags |= PD_DEUREF;
+ prison_proc_free_not_last(pr);
+ prison_free_not_last(pr);
}
}
pr->pr_flags = (pr->pr_flags & ~ch_flags) | pr_flags;
mtx_unlock(&pr->pr_mtx);
drflags &= ~PD_LOCKED;
+ /*
+ * Any errors past this point will need to de-persist newly created
+ * prisons, as well as call remove methods.
+ */
+ if (created)
+ drflags |= PD_KILL;
#ifdef RACCT
if (racct_enable && created)
@@ -1807,19 +1796,14 @@
/* Let the modules do their work. */
sx_downgrade(&allprison_lock);
drflags = (drflags & ~PD_LIST_XLOCKED) | PD_LIST_SLOCKED;
- if (born) {
+ if (created) {
error = osd_jail_call(pr, PR_METHOD_CREATE, opts);
- if (error) {
- (void)osd_jail_call(pr, PR_METHOD_REMOVE, NULL);
+ if (error)
goto done_deref;
- }
}
error = osd_jail_call(pr, PR_METHOD_SET, opts);
- if (error) {
- if (born)
- (void)osd_jail_call(pr, PR_METHOD_REMOVE, NULL);
+ if (error)
goto done_deref;
- }
/* Attach this process to the prison if requested. */
if (flags & JAIL_ATTACH) {
@@ -1827,10 +1811,6 @@
error = do_jail_attach(td, pr);
drflags &= ~PD_LIST_SLOCKED;
if (error) {
- if (created) {
- /* do_jail_attach has removed the prison. */
- pr = NULL;
- }
vfs_opterror(opts, "attach failed");
goto done_deref;
}
@@ -1846,25 +1826,39 @@
}
#endif
- td->td_retval[0] = pr->pr_id;
-
+ /*
+ * Now that everything is done, a newly created prison should be alive,
+ * either from persistence, attaching, or perhaps a module parameter.
+ */
if (created) {
+ drflags = prison_deref_lock(pr, drflags);
+ if (!prison_isalive(ppr)) {
+ /*
+ * The parent prison died while this one was being
+ * created.
+ */
+ error = ENOENT;
+ vfs_opterror(opts, "jail \"%s\" not found",
+ prison_name(mypr, ppr));
+ goto done_deref;
+ }
/*
- * Add a reference to newly created persistent prisons
- * (which was not done earlier so that the prison would
- * not be publicly visible).
+ * We are holding one temporary user reference, so there
+ * must be more than that for the prison to continue to
+ * exist. That usually comes from persistence or attaching,
+ * though modules may also add a reference.
*/
- if (pr_flags & PR_PERSIST) {
- mtx_lock(&pr->pr_mtx);
- drflags |= PD_LOCKED;
- refcount_acquire(&pr->pr_ref);
- refcount_acquire(&pr->pr_uref);
- } else {
- /* Non-persistent jails need no further changes. */
- pr = NULL;
+ if (atomic_load_acq_int(&pr->pr_uref) <= 1) {
+ error = EINVAL;
+ vfs_opterror(opts, "new jail must persist or attach");
+ goto done_deref;
}
+ pr->pr_state = PRISON_STATE_ALIVE;
+ drflags &= ~PD_KILL;
}
+ td->td_retval[0] = pr->pr_id;
+
done_deref:
/* Release any temporary prison holds and/or locks. */
if (pr != NULL)
@@ -1933,13 +1927,8 @@
TAILQ_FOREACH(inspr, &allprison, pr_list) {
if (inspr->pr_id < jid)
continue;
- if (inspr->pr_id > jid ||
- refcount_load(&inspr->pr_ref) == 0) {
- /*
- * Found an opening. This may be a gap
- * in the list, or a dead jail with the
- * same ID.
- */
+ if (inspr->pr_id > jid) {
+ /* Found an opening. */
maxid = 0;
break;
}
@@ -2028,15 +2017,13 @@
error = vfs_copyopt(opts, "lastjid", &jid, sizeof(jid));
if (error == 0) {
TAILQ_FOREACH(pr, &allprison, pr_list) {
- if (pr->pr_id > jid && prison_ischild(mypr, pr)) {
- mtx_lock(&pr->pr_mtx);
- if ((flags & JAIL_DYING)
- ? prison_isvalid(pr) : prison_isalive(pr))
+ if (pr->pr_id > jid && prison_ischild(mypr, pr) &&
+ ((flags & JAIL_DYING)
+ ? prison_isvalid(pr) : prison_isalive(pr)))
break;
- mtx_unlock(&pr->pr_mtx);
- }
}
if (pr != NULL) {
+ mtx_lock(&pr->pr_mtx);
drflags |= PD_LOCKED;
goto found_prison;
}
@@ -2052,8 +2039,8 @@
pr = prison_find_child(mypr, jid);
if (pr != NULL) {
drflags |= PD_LOCKED;
- if (!(prison_isalive(pr) ||
- (flags & JAIL_DYING))) {
+ if (!((flags & JAIL_DYING) ||
+ prison_isalive(pr))) {
error = ENOENT;
vfs_opterror(opts, "jail %d is dying",
jid);
@@ -2077,7 +2064,7 @@
pr = prison_find_name(mypr, name);
if (pr != NULL) {
drflags |= PD_LOCKED;
- if (!(prison_isalive(pr) || (flags & JAIL_DYING))) {
+ if (!((flags & JAIL_DYING) || prison_isalive(pr))) {
error = ENOENT;
vfs_opterror(opts, "jail \"%s\" is dying",
name);
@@ -2295,8 +2282,8 @@
int
sys_jail_remove(struct thread *td, struct jail_remove_args *uap)
{
- struct prison *pr, *cpr, *lpr, *tpr;
- int descend, error;
+ struct prison *pr;
+ int error;
error = priv_check(td, PRIV_JAIL_REMOVE);
if (error)
@@ -2308,86 +2295,16 @@
sx_xunlock(&allprison_lock);
return (EINVAL);
}
-
- /* Remove all descendants of this prison, then remove this prison. */
- prison_hold(pr);
- if (!LIST_EMPTY(&pr->pr_children)) {
+ if (!prison_isalive(pr)) {
+ /* Silently ignore already-dying prisons. */
mtx_unlock(&pr->pr_mtx);
- lpr = NULL;
- FOREACH_PRISON_DESCENDANT(pr, cpr, descend) {
- mtx_lock(&cpr->pr_mtx);
- if (prison_isvalid(cpr)) {
- tpr = cpr;
- prison_hold(cpr);
- } else {
- /* Already removed - do not do it again. */
- tpr = NULL;
- }
- mtx_unlock(&cpr->pr_mtx);
- if (lpr != NULL) {
- mtx_lock(&lpr->pr_mtx);
- prison_remove_one(lpr);
- sx_xlock(&allprison_lock);
- }
- lpr = tpr;
- }
- if (lpr != NULL) {
- mtx_lock(&lpr->pr_mtx);
- prison_remove_one(lpr);
- sx_xlock(&allprison_lock);
- }
- mtx_lock(&pr->pr_mtx);
+ sx_xunlock(&allprison_lock);
+ return (0);
}
- prison_remove_one(pr);
+ prison_deref(pr, PD_KILL | PD_LOCKED | PD_LIST_XLOCKED);
return (0);
}
-static void
-prison_remove_one(struct prison *pr)
-{
- struct proc *p;
- int drflags;
-
- drflags = PD_DEREF | PD_LOCKED | PD_LIST_XLOCKED;
-
- /* If the prison was persistent, it is not anymore. */
- if (pr->pr_flags & PR_PERSIST) {
- refcount_release(&pr->pr_ref);
- drflags |= PD_DEUREF;
- pr->pr_flags &= ~PR_PERSIST;
- }
-
- /*
- * jail_remove added a reference. If that's the only one, remove
- * the prison now. refcount(9) doesn't guarantee the cache coherence
- * of non-zero counters, so force it here.
- */
- KASSERT(refcount_load(&pr->pr_ref) > 0,
- ("prison_remove_one removing a dead prison (jid=%d)", pr->pr_id));
- if (atomic_load_acq_int(&pr->pr_ref) == 1) {
- prison_deref(pr, drflags);
- return;
- }
-
- mtx_unlock(&pr->pr_mtx);
- sx_xunlock(&allprison_lock);
- drflags &= ~(PD_LOCKED | PD_LIST_XLOCKED);
- /*
- * Kill all processes unfortunate enough to be attached to this prison.
- */
- sx_slock(&allproc_lock);
- FOREACH_PROC_IN_SYSTEM(p) {
- PROC_LOCK(p);
- if (p->p_state != PRS_NEW && p->p_ucred &&
- p->p_ucred->cr_prison == pr)
- kern_psignal(p, SIGKILL);
- PROC_UNLOCK(p);
- }
- sx_sunlock(&allproc_lock);
- /* Remove the temporary reference added by jail_remove. */
- prison_deref(pr, drflags);
-}
-
/*
* struct jail_attach_args {
* int jid;
@@ -2403,14 +2320,7 @@
if (error)
return (error);
- /*
- * Start with exclusive hold on allprison_lock to ensure that a possible
- * PR_METHOD_REMOVE call isn't concurrent with jail_set or jail_remove.
- * But then immediately downgrade it since we don't need to stop
- * readers.
- */
- sx_xlock(&allprison_lock);
- sx_downgrade(&allprison_lock);
+ sx_slock(&allprison_lock);
pr = prison_find_child(td->td_ucred->cr_prison, uap->jid);
if (pr == NULL) {
sx_sunlock(&allprison_lock);
@@ -2442,8 +2352,8 @@
* a process root from one prison, but attached to the jail
* of another.
*/
- refcount_acquire(&pr->pr_ref);
- refcount_acquire(&pr->pr_uref);
+ prison_hold(pr);
+ prison_proc_hold(pr);
mtx_unlock(&pr->pr_mtx);
/* Let modules do whatever they need to prepare for attaching. */
@@ -2490,6 +2400,18 @@
#endif
prison_deref(oldcred->cr_prison, PD_DEREF | PD_DEUREF);
crfree(oldcred);
+
+ /*
+ * See if the target prison died between unlocking the prison
+ * and changing the credentials.
+ */
+ if (pr->pr_state == PRISON_STATE_DYING) {
+ /* Follow the prison into death. */
+ PROC_LOCK(p);
+ kern_psignal(p, SIGKILL);
+ PROC_UNLOCK(p);
+ }
+
return (0);
e_unlock:
@@ -2513,14 +2435,14 @@
sx_assert(&allprison_lock, SX_LOCKED);
TAILQ_FOREACH(pr, &allprison, pr_list) {
if (pr->pr_id == prid) {
- mtx_lock(&pr->pr_mtx);
- if (prison_isvalid(pr))
+ if (prison_isvalid(pr)) {
+ mtx_lock(&pr->pr_mtx);
return (pr);
+ }
/*
* Any active prison with the same ID would have
* been inserted before a dead one.
*/
- mtx_unlock(&pr->pr_mtx);
break;
}
if (pr->pr_id > prid)
@@ -2540,11 +2462,9 @@
sx_assert(&allprison_lock, SX_LOCKED);
FOREACH_PRISON_DESCENDANT(mypr, pr, descend) {
- if (pr->pr_id == prid) {
+ if (pr->pr_id == prid && prison_isvalid(pr)) {
mtx_lock(&pr->pr_mtx);
- if (prison_isvalid(pr))
- return (pr);
- mtx_unlock(&pr->pr_mtx);
+ return (pr);
}
}
return (NULL);
@@ -2562,26 +2482,20 @@
sx_assert(&allprison_lock, SX_LOCKED);
mylen = (mypr == &prison0) ? 0 : strlen(mypr->pr_name) + 1;
- again:
deadpr = NULL;
FOREACH_PRISON_DESCENDANT(mypr, pr, descend) {
if (!strcmp(pr->pr_name + mylen, name)) {
- mtx_lock(&pr->pr_mtx);
- if (prison_isalive(pr))
+ if (prison_isalive(pr)) {
+ mtx_lock(&pr->pr_mtx);
return (pr);
+ }
if (prison_isvalid(pr))
deadpr = pr;
- mtx_unlock(&pr->pr_mtx);
}
}
/* There was no valid prison - perhaps there was a dying one. */
- if (deadpr != NULL) {
+ if (deadpr != NULL)
mtx_lock(&deadpr->pr_mtx);
- if (!prison_isvalid(deadpr)) {
- mtx_unlock(&deadpr->pr_mtx);
- goto again;
- }
- }
return (deadpr);
}
@@ -2624,8 +2538,9 @@
prison_hold(struct prison *pr)
{
#ifdef INVARIANTS
- int was_valid = refcount_acquire_if_not_zero(&pr->pr_ref);
+ int was_valid;
+ was_valid = refcount_acquire_if_not_zero(&pr->pr_ref);
KASSERT(was_valid,
("Trying to hold dead prison %p (jid=%d).", pr, pr->pr_id));
#else
@@ -2635,45 +2550,52 @@
/*
* Remove a prison reference. If that was the last reference, the
- * prison will be removed (at a later time). Return with the prison
- * unlocked.
+ * prison will be removed (at a later time).
*/
void
prison_free_locked(struct prison *pr)
{
- int lastref;
- mtx_assert(&pr->pr_mtx, MA_OWNED);
+ /*
+ * Locking is no longer required, but unlock because the caller
+ * expects it.
+ */
+ mtx_unlock(&pr->pr_mtx);
+ prison_free(pr);
+}
+
+void
+prison_free(struct prison *pr)
+{
+
KASSERT(refcount_load(&pr->pr_ref) > 0,
("Trying to free dead prison %p (jid=%d).",
pr, pr->pr_id));
- lastref = refcount_release(&pr->pr_ref);
- mtx_unlock(&pr->pr_mtx);
- if (lastref) {
+ if (!refcount_release_if_not_last(&pr->pr_ref)) {
/*
- * Don't remove the prison itself in this context,
+ * Don't remove the last reference in this context,
* in case there are locks held.
*/
taskqueue_enqueue(taskqueue_thread, &pr->pr_task);
}
}
-void
-prison_free(struct prison *pr)
+static void
+prison_free_not_last(struct prison *pr)
{
+#ifdef INVARIANTS
+ int lastref;
- /*
- * Locking is only required when releasing the last reference.
- * This allows assurance that a locked prison will remain valid
- * until it is unlocked.
- */
KASSERT(refcount_load(&pr->pr_ref) > 0,
("Trying to free dead prison %p (jid=%d).",
pr, pr->pr_id));
- if (refcount_release_if_not_last(&pr->pr_ref))
- return;
- mtx_lock(&pr->pr_mtx);
- prison_free_locked(pr);
+ lastref = refcount_release(&pr->pr_ref);
+ KASSERT(!lastref,
+ ("prison_free_not_last freed last ref on prison %p (jid=%d).",
+ pr, pr->pr_id));
+#else
+ refcount_release(&pr->pr_ref); /* never the last reference */
+#endif
}
/*
@@ -2682,15 +2604,17 @@
* user-visible, except through the the jail system calls. It is also
* an error to hold an invalid prison. A prison record will remain
* alive as long as it has at least one user reference, and will not
- * be set to the dying state was long as the prison mutex is held.
+ * be set to the dying state as long as either the prison mutex or
+ * the allprison lock is held (allprison_lock may be shared).
*/
void
prison_proc_hold(struct prison *pr)
{
#ifdef INVARIANTS
- int was_alive = refcount_acquire_if_not_zero(&pr->pr_uref);
+ int was_alive;
- KASSERT(was_alive,
+ was_alive = refcount_acquire_if_not_zero(&pr->pr_uref);
+ KASSERT(was_alive && refcount_load(&pr->pr_ref) > 0,
("Cannot add a process to a non-alive prison (jid=%d)", pr->pr_id));
#else
refcount_acquire(&pr->pr_uref);
@@ -2706,13 +2630,8 @@
prison_proc_free(struct prison *pr)
{
- /*
- * Locking is only required when releasing the last reference.
- * This allows assurance that a locked prison will remain alive
- * until it is unlocked.
- */
KASSERT(refcount_load(&pr->pr_uref) > 0,
- ("Trying to kill a process in a dead prison (jid=%d)", pr->pr_id));
+ ("Trying to kill a process in a dying prison (jid=%d)", pr->pr_id));
if (!refcount_release_if_not_last(&pr->pr_uref)) {
/*
* Don't remove the last user reference in this context,
@@ -2720,11 +2639,28 @@
* but also half dead. Add a reference so any calls to
* prison_free() won't re-submit the task.
*/
- refcount_acquire(&pr->pr_ref);
+ prison_hold(pr);
taskqueue_enqueue(taskqueue_thread, &pr->pr_task);
}
}
+static void
+prison_proc_free_not_last(struct prison *pr)
+{
+#ifdef INVARIANTS
+ int lastref;
+
+ KASSERT(refcount_load(&pr->pr_uref) > 0,
+ ("Trying to kill a process in a dead prison (jid=%d)", pr->pr_id));
+ lastref = refcount_release(&pr->pr_uref);
+ KASSERT(!lastref,
+ ("prison_proc_free_not_last freed last uref on prison %p (jid=%d).",
+ pr, pr->pr_id));
+#else
+ refcount_release(&pr->pr_uref); /* never the last user reference */
+#endif
+}
+
/*
* Complete a call to either prison_free or prison_proc_free.
*/
@@ -2732,129 +2668,350 @@
prison_complete(void *context, int pending)
{
struct prison *pr = context;
+ int flags;
- sx_xlock(&allprison_lock);
- mtx_lock(&pr->pr_mtx);
/*
- * If this is completing a call to prison_proc_free, there will still
- * be a user reference held; clear that as well as the reference that
- * was added. No references are expected if this is completing a call
- * to prison_free, but prison_deref is still called for the cleanup.
+ * This could be called to release the last reference, or the
+ * last user reference; the existence of a user reference implies
+ * the latter. There will always be a reference to remove, as
+ * prison_proc_free adds one.
*/
- prison_deref(pr, refcount_load(&pr->pr_uref) > 0
- ? PD_DEREF | PD_DEUREF | PD_LOCKED | PD_LIST_XLOCKED
- : PD_LOCKED | PD_LIST_XLOCKED);
+ flags = prison_deref_lock(pr, PD_DEREF);
+ if (refcount_load(&pr->pr_uref) > 0)
+ flags |= PD_DEUREF;
+ prison_deref(pr, flags);
}
/*
- * Remove a prison reference and/or user reference (usually).
+ * Remove a prison reference and/or user reference (usually), which
+ * may or may not end up removing the prison itself, or putting it into
+ * a "dying" state while it cleans itself up. Optionally forcibly
+ * remove a prison and its descendents, including killing all associated
+ * processes.
+ *
* This assumes context that allows sleeping (for allprison_lock),
* with no non-sleeping locks held, except perhaps the prison itself.
- * If there are no more references, release and delist the prison.
* On completion, the prison lock and the allprison lock are both
* unlocked.
*/
static void
prison_deref(struct prison *pr, int flags)
{
- struct prison *ppr, *tpr;
- int lastref, lasturef;
+ struct prisonlist freeprison;
+ struct prison *killpr, *rpr, *ppr, *tpr;
+ struct proc *p;
+ int killflags;
- if (!(flags & PD_LOCKED))
- mtx_lock(&pr->pr_mtx);
+ killpr = NULL;
+ TAILQ_INIT(&freeprison);
+ /*
+ * Release this prison as requested, which may cause its parent to be
+ * released, and then maybe its grandparent, etc.
+ */
for (;;) {
+ killflags = 0;
+ if (flags & PD_KILL) {
+ /* Kill the prison and its descendents. */
+ flags &= ~PD_KILL;
+ flags = prison_deref_lock(pr, flags);
+ killflags = prison_deref_kill(pr, &freeprison);
+ if (killflags & PD_KILL)
+ killpr = pr;
+ }
if (flags & PD_DEUREF) {
+ /* Drop a user reference. */
+ flags &= ~PD_DEUREF;
KASSERT(refcount_load(&pr->pr_uref) > 0,
- ("prison_deref PD_DEUREF on a dead prison (jid=%d)",
+ ("prison_deref PD_DEUREF on a dying prison (jid=%d)",
pr->pr_id));
- lasturef = refcount_release(&pr->pr_uref);
- if (lasturef)
- refcount_acquire(&pr->pr_ref);
- KASSERT(refcount_load(&prison0.pr_uref) > 0,
- ("prison0 pr_uref=0"));
- } else
- lasturef = 0;
+ if (!refcount_release_if_not_last(&pr->pr_uref)) {
+ flags = prison_deref_lock(pr, flags);
+ if (refcount_release(&pr->pr_uref) &&
+ pr->pr_state != PRISON_STATE_DYING) {
+ /*
+ * When the last user reference goes,
+ * this becomes a DYING prison (unless
+ * it was one already).
+ */
+ KASSERT(
+ refcount_load(&prison0.pr_ref) != 0,
+ ("prison0 pr_ref=0"));
+ if (pr->pr_state == PRISON_STATE_ALIVE)
+ {
+ prison_hold(pr);
+ mtx_unlock(&pr->pr_mtx);
+ (void)osd_jail_call(pr,
+ PR_METHOD_REMOVE, NULL);
+ mtx_lock(&pr->pr_mtx);
+ if (!(flags & PD_DEREF))
+ flags |= PD_DEREF;
+ else
+ prison_free_not_last(
+ pr);
+ }
+ pr->pr_state = PRISON_STATE_DYING;
+ for (ppr = pr->pr_parent;
+ ppr != NULL;
+ ppr = ppr->pr_parent)
+ ppr->pr_childcount--;
+ /* This now refers to the parent. */
+ flags |= PD_DEUREF;
+ }
+ }
+ }
if (flags & PD_DEREF) {
+ /* Drop a reference. */
+ flags &= ~PD_DEREF;
KASSERT(refcount_load(&pr->pr_ref) > 0,
("prison_deref PD_DEREF on a dead prison (jid=%d)",
pr->pr_id));
- lastref = refcount_release(&pr->pr_ref);
- }
- else
- lastref = refcount_load(&pr->pr_ref) == 0;
- mtx_unlock(&pr->pr_mtx);
-
- /*
- * Tell the modules if the last user reference was removed
- * (even it sticks around in dying state).
- */
- if (lasturef) {
- if (!(flags & (PD_LIST_SLOCKED | PD_LIST_XLOCKED))) {
- if (atomic_load_acq_int(&pr->pr_ref) > 1) {
- sx_slock(&allprison_lock);
- flags |= PD_LIST_SLOCKED;
- } else {
- sx_xlock(&allprison_lock);
- flags |= PD_LIST_XLOCKED;
+ if (!refcount_release_if_not_last(&pr->pr_ref)) {
+ flags = prison_deref_lock(pr, flags);
+ if (refcount_release(&pr->pr_ref)) {
+ /*
+ * When the last reference goes,
+ * prepare to remove the prison.
+ */
+ KASSERT(
+ refcount_load(&pr->pr_uref) == 0,
+ ("prison_deref: last ref, "
+ "but still has %d urefs (jid=%d)",
+ pr->pr_uref, pr->pr_id));
+ KASSERT(
+ refcount_load(&prison0.pr_ref) != 0,
+ ("prison0 pr_ref=0"));
+ TAILQ_REMOVE(&allprison, pr, pr_list);
+ TAILQ_INSERT_TAIL(&freeprison, pr,
+ pr_list);
+ LIST_REMOVE(pr, pr_sibling);
+ /* This now refers to the parent. */
+ flags |= PD_DEREF;
}
}
- (void)osd_jail_call(pr, PR_METHOD_REMOVE, NULL);
- mtx_lock(&pr->pr_mtx);
- lastref = refcount_release(&pr->pr_ref);
+ }
+ flags |= killflags & (PD_DEUREF | PD_DEREF);
+ if (flags & (PD_DEUREF | PD_DEREF)) {
+ /*
+ * A prison was marked as dying or removed, which
+ * means its parent now drops a reference.
+ */
mtx_unlock(&pr->pr_mtx);
+ pr = pr->pr_parent;
+ mtx_lock(&pr->pr_mtx);
+ continue;
}
+ break;
+ }
- /* If the prison still has references, nothing else to do. */
- if (!lastref) {
- if (flags & PD_LIST_SLOCKED)
- sx_sunlock(&allprison_lock);
- else if (flags & PD_LIST_XLOCKED)
- sx_xunlock(&allprison_lock);
- return;
- }
+ /* Release all the prison locks. */
+ if (flags & PD_LOCKED)
+ mtx_unlock(&pr->pr_mtx);
+ if (flags & PD_LIST_SLOCKED)
+ sx_sunlock(&allprison_lock);
+ else if (flags & PD_LIST_XLOCKED)
+ sx_xunlock(&allprison_lock);
- if (flags & PD_LIST_SLOCKED) {
- if (!sx_try_upgrade(&allprison_lock)) {
- sx_sunlock(&allprison_lock);
- sx_xlock(&allprison_lock);
+ if (killpr != NULL) {
+ /*
+ * The killed prison or descendants still had some user
+ * references, which are likely attached processes.
+ * So find and kill any such processes.
+ */
+ sx_slock(&allproc_lock);
+ FOREACH_PROC_IN_SYSTEM(p) {
+ PROC_LOCK(p);
+ if (p->p_state != PRS_NEW && p->p_ucred != NULL) {
+ for (ppr = p->p_ucred->cr_prison;
+ ppr != &prison0;
+ ppr = ppr->pr_parent)
+ if (ppr == killpr) {
+ kern_psignal(p, SIGKILL);
+ break;
+ }
}
- } else if (!(flags & PD_LIST_XLOCKED))
- sx_xlock(&allprison_lock);
-
- TAILQ_REMOVE(&allprison, pr, pr_list);
- LIST_REMOVE(pr, pr_sibling);
- ppr = pr->pr_parent;
- for (tpr = ppr; tpr != NULL; tpr = tpr->pr_parent)
- tpr->pr_childcount--;
- sx_xunlock(&allprison_lock);
+ PROC_UNLOCK(p);
+ }
+ sx_sunlock(&allproc_lock);
+ }
+ TAILQ_FOREACH_SAFE(rpr, &freeprison, pr_list, tpr) {
+ /*
+ * Finish removing unreferenced prisons, which couldn't happen
+ * while allprison_lock was held (to avoid a LOR on vrele).
+ */
#ifdef VIMAGE
- if (pr->pr_vnet != ppr->pr_vnet)
- vnet_destroy(pr->pr_vnet);
+ if (rpr->pr_vnet != rpr->pr_parent->pr_vnet)
+ vnet_destroy(rpr->pr_vnet);
#endif
- if (pr->pr_root != NULL)
- vrele(pr->pr_root);
- mtx_destroy(&pr->pr_mtx);
+ if (rpr->pr_root != NULL)
+ vrele(rpr->pr_root);
+ mtx_destroy(&rpr->pr_mtx);
#ifdef INET
- free(pr->pr_ip4, M_PRISON);
+ free(rpr->pr_ip4, M_PRISON);
#endif
#ifdef INET6
- free(pr->pr_ip6, M_PRISON);
+ free(rpr->pr_ip6, M_PRISON);
#endif
- if (pr->pr_cpuset != NULL)
- cpuset_rel(pr->pr_cpuset);
- osd_jail_exit(pr);
+ if (rpr->pr_cpuset != NULL)
+ cpuset_rel(rpr->pr_cpuset);
+ osd_jail_exit(rpr);
#ifdef RACCT
if (racct_enable)
- prison_racct_detach(pr);
+ prison_racct_detach(rpr);
#endif
- free(pr, M_PRISON);
+ TAILQ_REMOVE(&freeprison, rpr, pr_list);
+ free(rpr, M_PRISON);
+ }
+}
+
+/*
+ * Make sure allprison_lock is held exclusive, and the prison is locked
+ * (pr_mtx is dropped and retaken if needed).  Return the new deref flags.
+ */
+static int
+prison_deref_lock(struct prison *pr, int flags)
+{
- /* Removing a prison frees a reference on its parent. */
- pr = ppr;
+ if (!(flags & PD_LIST_XLOCKED)) {
+ /*
+ * Get allprison_lock, which may be an upgrade,
+ * and may require unlocking the prison.
+ */
+ if (flags & PD_LOCKED) {
+ mtx_unlock(&pr->pr_mtx);
+ flags &= ~PD_LOCKED;
+ }
+ if (flags & PD_LIST_SLOCKED) {
+ if (!sx_try_upgrade(&allprison_lock)) {
+ sx_sunlock(&allprison_lock);
+ sx_xlock(&allprison_lock);
+ }
+ flags &= ~PD_LIST_SLOCKED;
+ } else
+ sx_xlock(&allprison_lock);
+ flags |= PD_LIST_XLOCKED;
+ }
+ if (!(flags & PD_LOCKED)) {
+ /* Lock the prison mutex. */
mtx_lock(&pr->pr_mtx);
- flags = PD_DEREF | PD_DEUREF;
+ flags |= PD_LOCKED;
}
+ return flags;
+}
+
+/*
+ * Kill the prison and its descendants. Mark them as dying, clear the
+ * persist flag, and call module remove methods. Return flags
+ * indicating further action required.
+ */
+static int
+prison_deref_kill(struct prison *pr, struct prisonlist *freeprison)
+{
+ struct prison *cpr, *ppr;
+ int flags, cflags, killed;
+ bool descend;
+
+ /* If the prison is already dying, there's nothing to kill. */
+ if (pr->pr_state == PRISON_STATE_DYING)
+ return 0;
+ /*
+ * The operation on the prison and each descendant is similar to what
+ * prison_deref() does when losing the last user or system reference,
+ * plus extra work to clear PR_PERSIST.
+ */
+ flags = 0;
+ killed = 1;
+ prison_deref_kill_descend(pr);
+
+ FOREACH_PRISON_DESCENDANT_PRE_POST(pr, cpr, descend) {
+ if (descend) {
+ if (cpr->pr_state != PRISON_STATE_ALIVE) {
+ /*
+ * Only kill alive descendants, as dying
+ * prisons don't need killing, and new
+ * prisons will kill themselves later.
+ */
+ descend = false;
+ continue;
+ }
+ killed++;
+ mtx_lock(&cpr->pr_mtx);
+ prison_deref_kill_descend(cpr);
+ } else {
+ /*
+ * PR_REMOVE should only be set within this function,
+ * with its exclusive hold on allprison_lock, so we
+ * don't need to worry about other threads' caches.
+ */
+ if (!(cpr->pr_flags & PR_REMOVE))
+ continue;
+ cflags = prison_deref_kill_ascend(cpr, freeprison);
+ flags |= (cflags & PD_KILL);
+ mtx_unlock(&cpr->pr_mtx);
+ if (!refcount_release_if_not_last(
+ &cpr->pr_parent->pr_uref)) {
+ mtx_lock(&cpr->pr_parent->pr_mtx);
+ (void)refcount_release(
+ &cpr->pr_parent->pr_uref);
+ mtx_unlock(&cpr->pr_parent->pr_mtx);
+ }
+ if (cflags & PD_DEREF)
+ prison_free_not_last(cpr->pr_parent);
+ }
+ }
+
+ flags |= prison_deref_kill_ascend(pr, freeprison);
+ for (ppr = pr->pr_parent;
+ ppr != NULL;
+ ppr = ppr->pr_parent)
+ ppr->pr_childcount -= killed;
+
+ /*
+ * Disconnect unreferenced descendant prisons from their parents,
+ * which couldn't easily be done mid-loop.
+ */
+ TAILQ_FOREACH(cpr, freeprison, pr_list)
+ LIST_REMOVE(cpr, pr_sibling);
+ return flags;
+}
+
+static void
+prison_deref_kill_descend(struct prison *pr)
+{
+
+ pr->pr_state = PRISON_STATE_DYING;
+ pr->pr_flags |= PR_REMOVE;
+ prison_hold(pr);
+ mtx_unlock(&pr->pr_mtx);
+}
+
+static int
+prison_deref_kill_ascend(struct prison *pr, struct prisonlist *freeprison)
+{
+ int flags;
+
+ flags = PD_DEUREF;
+ (void)osd_jail_call(pr, PR_METHOD_REMOVE, NULL);
+ mtx_lock(&pr->pr_mtx);
+ pr->pr_flags &= ~PR_REMOVE;
+ if (pr->pr_flags & PR_PERSIST) {
+ pr->pr_flags &= ~PR_PERSIST;
+ if (!refcount_release(&pr->pr_uref))
+ flags |= PD_KILL;
+ prison_free_not_last(pr);
+ } else if (refcount_load(&pr->pr_uref) > 0)
+ flags |= PD_KILL;
+ if (refcount_release(&pr->pr_ref)) {
+ KASSERT(refcount_load(&pr->pr_uref) == 0,
+ ("prison_deref_kill: last ref, "
+ "but still has %d urefs (jid=%d)",
+ pr->pr_uref, pr->pr_id));
+ TAILQ_REMOVE(&allprison, pr, pr_list);
+ TAILQ_INSERT_TAIL(freeprison, pr, pr_list);
+ flags |= PD_DEREF;
+ }
+ pr->pr_childcount = 0;
+ return flags;
}
/*
@@ -3019,32 +3176,34 @@
}
/*
- * Return true if the prison is currently alive. A prison is alive if it is
- * valid and it holds user references.
+ * Return true if the prison is currently alive.
*/
bool
prison_isalive(struct prison *pr)
{
- mtx_assert(&pr->pr_mtx, MA_OWNED);
- if (__predict_false(refcount_load(&pr->pr_ref) == 0))
- return (false);
- if (__predict_false(refcount_load(&pr->pr_uref) == 0))
+ if (__predict_false(pr->pr_state != PRISON_STATE_ALIVE))
return (false);
return (true);
}
/*
- * Return true if the prison is currently valid. A prison is valid if it has
- * been fully created, and is not being destroyed. Note that dying prisons
- * are still considered valid.
+ * Return true if the prison is currently valid, i.e. it has been fully
+ * created. Note that dying prisons are still considered valid.
*/
bool
prison_isvalid(struct prison *pr)
{
- mtx_assert(&pr->pr_mtx, MA_OWNED);
- if (__predict_false(refcount_load(&pr->pr_ref) == 0))
+ /*
+ * A prison is also invalid if it has no references, but that should
+ * never be the case when the right locks are held (prison mutex, or
+ * allprison_lock at least shared).
+ */
+ KASSERT(refcount_load(&pr->pr_ref) > 0,
+ ("prison_invalid checking dead prison %p (jid=%d).",
+ pr, pr->pr_id));
+ if (__predict_false(pr->pr_state == PRISON_STATE_INVALID))
return (false);
return (true);
}
@@ -3678,6 +3837,8 @@
#if defined(INET) || defined(INET6)
again:
#endif
+ if (!prison_isvalid(cpr))
+ continue;
mtx_lock(&cpr->pr_mtx);
#ifdef INET
if (cpr->pr_ip4s > 0) {
@@ -3705,15 +3866,10 @@
cpr->pr_ip6s * sizeof(struct in6_addr));
}
#endif
- if (!prison_isvalid(cpr)) {
- mtx_unlock(&cpr->pr_mtx);
- continue;
- }
bzero(xp, sizeof(*xp));
xp->pr_version = XPRISON_VERSION;
xp->pr_id = cpr->pr_id;
- xp->pr_state = prison_isalive(cpr)
- ? PRISON_STATE_ALIVE : PRISON_STATE_DYING;
+ xp->pr_state = cpr->pr_state;
strlcpy(xp->pr_path, prison_path(pr, cpr), sizeof(xp->pr_path));
strlcpy(xp->pr_host, cpr->pr_hostname, sizeof(xp->pr_host));
strlcpy(xp->pr_name, prison_name(pr, cpr), sizeof(xp->pr_name));
@@ -4364,6 +4520,7 @@
db_printf(" parent = %p\n", pr->pr_parent);
db_printf(" ref = %d\n", pr->pr_ref);
db_printf(" uref = %d\n", pr->pr_uref);
+ db_printf(" state = %d\n", pr->pr_state);
db_printf(" path = %s\n", pr->pr_path);
db_printf(" cpuset = %d\n", pr->pr_cpuset
? pr->pr_cpuset->cs_id : -1);
diff --git a/sys/sys/jail.h b/sys/sys/jail.h
--- a/sys/sys/jail.h
+++ b/sys/sys/jail.h
@@ -88,9 +88,11 @@
};
#define XPRISON_VERSION 3
-#define PRISON_STATE_INVALID 0
-#define PRISON_STATE_ALIVE 1
-#define PRISON_STATE_DYING 2
+enum prison_state {
+ PRISON_STATE_INVALID = 0, /* New prison, not ready to be seen */
+ PRISON_STATE_ALIVE, /* Current prison, visible to all */
+ PRISON_STATE_DYING /* Removed, but holding resources, */
+}; /* optionally visible. */
/*
* Flags for jail_set and jail_get.
@@ -99,7 +101,7 @@
#define JAIL_UPDATE 0x02 /* Update parameters of existing jail */
#define JAIL_ATTACH 0x04 /* Attach to jail upon creation */
#define JAIL_DYING 0x08 /* Allow getting a dying jail */
-#define JAIL_SET_MASK 0x0f
+#define JAIL_SET_MASK 0x0f /* JAIL_DYING is deprecated/ignored here */
#define JAIL_GET_MASK 0x08
#define JAIL_SYS_DISABLE 0
@@ -155,7 +157,9 @@
* (m) locked by pr_mtx
* (p) locked by pr_mtx, and also at least shared allprison_lock required
* to update
- * (r) atomic via refcount(9), pr_mtx required to decrement to zero
+ * (q) locked by both pr_mtx and allprison_lock
+ * (r) atomic via refcount(9), pr_mtx and allprison_lock required to
+ * decrement to zero
*/
struct prison {
TAILQ_ENTRY(prison) pr_list; /* (a) all prisons */
@@ -179,12 +183,13 @@
struct prison_racct *pr_prison_racct; /* (c) racct jail proxy */
void *pr_sparep[3];
int pr_childcount; /* (a) number of child jails */
- int pr_childmax; /* (p) maximum child jails */
+ int pr_childmax; /* (a) maximum child jails */
unsigned pr_allow; /* (p) PR_ALLOW_* flags */
int pr_securelevel; /* (p) securelevel */
int pr_enforce_statfs; /* (p) statfs permission */
int pr_devfs_rsnum; /* (p) devfs ruleset */
- int pr_spare[3];
+ enum prison_state pr_state; /* (q) state in life cycle */
+ int pr_spare[2];
int pr_osreldate; /* (c) kern.osreldate value */
unsigned long pr_hostid; /* (p) jail hostid */
char pr_name[MAXHOSTNAMELEN]; /* (p) admin jail name */
@@ -216,6 +221,7 @@
/* primary jail address. */
/* Internal flag bits */
+#define PR_REMOVE 0x01000000 /* In process of being removed */
#define PR_IP4 0x02000000 /* IPv4 restricted or disabled */
/* by this jail or an ancestor */
#define PR_IP6 0x04000000 /* IPv6 restricted or disabled */
@@ -333,6 +339,19 @@
; \
else
+/*
+ * As FOREACH_PRISON_DESCENDANT, but visit both preorder and postorder.
+ */
+#define FOREACH_PRISON_DESCENDANT_PRE_POST(ppr, cpr, descend) \
+ for ((cpr) = (ppr), (descend) = 1; \
+ ((cpr) = (descend) \
+ ? ((descend) = !LIST_EMPTY(&(cpr)->pr_children)) \
+ ? LIST_FIRST(&(cpr)->pr_children) \
+ : (cpr) \
+ : ((descend) = LIST_NEXT(cpr, pr_sibling) != NULL) \
+ ? LIST_NEXT(cpr, pr_sibling) \
+ : cpr->pr_parent) != (ppr);)
+
/*
* Attributes of the physical system, and the root of the jail tree.
*/
diff --git a/usr.sbin/jail/jail.8 b/usr.sbin/jail/jail.8
--- a/usr.sbin/jail/jail.8
+++ b/usr.sbin/jail/jail.8
@@ -25,7 +25,7 @@
.\"
.\" $FreeBSD$
.\"
-.Dd November 18, 2020
+.Dd January 25, 2021
.Dt JAIL 8
.Os
.Sh NAME
@@ -136,10 +136,6 @@
.Pp
Other available options are:
.Bl -tag -width indent
-.It Fl d
-Allow making changes to a dying jail, equivalent to the
-.Va allow.dying
-parameter.
.It Fl f Ar conf_file
Use configuration file
.Ar conf_file
@@ -207,6 +203,17 @@
.It Fl v
Print a message on every operation, such as running commands and
mounting filesystems.
+.It Fl d
+This is deprecated and is equivalent to the
+.Va allow.dying
+parameter, which is also deprecated.
+It used to allow making changes to a
+.Va dying
+jail.
+Now such jails are always replaced when a new jail is created with the same
+.Va jid
+or
+.Va name .
.El
.Pp
If no arguments are given after the options, the operation (except
@@ -903,9 +910,14 @@
.Pa /proc
directory.
.It Va allow.dying
-Allow making changes to a
+This is deprecated and has no effect.
+It used to allow making changes to a
.Va dying
jail.
+Now such jails are always replaced when a new jail is created with the same
+.Va jid
+or
+.Va name .
.It Va depend
Specify a jail (or jails) that this jail depends on.
When this jail is to be created, any jail(s) it depends on must already exist.
diff --git a/usr.sbin/jail/jail.c b/usr.sbin/jail/jail.c
--- a/usr.sbin/jail/jail.c
+++ b/usr.sbin/jail/jail.c
@@ -65,7 +65,7 @@
static void clear_persist(struct cfjail *j);
static int update_jail(struct cfjail *j);
static int rdtun_params(struct cfjail *j, int dofail);
-static void running_jid(struct cfjail *j, int dflag);
+static void running_jid(struct cfjail *j);
static void jail_quoted_warnx(const struct cfjail *j, const char *name_msg,
const char *noname_msg);
static int jailparam_set_note(const struct cfjail *j, struct jailparam *jp,
@@ -140,7 +140,7 @@
char *JidFile;
size_t sysvallen;
unsigned op, pi;
- int ch, docf, error, i, oldcl, sysval;
+ int ch, docf, error, i, oldcl, sysval, dying_warned;
int dflag, Rflag;
#if defined(INET) || defined(INET6)
char *cs, *ncs;
@@ -377,6 +377,7 @@
* operation on it. When that is done, the jail may be finished,
* or it may go back for the next step.
*/
+ dying_warned = 0;
while ((j = next_jail()))
{
if (j->flags & JF_FAILED) {
@@ -397,11 +398,13 @@
import_params(j) < 0)
continue;
}
+ if (j->intparams[IP_ALLOW_DYING] && !dying_warned) {
+ warnx("%s", "the 'allow.dying' parameter and '-d' flag "
+ "are deprecated and have no effect.");
+ dying_warned = 1; /* warn only once per run */
+ }
if (!j->jid)
- running_jid(j,
- (j->flags & (JF_SET | JF_DEPEND)) == JF_SET
- ? dflag || bool_param(j->intparams[IP_ALLOW_DYING])
- : 0);
+ running_jid(j);
if (finish_command(j))
continue;
@@ -613,11 +616,10 @@
int
create_jail(struct cfjail *j)
{
- struct iovec jiov[4];
struct stat st;
- struct jailparam *jp, *setparams, *setparams2, *sjp;
+ struct jailparam *jp, *setparams, *sjp;
const char *path;
- int dopersist, ns, jid, dying, didfail;
+ int dopersist, ns;
/*
* Check the jail's path, with a better error message than jail_set
@@ -657,57 +659,8 @@
*sjp++ = *jp;
ns = sjp - setparams;
- didfail = 0;
j->jid = jailparam_set_note(j, setparams, ns, JAIL_CREATE);
- if (j->jid < 0 && errno == EEXIST &&
- bool_param(j->intparams[IP_ALLOW_DYING]) &&
- int_param(j->intparams[KP_JID], &jid) && jid != 0) {
- /*
- * The jail already exists, but may be dying.
- * Make sure it is, in which case an update is appropriate.
- */
- jiov[0].iov_base = __DECONST(char *, "jid");
- jiov[0].iov_len = sizeof("jid");
- jiov[1].iov_base = &jid;
- jiov[1].iov_len = sizeof(jid);
- jiov[2].iov_base = __DECONST(char *, "dying");
- jiov[2].iov_len = sizeof("dying");
- jiov[3].iov_base = &dying;
- jiov[3].iov_len = sizeof(dying);
- if (jail_get(jiov, 4, JAIL_DYING) < 0) {
- /*
- * It could be that the jail just barely finished
- * dying, or it could be that the jid never existed
- * but the name does. In either case, another try
- * at creating the jail should do the right thing.
- */
- if (errno == ENOENT)
- j->jid = jailparam_set_note(j, setparams, ns,
- JAIL_CREATE);
- } else if (dying) {
- j->jid = jid;
- if (rdtun_params(j, 1) < 0) {
- j->jid = -1;
- didfail = 1;
- } else {
- sjp = setparams2 = alloca((j->njp + dopersist) *
- sizeof(struct jailparam));
- for (jp = setparams; jp < setparams + ns; jp++)
- if (!JP_RDTUN(jp) ||
- !strcmp(jp->jp_name, "jid"))
- *sjp++ = *jp;
- j->jid = jailparam_set_note(j, setparams2,
- sjp - setparams2, JAIL_UPDATE | JAIL_DYING);
- /*
- * Again, perhaps the jail just finished dying.
- */
- if (j->jid < 0 && errno == ENOENT)
- j->jid = jailparam_set_note(j,
- setparams, ns, JAIL_CREATE);
- }
- }
- }
- if (j->jid < 0 && !didfail) {
+ if (j->jid < 0) {
jail_warnx(j, "%s", jail_errmsg);
failed(j);
}
@@ -772,9 +725,7 @@
if (!JP_RDTUN(jp))
*++sjp = *jp;
- jid = jailparam_set_note(j, setparams, ns,
- bool_param(j->intparams[IP_ALLOW_DYING])
- ? JAIL_UPDATE | JAIL_DYING : JAIL_UPDATE);
+ jid = jailparam_set_note(j, setparams, ns, JAIL_UPDATE);
if (jid < 0) {
jail_warnx(j, "%s", jail_errmsg);
failed(j);
@@ -813,8 +764,7 @@
rtjp->jp_value = NULL;
}
rval = 0;
- if (jailparam_get(rtparams, nrt,
- bool_param(j->intparams[IP_ALLOW_DYING]) ? JAIL_DYING : 0) > 0) {
+ if (jailparam_get(rtparams, nrt, 0) > 0) {
rtjp = rtparams + 1;
for (jp = j->jp; rtjp < rtparams + nrt; jp++) {
if (JP_RDTUN(jp) && strcmp(jp->jp_name, "jid")) {
@@ -851,7 +801,7 @@
* Get the jail's jid if it is running.
*/
static void
-running_jid(struct cfjail *j, int dflag)
+running_jid(struct cfjail *j)
{
struct iovec jiov[2];
const char *pval;
@@ -877,7 +827,7 @@
j->jid = -1;
return;
}
- j->jid = jail_get(jiov, 2, dflag ? JAIL_DYING : 0);
+ j->jid = jail_get(jiov, 2, 0);
}
static void
@@ -906,10 +856,9 @@
jid = jailparam_set(jp, njp, flags);
if (verbose > 0) {
- jail_note(j, "jail_set(%s%s)",
+ jail_note(j, "jail_set(%s)",
(flags & (JAIL_CREATE | JAIL_UPDATE)) == JAIL_CREATE
- ? "JAIL_CREATE" : "JAIL_UPDATE",
- (flags & JAIL_DYING) ? " | JAIL_DYING" : "");
+ ? "JAIL_CREATE" : "JAIL_UPDATE");
for (i = 0; i < njp; i++) {
printf(" %s", jp[i].jp_name);
if (jp[i].jp_value == NULL)

File Metadata

Mime Type
text/plain
Expires
Thu, Apr 16, 6:27 AM (7 h, 44 m)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
31584649
Default Alt Text
D28150.id82895.diff (52 KB)

Event Timeline