Page Menu
Home
FreeBSD
Search
Configure Global Search
Log In
Files
F152486188
D28150.id82895.diff
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Flag For Later
Award Token
Size
52 KB
Referenced Files
None
Subscribers
None
D28150.id82895.diff
View Options
diff --git a/lib/libc/sys/jail.2 b/lib/libc/sys/jail.2
--- a/lib/libc/sys/jail.2
+++ b/lib/libc/sys/jail.2
@@ -25,7 +25,7 @@
.\"
.\" $FreeBSD$
.\"
-.Dd February 8, 2012
+.Dd January 25, 2021
.Dt JAIL 2
.Os
.Sh NAME
@@ -187,7 +187,12 @@
.Fn jail_attach
system call.
.It Dv JAIL_DYING
-Allow setting a jail that is in the process of being removed.
+This is deprecated and has no effect.
+It used to allow setting a jail that is in the process of being removed.
+Now such jails are always replaced when a new jail is created with the same
+.Va jid
+or
+.Va name .
.El
.Pp
The
diff --git a/sys/kern/kern_fork.c b/sys/kern/kern_fork.c
--- a/sys/kern/kern_fork.c
+++ b/sys/kern/kern_fork.c
@@ -710,6 +710,16 @@
*/
knote_fork(p1->p_klist, p2->p_pid);
+ /*
+ * See if the containing prison died while the process was still new.
+ */
+ if (!prison_isalive(p2->p_ucred->cr_prison)) {
+ /* Follow the prison into death. */
+ PROC_LOCK(p2);
+ kern_psignal(p2, SIGKILL);
+ PROC_UNLOCK(p2);
+ }
+
/*
* Now can be swapped.
*/
diff --git a/sys/kern/kern_jail.c b/sys/kern/kern_jail.c
--- a/sys/kern/kern_jail.c
+++ b/sys/kern/kern_jail.c
@@ -106,6 +106,7 @@
.pr_path = "/",
.pr_securelevel = -1,
.pr_devfs_rsnum = 0,
+ .pr_state = PRISON_STATE_ALIVE,
.pr_childmax = JAIL_MAX,
.pr_hostuuid = DEFAULT_HOSTUUID,
.pr_children = LIST_HEAD_INITIALIZER(prison0.pr_children),
@@ -140,10 +141,16 @@
static int do_jail_attach(struct thread *td, struct prison *pr);
static void prison_complete(void *context, int pending);
static void prison_deref(struct prison *pr, int flags);
+static int prison_deref_lock(struct prison *pr, int flags);
+static int prison_deref_kill(struct prison *pr, struct prisonlist *freeprison);
+static void prison_deref_kill_descend(struct prison *pr);
+static int prison_deref_kill_ascend(struct prison *pr,
+ struct prisonlist *freeprison);
+static void prison_free_not_last(struct prison *pr);
+static void prison_proc_free_not_last(struct prison *pr);
static void prison_set_allow_locked(struct prison *pr, unsigned flag,
int enable);
static char *prison_path(struct prison *pr1, struct prison *pr2);
-static void prison_remove_one(struct prison *pr);
#ifdef RACCT
static void prison_racct_attach(struct prison *pr);
static void prison_racct_modify(struct prison *pr);
@@ -153,9 +160,10 @@
/* Flags for prison_deref */
#define PD_DEREF 0x01 /* Decrement pr_ref */
#define PD_DEUREF 0x02 /* Decrement pr_uref */
-#define PD_LOCKED 0x04 /* pr_mtx is held */
-#define PD_LIST_SLOCKED 0x08 /* allprison_lock is held shared */
-#define PD_LIST_XLOCKED 0x10 /* allprison_lock is held exclusive */
+#define PD_KILL 0x04 /* Remove jail, kill processes, etc */
+#define PD_LOCKED 0x08 /* pr_mtx is held */
+#define PD_LIST_SLOCKED 0x10 /* allprison_lock is held shared */
+#define PD_LIST_XLOCKED 0x20 /* allprison_lock is held exclusive */
/*
* Parameter names corresponding to PR_* flag values. Size values are for kvm
@@ -526,10 +534,10 @@
#endif
unsigned long hid;
size_t namelen, onamelen, pnamelen;
- int born, created, cuflags, descend, drflags, enforce;
+ int created, cuflags, descend, drflags, enforce;
int error, errmsg_len, errmsg_pos;
int gotchildmax, gotenforce, gothid, gotrsnum, gotslevel;
- int jid, jsys, len, level;
+ int jid, jsys, len, level, tjid;
int childmax, osreldt, rsnum, slevel;
#if defined(INET) || defined(INET6)
int ii, ij;
@@ -540,9 +548,8 @@
#ifdef INET6
int ip6s, redo_ip6;
#endif
- uint64_t pr_allow, ch_allow, pr_flags, ch_flags;
+ uint64_t pr_allow, ch_allow, pr_flags, ch_flags, tallow;
uint64_t pr_allow_diff;
- unsigned tallow;
char numbuf[12];
error = priv_check(td, PRIV_JAIL_SET);
@@ -550,9 +557,6 @@
error = priv_check(td, PRIV_JAIL_ATTACH);
if (error)
return (error);
- mypr = td->td_ucred->cr_prison;
- if ((flags & JAIL_CREATE) && mypr->pr_childmax == 0)
- return (EPERM);
if (flags & ~JAIL_SET_MASK)
return (EINVAL);
@@ -661,12 +665,6 @@
}
ch_flags |= jsf->new | jsf->disable;
}
- if ((flags & (JAIL_CREATE | JAIL_UPDATE | JAIL_ATTACH)) == JAIL_CREATE
- && !(pr_flags & PR_PERSIST)) {
- error = EINVAL;
- vfs_opterror(opts, "new jail must persist or attach");
- goto done_errmsg;
- }
#ifdef VIMAGE
if ((flags & JAIL_UPDATE) && (ch_flags & PR_VNET)) {
error = EINVAL;
@@ -984,9 +982,10 @@
* Find the specified jail, or at least its parent.
* This abuses the file error codes ENOENT and EEXIST.
*/
+ ppr = mypr = td->td_ucred->cr_prison;
pr = NULL;
- ppr = mypr;
inspr = NULL;
+ deadpr = NULL;
if (cuflags == JAIL_CREATE && jid == 0 && name != NULL) {
namelc = strrchr(name, '.');
jid = strtoul(namelc != NULL ? namelc + 1 : name, &p, 10);
@@ -1006,68 +1005,45 @@
* where it can be inserted later.
*/
TAILQ_FOREACH(inspr, &allprison, pr_list) {
- if (inspr->pr_id == jid) {
- mtx_lock(&inspr->pr_mtx);
- if (prison_isvalid(inspr)) {
- pr = inspr;
- drflags |= PD_LOCKED;
- inspr = NULL;
- } else
- mtx_unlock(&inspr->pr_mtx);
- break;
- }
+ if (inspr->pr_id < jid)
+ continue;
if (inspr->pr_id > jid)
break;
- }
- if (pr != NULL) {
- ppr = pr->pr_parent;
- /* Create: jid must not exist. */
- if (cuflags == JAIL_CREATE) {
- /*
- * Even creators that cannot see the jail will
- * get EEXIST.
- */
- error = EEXIST;
- vfs_opterror(opts, "jail %d already exists",
- jid);
- goto done_deref;
- }
- if (!prison_ischild(mypr, pr)) {
- /*
- * Updaters get ENOENT if they cannot see the
- * jail. This is true even for CREATE | UPDATE,
- * which normally cannot give this error.
- */
- error = ENOENT;
- vfs_opterror(opts, "jail %d not found", jid);
- goto done_deref;
- } else if (!prison_isalive(pr)) {
- if (!(flags & JAIL_DYING)) {
- error = ENOENT;
- vfs_opterror(opts, "jail %d is dying",
- jid);
- goto done_deref;
- } else if ((flags & JAIL_ATTACH) ||
- (pr_flags & PR_PERSIST)) {
- /*
- * A dying jail might be resurrected
- * (via attach or persist), but first
- * it must determine if another jail
- * has claimed its name. Accomplish
- * this by implicitly re-setting the
- * name.
- */
- if (name == NULL)
- name = prison_name(mypr, pr);
- }
- }
- } else {
- /* Update: jid must exist. */
- if (cuflags == JAIL_UPDATE) {
- error = ENOENT;
- vfs_opterror(opts, "jail %d not found", jid);
- goto done_deref;
+ if (inspr->pr_state != PRISON_STATE_DYING) {
+ /* The jail exists. */
+ pr = inspr;
+ mtx_lock(&pr->pr_mtx);
+ drflags |= PD_LOCKED;
+ ppr = pr->pr_parent;
+ } else {
+ /* Note a dying jail to handle later. */
+ deadpr = inspr;
}
+ inspr = NULL;
+ break;
+ }
+
+ if (cuflags == JAIL_CREATE && pr != NULL) {
+ /*
+ * Creators get EEXIST if the jail already exists,
+ * even if they cannot see it.
+ */
+ error = EEXIST;
+ vfs_opterror(opts, "jail %d already exists", jid);
+ goto done_deref;
+ }
+ if ((pr == NULL)
+ ? cuflags == JAIL_UPDATE
+ : !prison_isalive(pr) || !prison_ischild(mypr, pr)) {
+ /*
+ * Updaters get ENOENT for nonexistent jails,
+ * or if the jail exists but they cannot see it.
+ * The latter case is true even for CREATE | UPDATE,
+ * which normally cannot give this error.
+ */
+ error = ENOENT;
+ vfs_opterror(opts, "jail %d not found", jid);
+ goto done_deref;
}
}
/*
@@ -1099,7 +1075,9 @@
} else {
*namelc = '\0';
ppr = prison_find_name(mypr, name);
- if (ppr == NULL) {
+ if (ppr == NULL || !prison_isalive(ppr)) {
+ if (ppr != NULL)
+ mtx_unlock(&ppr->pr_mtx);
error = ENOENT;
vfs_opterror(opts,
"jail \"%s\" not found", name);
@@ -1113,61 +1091,34 @@
if (namelc[0] != '\0') {
pnamelen =
(ppr == &prison0) ? 0 : strlen(ppr->pr_name) + 1;
- name_again:
- deadpr = NULL;
FOREACH_PRISON_CHILD(ppr, tpr) {
- if (tpr != pr &&
- !strcmp(tpr->pr_name + pnamelen, namelc)) {
- mtx_lock(&tpr->pr_mtx);
- if (prison_isalive(tpr)) {
- if (pr == NULL &&
- cuflags != JAIL_CREATE) {
- /*
- * Use this jail
- * for updates.
- */
- pr = tpr;
- drflags |= PD_LOCKED;
- break;
- }
- /*
- * Create, or update(jid):
- * name must not exist in an
- * active sibling jail.
- */
- error = EEXIST;
- mtx_unlock(&tpr->pr_mtx);
- vfs_opterror(opts,
- "jail \"%s\" already exists",
- name);
- goto done_deref;
- }
- if (pr == NULL &&
- cuflags != JAIL_CREATE &&
- prison_isvalid(tpr))
- deadpr = tpr;
- mtx_unlock(&tpr->pr_mtx);
- }
- }
- /* If no active jail is found, use a dying one. */
- if (deadpr != NULL && pr == NULL) {
- if (flags & JAIL_DYING) {
- mtx_lock(&deadpr->pr_mtx);
- if (!prison_isvalid(deadpr)) {
- mtx_unlock(&deadpr->pr_mtx);
- goto name_again;
- }
- pr = deadpr;
- drflags |= PD_LOCKED;
- } else if (cuflags == JAIL_UPDATE) {
- error = ENOENT;
+ if (tpr == pr ||
+ tpr->pr_state == PRISON_STATE_DYING ||
+ strcmp(tpr->pr_name + pnamelen, namelc))
+ continue;
+ if (cuflags == JAIL_CREATE || pr != NULL) {
+ /*
+ * Create, or update(jid): name must
+ * not exist in an active sibling jail.
+ */
+ error = EEXIST;
vfs_opterror(opts,
- "jail \"%s\" is dying", name);
+ "jail \"%s\" already exists", name);
goto done_deref;
}
+ /* Use this jail for updates. */
+ pr = tpr;
+ mtx_lock(&pr->pr_mtx);
+ drflags |= PD_LOCKED;
+ break;
}
- /* Update: name must exist if no jid. */
- else if (cuflags == JAIL_UPDATE && pr == NULL) {
+ /*
+ * Update: name must exist if no jid. As with the jid
+ * case, the jail must be currently visible, or else
+ * even CREATE | UPDATE will get an error.
+ */
+ if ((pr == NULL)
+ ? cuflags == JAIL_UPDATE : !prison_isalive(pr)) {
error = ENOENT;
vfs_opterror(opts, "jail \"%s\" not found",
name);
@@ -1182,7 +1133,7 @@
goto done_deref;
}
- /* If there's no prison to update, create a new one and link it in. */
+ /* If there's no prison to update, create a new one. */
created = pr == NULL;
if (created) {
for (tpr = mypr; tpr != NULL; tpr = tpr->pr_parent)
@@ -1191,36 +1142,75 @@
vfs_opterror(opts, "prison limit exceeded");
goto done_deref;
}
- mtx_lock(&ppr->pr_mtx);
- if (!prison_isvalid(ppr)) {
- mtx_unlock(&ppr->pr_mtx);
+ if (!prison_isalive(ppr)) {
error = ENOENT;
vfs_opterror(opts, "jail \"%s\" not found",
prison_name(mypr, ppr));
goto done_deref;
}
prison_hold(ppr);
- refcount_acquire(&ppr->pr_uref);
- mtx_unlock(&ppr->pr_mtx);
-
- if (jid == 0 && (jid = get_next_prid(&inspr)) == 0) {
- error = EAGAIN;
- vfs_opterror(opts, "no available jail IDs");
- pr = ppr;
- drflags |= PD_DEREF | PD_DEUREF;
- goto done_deref;
+ prison_proc_hold(ppr);
+
+ /*
+ * If no jid was explicitly given, or if a dying jail is being
+ * replaced, find free ID.
+ */
+ if (jid > 0 && deadpr == NULL)
+ tjid = jid;
+ else {
+ tjid = get_next_prid(&inspr);
+ if (tjid == 0) {
+ error = EAGAIN;
+ vfs_opterror(opts, "no available jail IDs");
+ pr = ppr;
+ drflags |= PD_DEREF | PD_DEUREF;
+ goto done_deref;
+ }
}
+ /*
+ * Start the prison with a reference, matching the one added
+ * to existing prisons.
+ */
pr = malloc(sizeof(*pr), M_PRISON, M_WAITOK | M_ZERO);
+ pr->pr_state = PRISON_STATE_INVALID;
+ refcount_init(&pr->pr_ref, 1);
+ refcount_init(&pr->pr_uref, 1);
+ drflags |= PD_DEREF | PD_DEUREF;
LIST_INIT(&pr->pr_children);
mtx_init(&pr->pr_mtx, "jail mutex", NULL, MTX_DEF | MTX_DUPOK);
TASK_INIT(&pr->pr_task, 0, prison_complete, pr);
- pr->pr_id = jid;
+ if (deadpr == NULL) {
+ pr->pr_id = jid = tjid;
+ tpr = pr;
+ } else {
+ pr->pr_id = jid;
+ /*
+ * The prison being created has the same ID as a dying
+ * one. Handle this by swapping the new prison with
+ * the dying one, and then re-adding the dying jail
+ * with the new ID. This may cause some confusion to
+ * user space, but only to those listing dying jails.
+ */
+ TAILQ_INSERT_BEFORE(deadpr, pr, pr_list);
+ TAILQ_REMOVE(&allprison, deadpr, pr_list);
+ if (inspr == deadpr)
+ inspr = pr;
+ mtx_lock(&deadpr->pr_mtx);
+ deadpr->pr_id = tjid;
+ mtx_unlock(&deadpr->pr_mtx);
+ tpr = deadpr;
+ }
+
+ /*
+ * Link the prison into the allprison list in ID order,
+ * and into its parent's child list in no particular order.
+ */
if (inspr != NULL)
- TAILQ_INSERT_BEFORE(inspr, pr, pr_list);
+ TAILQ_INSERT_BEFORE(inspr, tpr, pr_list);
else
- TAILQ_INSERT_TAIL(&allprison, pr, pr_list);
+ TAILQ_INSERT_TAIL(&allprison, tpr, pr_list);
pr->pr_parent = ppr;
LIST_INSERT_HEAD(&ppr->pr_children, pr, pr_sibling);
@@ -1305,18 +1295,14 @@
mtx_lock(&pr->pr_mtx);
drflags |= PD_LOCKED;
- /*
- * New prisons do not yet have a reference, because we do not
- * want others to see the incomplete prison once the
- * allprison_lock is downgraded.
- */
} else {
/*
* Grab a reference for existing prisons, to ensure they
* continue to exist for the duration of the call.
*/
prison_hold(pr);
- drflags |= PD_DEREF;
+ prison_proc_hold(pr);
+ drflags |= PD_DEREF | PD_DEUREF;
#if defined(VIMAGE) && (defined(INET) || defined(INET6))
if ((pr->pr_flags & PR_VNET) &&
(ch_flags & (PR_IP4_USER | PR_IP6_USER))) {
@@ -1434,7 +1420,7 @@
#ifdef VIMAGE
(tpr != tppr && (tpr->pr_flags & PR_VNET)) ||
#endif
- refcount_load(&tpr->pr_uref) == 0) {
+ tpr->pr_state == PRISON_STATE_DYING) {
descend = 0;
continue;
}
@@ -1502,7 +1488,7 @@
#ifdef VIMAGE
(tpr != tppr && (tpr->pr_flags & PR_VNET)) ||
#endif
- refcount_load(&tpr->pr_uref) == 0) {
+ tpr->pr_state == PRISON_STATE_DYING) {
descend = 0;
continue;
}
@@ -1731,23 +1717,26 @@
prison_set_allow_locked(pr, tallow, 0);
/*
* Persistent prisons get an extra reference, and prisons losing their
- * persist flag lose that reference. Only do this for existing prisons
- * for now, so new ones will remain unseen until after the module
- * handlers have completed.
+ * persist flag lose that reference.
*/
- born = !prison_isalive(pr);
- if (!created && (ch_flags & PR_PERSIST & (pr_flags ^ pr->pr_flags))) {
+ if (ch_flags & PR_PERSIST & (pr_flags ^ pr->pr_flags)) {
if (pr_flags & PR_PERSIST) {
prison_hold(pr);
- refcount_acquire(&pr->pr_uref);
+ prison_proc_hold(pr);
} else {
- refcount_release(&pr->pr_ref);
- drflags |= PD_DEUREF;
+ prison_proc_free_not_last(pr);
+ prison_free_not_last(pr);
}
}
pr->pr_flags = (pr->pr_flags & ~ch_flags) | pr_flags;
mtx_unlock(&pr->pr_mtx);
drflags &= ~PD_LOCKED;
+ /*
+ * Any errors past this point will need to de-persist newly created
+ * prisons, as well as call remove methods.
+ */
+ if (created)
+ drflags |= PD_KILL;
#ifdef RACCT
if (racct_enable && created)
@@ -1807,19 +1796,14 @@
/* Let the modules do their work. */
sx_downgrade(&allprison_lock);
drflags = (drflags & ~PD_LIST_XLOCKED) | PD_LIST_SLOCKED;
- if (born) {
+ if (created) {
error = osd_jail_call(pr, PR_METHOD_CREATE, opts);
- if (error) {
- (void)osd_jail_call(pr, PR_METHOD_REMOVE, NULL);
+ if (error)
goto done_deref;
- }
}
error = osd_jail_call(pr, PR_METHOD_SET, opts);
- if (error) {
- if (born)
- (void)osd_jail_call(pr, PR_METHOD_REMOVE, NULL);
+ if (error)
goto done_deref;
- }
/* Attach this process to the prison if requested. */
if (flags & JAIL_ATTACH) {
@@ -1827,10 +1811,6 @@
error = do_jail_attach(td, pr);
drflags &= ~PD_LIST_SLOCKED;
if (error) {
- if (created) {
- /* do_jail_attach has removed the prison. */
- pr = NULL;
- }
vfs_opterror(opts, "attach failed");
goto done_deref;
}
@@ -1846,25 +1826,39 @@
}
#endif
- td->td_retval[0] = pr->pr_id;
-
+ /*
+ * Now that everything is done, a newly created prison should be alive,
+ * either from persistence, attaching, or perhaps a module parameter.
+ */
if (created) {
+ drflags = prison_deref_lock(pr, drflags);
+ if (!prison_isalive(ppr)) {
+ /*
+ * The parent prison died while this one was being
+ * created.
+ */
+ error = ENOENT;
+ vfs_opterror(opts, "jail \"%s\" not found",
+ prison_name(mypr, ppr));
+ goto done_deref;
+ }
/*
- * Add a reference to newly created persistent prisons
- * (which was not done earlier so that the prison would
- * not be publicly visible).
+ * We are holding one temporary user reference, so there
+ * must be more than that for the prison to continue to
+ * exist. That usually comes from persistence or attaching,
+ * though modules may also add a reference.
*/
- if (pr_flags & PR_PERSIST) {
- mtx_lock(&pr->pr_mtx);
- drflags |= PD_LOCKED;
- refcount_acquire(&pr->pr_ref);
- refcount_acquire(&pr->pr_uref);
- } else {
- /* Non-persistent jails need no further changes. */
- pr = NULL;
+ if (atomic_load_acq_int(&pr->pr_uref) <= 1) {
+ error = EINVAL;
+ vfs_opterror(opts, "new jail must persist or attach");
+ goto done_deref;
}
+ pr->pr_state = PRISON_STATE_ALIVE;
+ drflags &= ~PD_KILL;
}
+ td->td_retval[0] = pr->pr_id;
+
done_deref:
/* Release any temporary prison holds and/or locks. */
if (pr != NULL)
@@ -1933,13 +1927,8 @@
TAILQ_FOREACH(inspr, &allprison, pr_list) {
if (inspr->pr_id < jid)
continue;
- if (inspr->pr_id > jid ||
- refcount_load(&inspr->pr_ref) == 0) {
- /*
- * Found an opening. This may be a gap
- * in the list, or a dead jail with the
- * same ID.
- */
+ if (inspr->pr_id > jid) {
+ /* Found an opening. */
maxid = 0;
break;
}
@@ -2028,15 +2017,13 @@
error = vfs_copyopt(opts, "lastjid", &jid, sizeof(jid));
if (error == 0) {
TAILQ_FOREACH(pr, &allprison, pr_list) {
- if (pr->pr_id > jid && prison_ischild(mypr, pr)) {
- mtx_lock(&pr->pr_mtx);
- if ((flags & JAIL_DYING)
- ? prison_isvalid(pr) : prison_isalive(pr))
+ if (pr->pr_id > jid && prison_ischild(mypr, pr) &&
+ ((flags & JAIL_DYING)
+ ? prison_isvalid(pr) : prison_isalive(pr)))
break;
- mtx_unlock(&pr->pr_mtx);
- }
}
if (pr != NULL) {
+ mtx_lock(&pr->pr_mtx);
drflags |= PD_LOCKED;
goto found_prison;
}
@@ -2052,8 +2039,8 @@
pr = prison_find_child(mypr, jid);
if (pr != NULL) {
drflags |= PD_LOCKED;
- if (!(prison_isalive(pr) ||
- (flags & JAIL_DYING))) {
+ if (!((flags & JAIL_DYING) ||
+ prison_isalive(pr))) {
error = ENOENT;
vfs_opterror(opts, "jail %d is dying",
jid);
@@ -2077,7 +2064,7 @@
pr = prison_find_name(mypr, name);
if (pr != NULL) {
drflags |= PD_LOCKED;
- if (!(prison_isalive(pr) || (flags & JAIL_DYING))) {
+ if (!((flags & JAIL_DYING) || prison_isalive(pr))) {
error = ENOENT;
vfs_opterror(opts, "jail \"%s\" is dying",
name);
@@ -2295,8 +2282,8 @@
int
sys_jail_remove(struct thread *td, struct jail_remove_args *uap)
{
- struct prison *pr, *cpr, *lpr, *tpr;
- int descend, error;
+ struct prison *pr;
+ int error;
error = priv_check(td, PRIV_JAIL_REMOVE);
if (error)
@@ -2308,86 +2295,16 @@
sx_xunlock(&allprison_lock);
return (EINVAL);
}
-
- /* Remove all descendants of this prison, then remove this prison. */
- prison_hold(pr);
- if (!LIST_EMPTY(&pr->pr_children)) {
+ if (!prison_isalive(pr)) {
+ /* Silently ignore already-dying prisons. */
mtx_unlock(&pr->pr_mtx);
- lpr = NULL;
- FOREACH_PRISON_DESCENDANT(pr, cpr, descend) {
- mtx_lock(&cpr->pr_mtx);
- if (prison_isvalid(cpr)) {
- tpr = cpr;
- prison_hold(cpr);
- } else {
- /* Already removed - do not do it again. */
- tpr = NULL;
- }
- mtx_unlock(&cpr->pr_mtx);
- if (lpr != NULL) {
- mtx_lock(&lpr->pr_mtx);
- prison_remove_one(lpr);
- sx_xlock(&allprison_lock);
- }
- lpr = tpr;
- }
- if (lpr != NULL) {
- mtx_lock(&lpr->pr_mtx);
- prison_remove_one(lpr);
- sx_xlock(&allprison_lock);
- }
- mtx_lock(&pr->pr_mtx);
+ sx_xunlock(&allprison_lock);
+ return (0);
}
- prison_remove_one(pr);
+ prison_deref(pr, PD_KILL | PD_LOCKED | PD_LIST_XLOCKED);
return (0);
}
-static void
-prison_remove_one(struct prison *pr)
-{
- struct proc *p;
- int drflags;
-
- drflags = PD_DEREF | PD_LOCKED | PD_LIST_XLOCKED;
-
- /* If the prison was persistent, it is not anymore. */
- if (pr->pr_flags & PR_PERSIST) {
- refcount_release(&pr->pr_ref);
- drflags |= PD_DEUREF;
- pr->pr_flags &= ~PR_PERSIST;
- }
-
- /*
- * jail_remove added a reference. If that's the only one, remove
- * the prison now. refcount(9) doesn't guarantee the cache coherence
- * of non-zero counters, so force it here.
- */
- KASSERT(refcount_load(&pr->pr_ref) > 0,
- ("prison_remove_one removing a dead prison (jid=%d)", pr->pr_id));
- if (atomic_load_acq_int(&pr->pr_ref) == 1) {
- prison_deref(pr, drflags);
- return;
- }
-
- mtx_unlock(&pr->pr_mtx);
- sx_xunlock(&allprison_lock);
- drflags &= ~(PD_LOCKED | PD_LIST_XLOCKED);
- /*
- * Kill all processes unfortunate enough to be attached to this prison.
- */
- sx_slock(&allproc_lock);
- FOREACH_PROC_IN_SYSTEM(p) {
- PROC_LOCK(p);
- if (p->p_state != PRS_NEW && p->p_ucred &&
- p->p_ucred->cr_prison == pr)
- kern_psignal(p, SIGKILL);
- PROC_UNLOCK(p);
- }
- sx_sunlock(&allproc_lock);
- /* Remove the temporary reference added by jail_remove. */
- prison_deref(pr, drflags);
-}
-
/*
* struct jail_attach_args {
* int jid;
@@ -2403,14 +2320,7 @@
if (error)
return (error);
- /*
- * Start with exclusive hold on allprison_lock to ensure that a possible
- * PR_METHOD_REMOVE call isn't concurrent with jail_set or jail_remove.
- * But then immediately downgrade it since we don't need to stop
- * readers.
- */
- sx_xlock(&allprison_lock);
- sx_downgrade(&allprison_lock);
+ sx_slock(&allprison_lock);
pr = prison_find_child(td->td_ucred->cr_prison, uap->jid);
if (pr == NULL) {
sx_sunlock(&allprison_lock);
@@ -2442,8 +2352,8 @@
* a process root from one prison, but attached to the jail
* of another.
*/
- refcount_acquire(&pr->pr_ref);
- refcount_acquire(&pr->pr_uref);
+ prison_hold(pr);
+ prison_proc_hold(pr);
mtx_unlock(&pr->pr_mtx);
/* Let modules do whatever they need to prepare for attaching. */
@@ -2490,6 +2400,18 @@
#endif
prison_deref(oldcred->cr_prison, PD_DEREF | PD_DEUREF);
crfree(oldcred);
+
+ /*
+ * See if the target prison died between unlocking the prison
+ * and changing the credentials.
+ */
+ if (pr->pr_state == PRISON_STATE_DYING) {
+ /* Follow the prison into death. */
+ PROC_LOCK(p);
+ kern_psignal(p, SIGKILL);
+ PROC_UNLOCK(p);
+ }
+
return (0);
e_unlock:
@@ -2513,14 +2435,14 @@
sx_assert(&allprison_lock, SX_LOCKED);
TAILQ_FOREACH(pr, &allprison, pr_list) {
if (pr->pr_id == prid) {
- mtx_lock(&pr->pr_mtx);
- if (prison_isvalid(pr))
+ if (prison_isvalid(pr)) {
+ mtx_lock(&pr->pr_mtx);
return (pr);
+ }
/*
* Any active prison with the same ID would have
* been inserted before a dead one.
*/
- mtx_unlock(&pr->pr_mtx);
break;
}
if (pr->pr_id > prid)
@@ -2540,11 +2462,9 @@
sx_assert(&allprison_lock, SX_LOCKED);
FOREACH_PRISON_DESCENDANT(mypr, pr, descend) {
- if (pr->pr_id == prid) {
+ if (pr->pr_id == prid && prison_isvalid(pr)) {
mtx_lock(&pr->pr_mtx);
- if (prison_isvalid(pr))
- return (pr);
- mtx_unlock(&pr->pr_mtx);
+ return (pr);
}
}
return (NULL);
@@ -2562,26 +2482,20 @@
sx_assert(&allprison_lock, SX_LOCKED);
mylen = (mypr == &prison0) ? 0 : strlen(mypr->pr_name) + 1;
- again:
deadpr = NULL;
FOREACH_PRISON_DESCENDANT(mypr, pr, descend) {
if (!strcmp(pr->pr_name + mylen, name)) {
- mtx_lock(&pr->pr_mtx);
- if (prison_isalive(pr))
+ if (prison_isalive(pr)) {
+ mtx_lock(&pr->pr_mtx);
return (pr);
+ }
if (prison_isvalid(pr))
deadpr = pr;
- mtx_unlock(&pr->pr_mtx);
}
}
/* There was no valid prison - perhaps there was a dying one. */
- if (deadpr != NULL) {
+ if (deadpr != NULL)
mtx_lock(&deadpr->pr_mtx);
- if (!prison_isvalid(deadpr)) {
- mtx_unlock(&deadpr->pr_mtx);
- goto again;
- }
- }
return (deadpr);
}
@@ -2624,8 +2538,9 @@
prison_hold(struct prison *pr)
{
#ifdef INVARIANTS
- int was_valid = refcount_acquire_if_not_zero(&pr->pr_ref);
+ int was_valid;
+ was_valid = refcount_acquire_if_not_zero(&pr->pr_ref);
KASSERT(was_valid,
("Trying to hold dead prison %p (jid=%d).", pr, pr->pr_id));
#else
@@ -2635,45 +2550,52 @@
/*
* Remove a prison reference. If that was the last reference, the
- * prison will be removed (at a later time). Return with the prison
- * unlocked.
+ * prison will be removed (at a later time).
*/
void
prison_free_locked(struct prison *pr)
{
- int lastref;
- mtx_assert(&pr->pr_mtx, MA_OWNED);
+ /*
+ * Locking is no longer required, but unlock because the caller
+ * expects it.
+ */
+ mtx_unlock(&pr->pr_mtx);
+ prison_free(pr);
+}
+
+void
+prison_free(struct prison *pr)
+{
+
KASSERT(refcount_load(&pr->pr_ref) > 0,
("Trying to free dead prison %p (jid=%d).",
pr, pr->pr_id));
- lastref = refcount_release(&pr->pr_ref);
- mtx_unlock(&pr->pr_mtx);
- if (lastref) {
+ if (!refcount_release_if_not_last(&pr->pr_ref)) {
/*
- * Don't remove the prison itself in this context,
+ * Don't remove the last reference in this context,
* in case there are locks held.
*/
taskqueue_enqueue(taskqueue_thread, &pr->pr_task);
}
}
-void
-prison_free(struct prison *pr)
+static void
+prison_free_not_last(struct prison *pr)
{
+#ifdef INVARIANTS
+ int lastref;
- /*
- * Locking is only required when releasing the last reference.
- * This allows assurance that a locked prison will remain valid
- * until it is unlocked.
- */
KASSERT(refcount_load(&pr->pr_ref) > 0,
("Trying to free dead prison %p (jid=%d).",
pr, pr->pr_id));
- if (refcount_release_if_not_last(&pr->pr_ref))
- return;
- mtx_lock(&pr->pr_mtx);
- prison_free_locked(pr);
+ lastref = refcount_release(&pr->pr_ref);
+ KASSERT(!lastref,
+ ("prison_free_not_last freed last ref on prison %p (jid=%d).",
+ pr, pr->pr_id));
+#else
+ refcount_release(&pr->pr_ref);
+#endif
}
/*
@@ -2682,15 +2604,17 @@
* user-visible, except through the the jail system calls. It is also
* an error to hold an invalid prison. A prison record will remain
* alive as long as it has at least one user reference, and will not
- * be set to the dying state was long as the prison mutex is held.
+ * be set to the dying state as long as either the prison mutex or
+ * the allprison lock is held (allprison_lock may be shared).
*/
void
prison_proc_hold(struct prison *pr)
{
#ifdef INVARIANTS
- int was_alive = refcount_acquire_if_not_zero(&pr->pr_uref);
+ int was_alive;
- KASSERT(was_alive,
+ was_alive = refcount_acquire_if_not_zero(&pr->pr_uref);
+ KASSERT(was_alive && refcount_load(&pr->pr_ref) > 0,
("Cannot add a process to a non-alive prison (jid=%d)", pr->pr_id));
#else
refcount_acquire(&pr->pr_uref);
@@ -2706,13 +2630,8 @@
prison_proc_free(struct prison *pr)
{
- /*
- * Locking is only required when releasing the last reference.
- * This allows assurance that a locked prison will remain alive
- * until it is unlocked.
- */
KASSERT(refcount_load(&pr->pr_uref) > 0,
- ("Trying to kill a process in a dead prison (jid=%d)", pr->pr_id));
+ ("Trying to kill a process in a dying prison (jid=%d)", pr->pr_id));
if (!refcount_release_if_not_last(&pr->pr_uref)) {
/*
* Don't remove the last user reference in this context,
@@ -2720,11 +2639,28 @@
* but also half dead. Add a reference so any calls to
* prison_free() won't re-submit the task.
*/
- refcount_acquire(&pr->pr_ref);
+ prison_hold(pr);
taskqueue_enqueue(taskqueue_thread, &pr->pr_task);
}
}
+static void
+prison_proc_free_not_last(struct prison *pr)
+{
+#ifdef INVARIANTS
+ int lastref;
+
+ KASSERT(refcount_load(&pr->pr_uref) > 0,
+ ("Trying to kill a process in a dead prison (jid=%d)", pr->pr_id));
+ lastref = refcount_release(&pr->pr_uref);
+ KASSERT(!lastref,
+ ("prison_proc_free_not_last freed last uref on prison %p (jid=%d).",
+ pr, pr->pr_id));
+#else
+ refcount_release(&pr->pr_uref);
+#endif
+}
+
/*
* Complete a call to either prison_free or prison_proc_free.
*/
@@ -2732,129 +2668,350 @@
prison_complete(void *context, int pending)
{
struct prison *pr = context;
+ int flags;
- sx_xlock(&allprison_lock);
- mtx_lock(&pr->pr_mtx);
/*
- * If this is completing a call to prison_proc_free, there will still
- * be a user reference held; clear that as well as the reference that
- * was added. No references are expected if this is completing a call
- * to prison_free, but prison_deref is still called for the cleanup.
+ * This could be called to release the last reference, or the
+ * last user reference; the existence of a user reference implies
+ * the latter. There will always be a reference to remove, as
+ * prison_proc_free adds one.
*/
- prison_deref(pr, refcount_load(&pr->pr_uref) > 0
- ? PD_DEREF | PD_DEUREF | PD_LOCKED | PD_LIST_XLOCKED
- : PD_LOCKED | PD_LIST_XLOCKED);
+ flags = prison_deref_lock(pr, PD_DEREF);
+ if (refcount_load(&pr->pr_uref) > 0)
+ flags |= PD_DEUREF;
+ prison_deref(pr, flags);
}
/*
- * Remove a prison reference and/or user reference (usually).
+ * Remove a prison reference and/or user reference (usually), which
+ * may or may not end up removing the prison itself, or putting it into
+ * a "dying" state while it cleans itself up. Optionally forcibly
+ * remove a prison and its descendents, including killing all associated
+ * processes.
+ *
* This assumes context that allows sleeping (for allprison_lock),
* with no non-sleeping locks held, except perhaps the prison itself.
- * If there are no more references, release and delist the prison.
* On completion, the prison lock and the allprison lock are both
* unlocked.
*/
static void
prison_deref(struct prison *pr, int flags)
{
- struct prison *ppr, *tpr;
- int lastref, lasturef;
+ struct prisonlist freeprison;
+ struct prison *killpr, *rpr, *ppr, *tpr;
+ struct proc *p;
+ int killflags;
- if (!(flags & PD_LOCKED))
- mtx_lock(&pr->pr_mtx);
+ killpr = NULL;
+ TAILQ_INIT(&freeprison);
+ /*
+ * Release this prison as requested, which may cause its parent to be
+ * released, and then maybe its grandparent, etc.
+ */
for (;;) {
+ killflags = 0;
+ if (flags & PD_KILL) {
+ /* Kill the prison and its descendents. */
+ flags &= ~PD_KILL;
+ flags = prison_deref_lock(pr, flags);
+ killflags = prison_deref_kill(pr, &freeprison);
+ if (killflags & PD_KILL)
+ killpr = pr;
+ }
if (flags & PD_DEUREF) {
+ /* Drop a user reference. */
+ flags &= ~PD_DEUREF;
KASSERT(refcount_load(&pr->pr_uref) > 0,
- ("prison_deref PD_DEUREF on a dead prison (jid=%d)",
+ ("prison_deref PD_DEUREF on a dying prison (jid=%d)",
pr->pr_id));
- lasturef = refcount_release(&pr->pr_uref);
- if (lasturef)
- refcount_acquire(&pr->pr_ref);
- KASSERT(refcount_load(&prison0.pr_uref) > 0,
- ("prison0 pr_uref=0"));
- } else
- lasturef = 0;
+ if (!refcount_release_if_not_last(&pr->pr_uref)) {
+ flags = prison_deref_lock(pr, flags);
+ if (refcount_release(&pr->pr_uref) &&
+ pr->pr_state != PRISON_STATE_DYING) {
+ /*
+ * When the last user reference goes,
+ * this becomes a DYING prison (unless
+ * it was one already).
+ */
+ KASSERT(
+ refcount_load(&prison0.pr_ref) != 0,
+ ("prison0 pr_ref=0"));
+ if (pr->pr_state == PRISON_STATE_ALIVE)
+ {
+ prison_hold(pr);
+ mtx_unlock(&pr->pr_mtx);
+ (void)osd_jail_call(pr,
+ PR_METHOD_REMOVE, NULL);
+ mtx_lock(&pr->pr_mtx);
+ if (!(flags & PD_DEREF))
+ flags |= PD_DEREF;
+ else
+ prison_free_not_last(
+ pr);
+ }
+ pr->pr_state = PRISON_STATE_DYING;
+ for (ppr = pr->pr_parent;
+ ppr != NULL;
+ ppr = ppr->pr_parent)
+ ppr->pr_childcount--;
+ /* This now refers to the parent. */
+ flags |= PD_DEUREF;
+ }
+ }
+ }
if (flags & PD_DEREF) {
+ /* Drop a reference. */
+ flags &= ~PD_DEREF;
KASSERT(refcount_load(&pr->pr_ref) > 0,
("prison_deref PD_DEREF on a dead prison (jid=%d)",
pr->pr_id));
- lastref = refcount_release(&pr->pr_ref);
- }
- else
- lastref = refcount_load(&pr->pr_ref) == 0;
- mtx_unlock(&pr->pr_mtx);
-
- /*
- * Tell the modules if the last user reference was removed
- * (even it sticks around in dying state).
- */
- if (lasturef) {
- if (!(flags & (PD_LIST_SLOCKED | PD_LIST_XLOCKED))) {
- if (atomic_load_acq_int(&pr->pr_ref) > 1) {
- sx_slock(&allprison_lock);
- flags |= PD_LIST_SLOCKED;
- } else {
- sx_xlock(&allprison_lock);
- flags |= PD_LIST_XLOCKED;
+ if (!refcount_release_if_not_last(&pr->pr_ref)) {
+ flags = prison_deref_lock(pr, flags);
+ if (refcount_release(&pr->pr_ref)) {
+ /*
+ * When the last reference goes,
+ * prepare to remove the prison.
+ */
+ KASSERT(
+ refcount_load(&pr->pr_uref) == 0,
+ ("prison_deref: last ref, "
+ "but still has %d urefs (jid=%d)",
+ pr->pr_uref, pr->pr_id));
+ KASSERT(
+ refcount_load(&prison0.pr_ref) != 0,
+ ("prison0 pr_ref=0"));
+ TAILQ_REMOVE(&allprison, pr, pr_list);
+ TAILQ_INSERT_TAIL(&freeprison, pr,
+ pr_list);
+ LIST_REMOVE(pr, pr_sibling);
+ /* This now refers to the parent. */
+ flags |= PD_DEREF;
}
}
- (void)osd_jail_call(pr, PR_METHOD_REMOVE, NULL);
- mtx_lock(&pr->pr_mtx);
- lastref = refcount_release(&pr->pr_ref);
+ }
+ flags |= killflags & (PD_DEUREF | PD_DEREF);
+ if (flags & (PD_DEUREF | PD_DEREF)) {
+ /*
+ * A prison was marked as dying or removed, which
+ * means its parent now drops a reference.
+ */
mtx_unlock(&pr->pr_mtx);
+ pr = pr->pr_parent;
+ mtx_lock(&pr->pr_mtx);
+ continue;
}
+ break;
+ }
- /* If the prison still has references, nothing else to do. */
- if (!lastref) {
- if (flags & PD_LIST_SLOCKED)
- sx_sunlock(&allprison_lock);
- else if (flags & PD_LIST_XLOCKED)
- sx_xunlock(&allprison_lock);
- return;
- }
+ /* Release all the prison locks. */
+ if (flags & PD_LOCKED)
+ mtx_unlock(&pr->pr_mtx);
+ if (flags & PD_LIST_SLOCKED)
+ sx_sunlock(&allprison_lock);
+ else if (flags & PD_LIST_XLOCKED)
+ sx_xunlock(&allprison_lock);
- if (flags & PD_LIST_SLOCKED) {
- if (!sx_try_upgrade(&allprison_lock)) {
- sx_sunlock(&allprison_lock);
- sx_xlock(&allprison_lock);
+ if (killpr != NULL) {
+ /*
+ * The killed prison or descendants still had some user
+ * references, which are likely attached processes.
+ * So find and kill any such processes.
+ */
+ sx_slock(&allproc_lock);
+ FOREACH_PROC_IN_SYSTEM(p) {
+ PROC_LOCK(p);
+ if (p->p_state != PRS_NEW && p->p_ucred != NULL) {
+ for (ppr = p->p_ucred->cr_prison;
+ ppr != &prison0;
+ ppr = ppr->pr_parent)
+ if (ppr == killpr) {
+ kern_psignal(p, SIGKILL);
+ break;
+ }
}
- } else if (!(flags & PD_LIST_XLOCKED))
- sx_xlock(&allprison_lock);
-
- TAILQ_REMOVE(&allprison, pr, pr_list);
- LIST_REMOVE(pr, pr_sibling);
- ppr = pr->pr_parent;
- for (tpr = ppr; tpr != NULL; tpr = tpr->pr_parent)
- tpr->pr_childcount--;
- sx_xunlock(&allprison_lock);
+ PROC_UNLOCK(p);
+ }
+ sx_sunlock(&allproc_lock);
+ }
+ TAILQ_FOREACH_SAFE(rpr, &freeprison, pr_list, tpr) {
+ /*
+ * Finish removing unreferenced prisons, which couldn't happen
+ * while allprison_lock was held (to avoid a LOR on vrele).
+ */
#ifdef VIMAGE
- if (pr->pr_vnet != ppr->pr_vnet)
- vnet_destroy(pr->pr_vnet);
+ if (rpr->pr_vnet != rpr->pr_parent->pr_vnet)
+ vnet_destroy(rpr->pr_vnet);
#endif
- if (pr->pr_root != NULL)
- vrele(pr->pr_root);
- mtx_destroy(&pr->pr_mtx);
+ if (rpr->pr_root != NULL)
+ vrele(rpr->pr_root);
+ mtx_destroy(&rpr->pr_mtx);
#ifdef INET
- free(pr->pr_ip4, M_PRISON);
+ free(rpr->pr_ip4, M_PRISON);
#endif
#ifdef INET6
- free(pr->pr_ip6, M_PRISON);
+ free(rpr->pr_ip6, M_PRISON);
#endif
- if (pr->pr_cpuset != NULL)
- cpuset_rel(pr->pr_cpuset);
- osd_jail_exit(pr);
+ if (rpr->pr_cpuset != NULL)
+ cpuset_rel(rpr->pr_cpuset);
+ osd_jail_exit(rpr);
#ifdef RACCT
if (racct_enable)
- prison_racct_detach(pr);
+ prison_racct_detach(rpr);
#endif
- free(pr, M_PRISON);
+ TAILQ_REMOVE(&freeprison, rpr, pr_list);
+ free(rpr, M_PRISON);
+ }
+}
+
+/*
+ * Make sure allprison_lock is held exclusive, and the prison is locked.
+ * The incoming flags describe what the caller already holds: PD_LOCKED for
+ * the prison mutex, PD_LIST_SLOCKED or PD_LIST_XLOCKED for allprison_lock.
+ * Return the new deref flags, with PD_LIST_XLOCKED and PD_LOCKED set and
+ * PD_LIST_SLOCKED clear; every other flag bit is passed through unchanged.
+ */
+static int
+prison_deref_lock(struct prison *pr, int flags)
+{
- /* Removing a prison frees a reference on its parent. */
- pr = ppr;
+ if (!(flags & PD_LIST_XLOCKED)) {
+ /*
+ * Get allprison_lock, which may be an upgrade,
+ * and may require unlocking the prison.
+ */
+ if (flags & PD_LOCKED) {
+ /*
+ * Release (not re-acquire) the prison mutex before
+ * touching allprison_lock, and clear only PD_LOCKED
+ * so the list-lock bits tested below survive.
+ */
+ mtx_unlock(&pr->pr_mtx);
+ flags &= ~PD_LOCKED;
+ }
+ if (flags & PD_LIST_SLOCKED) {
+ /* Fall back to drop-and-reacquire if the upgrade fails. */
+ if (!sx_try_upgrade(&allprison_lock)) {
+ sx_sunlock(&allprison_lock);
+ sx_xlock(&allprison_lock);
+ }
+ flags &= ~PD_LIST_SLOCKED;
+ } else
+ sx_xlock(&allprison_lock);
+ flags |= PD_LIST_XLOCKED;
+ }
+ if (!(flags & PD_LOCKED)) {
+ /* Lock the prison mutex. */
mtx_lock(&pr->pr_mtx);
- flags = PD_DEREF | PD_DEUREF;
+ flags |= PD_LOCKED;
}
+ return (flags);
+}
+
+/*
+ * Kill the prison and its descendants. Mark them as dying, clear the
+ * persist flag, and call module remove methods. Return flags
+ * indicating further action required: PD_KILL if the prison or any killed
+ * descendant still has user references, plus the PD_DEUREF/PD_DEREF bits
+ * reported for the prison itself. Returns 0 without doing anything if
+ * the prison is already dying. Prisons that lose their last reference
+ * are moved from allprison onto the caller's freeprison list.
+ *
+ * NOTE(review): pr_mtx looks to be held on entry and again on return (it
+ * is dropped in prison_deref_kill_descend() and retaken in
+ * prison_deref_kill_ascend()) -- confirm against the caller.
+ */
+static int
+prison_deref_kill(struct prison *pr, struct prisonlist *freeprison)
+{
+ struct prison *cpr, *ppr;
+ int flags, cflags, killed;
+ bool descend;
+
+ /* If the prison is already dying, there's nothing to kill. */
+ if (pr->pr_state == PRISON_STATE_DYING)
+ return 0;
+ /*
+ * The operation on the prison and each descendant is similar to what
+ * prison_deref() does when losing the last user or system reference,
+ * plus extra work to clear PR_PERSIST.
+ */
+ flags = 0;
+ /* Count this prison itself; descendants are added as they are marked. */
+ killed = 1;
+ prison_deref_kill_descend(pr);
+
+ FOREACH_PRISON_DESCENDANT_PRE_POST(pr, cpr, descend) {
+ if (descend) {
+ /* Pre-order visit: mark the descendant on the way down. */
+ if (cpr->pr_state != PRISON_STATE_ALIVE) {
+ /*
+ * Only kill alive descendants, as dying
+ * prisons don't need killing, and new
+ * prisons will kill themselves later.
+ */
+ descend = false;
+ continue;
+ }
+ killed++;
+ mtx_lock(&cpr->pr_mtx);
+ prison_deref_kill_descend(cpr);
+ } else {
+ /*
+ * PR_REMOVE should only be set within this function,
+ * with its exclusive hold on allprison_lock, so we
+ * don't need to worry about other threads' caches.
+ */
+ if (!(cpr->pr_flags & PR_REMOVE))
+ continue;
+ /* Post-order visit: finish the descendant on the way up. */
+ cflags = prison_deref_kill_ascend(cpr, freeprison);
+ flags |= (cflags & PD_KILL);
+ mtx_unlock(&cpr->pr_mtx);
+ /*
+ * Drop a user reference on the parent; take the parent's
+ * mutex only when this may be the last one.
+ */
+ if (!refcount_release_if_not_last(
+ &cpr->pr_parent->pr_uref)) {
+ mtx_lock(&cpr->pr_parent->pr_mtx);
+ (void)refcount_release(
+ &cpr->pr_parent->pr_uref);
+ mtx_unlock(&cpr->pr_parent->pr_mtx);
+ }
+ if (cflags & PD_DEREF)
+ prison_free_not_last(cpr->pr_parent);
+ }
+ }
+
+ flags |= prison_deref_kill_ascend(pr, freeprison);
+ /* Every ancestor loses this prison and all killed descendants. */
+ for (ppr = pr->pr_parent;
+ ppr != NULL;
+ ppr = ppr->pr_parent)
+ ppr->pr_childcount -= killed;
+
+ /*
+ * Disconnect unreferenced descendant prisons from their parents,
+ * which couldn't easily be done mid-loop.
+ */
+ TAILQ_FOREACH(cpr, freeprison, pr_list)
+ LIST_REMOVE(cpr, pr_sibling);
+ return flags;
+}
+
+/*
+ * Pre-order half of prison_deref_kill(): mark the prison as dying, flag it
+ * with PR_REMOVE so the post-order half knows to process it, take a hold on
+ * it with prison_hold(), and release its mutex.
+ * NOTE(review): the caller is expected to hold pr_mtx on entry, since it
+ * is unlocked here -- confirm.
+ */
+static void
+prison_deref_kill_descend(struct prison *pr)
+{
+
+ pr->pr_state = PRISON_STATE_DYING;
+ pr->pr_flags |= PR_REMOVE;
+ prison_hold(pr);
+ mtx_unlock(&pr->pr_mtx);
+}
+
+/*
+ * Post-order half of prison_deref_kill(): call the module remove methods,
+ * then lock the prison, clear PR_REMOVE and PR_PERSIST, and release a
+ * reference. Returns PD_DEUREF, plus PD_KILL if user references remain,
+ * plus PD_DEREF if the last reference was released, in which case the
+ * prison is moved from allprison to the freeprison list. The prison mutex
+ * is left locked on return.
+ */
+static int
+prison_deref_kill_ascend(struct prison *pr, struct prisonlist *freeprison)
+{
+ int flags;
+
+ flags = PD_DEUREF;
+ /* The remove methods are called before the prison mutex is retaken. */
+ (void)osd_jail_call(pr, PR_METHOD_REMOVE, NULL);
+ mtx_lock(&pr->pr_mtx);
+ pr->pr_flags &= ~PR_REMOVE;
+ if (pr->pr_flags & PR_PERSIST) {
+ /* Clearing the persist flag gives up one uref and one ref. */
+ pr->pr_flags &= ~PR_PERSIST;
+ if (!refcount_release(&pr->pr_uref))
+ flags |= PD_KILL;
+ prison_free_not_last(pr);
+ } else if (refcount_load(&pr->pr_uref) > 0)
+ flags |= PD_KILL;
+ if (refcount_release(&pr->pr_ref)) {
+ KASSERT(refcount_load(&pr->pr_uref) == 0,
+ ("prison_deref_kill: last ref, "
+ "but still has %d urefs (jid=%d)",
+ pr->pr_uref, pr->pr_id));
+ /* Unreferenced: queue it on the caller's freeprison list. */
+ TAILQ_REMOVE(&allprison, pr, pr_list);
+ TAILQ_INSERT_TAIL(freeprison, pr, pr_list);
+ flags |= PD_DEREF;
+ }
+ pr->pr_childcount = 0;
+ return flags;
}
/*
@@ -3019,32 +3176,34 @@
}
/*
- * Return true if the prison is currently alive. A prison is alive if it is
- * valid and it holds user references.
+ * Return true if the prison is currently alive, i.e. its pr_state is
+ * PRISON_STATE_ALIVE (neither new and still invalid, nor dying).
*/
bool
prison_isalive(struct prison *pr)
{
- mtx_assert(&pr->pr_mtx, MA_OWNED);
- if (__predict_false(refcount_load(&pr->pr_ref) == 0))
- return (false);
- if (__predict_false(refcount_load(&pr->pr_uref) == 0))
+ if (__predict_false(pr->pr_state != PRISON_STATE_ALIVE))
return (false);
return (true);
}
/*
- * Return true if the prison is currently valid. A prison is valid if it has
- * been fully created, and is not being destroyed. Note that dying prisons
- * are still considered valid.
+ * Return true if the prison is currently valid, i.e. it has been fully
+ * created. Note that dying prisons are still considered valid.
*/
bool
prison_isvalid(struct prison *pr)
{
- mtx_assert(&pr->pr_mtx, MA_OWNED);
- if (__predict_false(refcount_load(&pr->pr_ref) == 0))
+ /*
+ * A prison is also invalid if it has no references, but that should
+ * never be the case when the right locks are held (prison mutex, or
+ * allprison_lock at least shared).
+ */
+ KASSERT(refcount_load(&pr->pr_ref) > 0,
+ ("prison_invalid checking dead prison %p (jid=%d).",
+ pr, pr->pr_id));
+ if (__predict_false(pr->pr_state == PRISON_STATE_INVALID))
return (false);
return (true);
}
@@ -3678,6 +3837,8 @@
#if defined(INET) || defined(INET6)
again:
#endif
+ if (!prison_isvalid(cpr))
+ continue;
mtx_lock(&cpr->pr_mtx);
#ifdef INET
if (cpr->pr_ip4s > 0) {
@@ -3705,15 +3866,10 @@
cpr->pr_ip6s * sizeof(struct in6_addr));
}
#endif
- if (!prison_isvalid(cpr)) {
- mtx_unlock(&cpr->pr_mtx);
- continue;
- }
bzero(xp, sizeof(*xp));
xp->pr_version = XPRISON_VERSION;
xp->pr_id = cpr->pr_id;
- xp->pr_state = prison_isalive(cpr)
- ? PRISON_STATE_ALIVE : PRISON_STATE_DYING;
+ xp->pr_state = cpr->pr_state;
strlcpy(xp->pr_path, prison_path(pr, cpr), sizeof(xp->pr_path));
strlcpy(xp->pr_host, cpr->pr_hostname, sizeof(xp->pr_host));
strlcpy(xp->pr_name, prison_name(pr, cpr), sizeof(xp->pr_name));
@@ -4364,6 +4520,7 @@
db_printf(" parent = %p\n", pr->pr_parent);
db_printf(" ref = %d\n", pr->pr_ref);
db_printf(" uref = %d\n", pr->pr_uref);
+ db_printf(" state = %d\n", pr->pr_state);
db_printf(" path = %s\n", pr->pr_path);
db_printf(" cpuset = %d\n", pr->pr_cpuset
? pr->pr_cpuset->cs_id : -1);
diff --git a/sys/sys/jail.h b/sys/sys/jail.h
--- a/sys/sys/jail.h
+++ b/sys/sys/jail.h
@@ -88,9 +88,11 @@
};
#define XPRISON_VERSION 3
-#define PRISON_STATE_INVALID 0
-#define PRISON_STATE_ALIVE 1
-#define PRISON_STATE_DYING 2
+/* Life-cycle state of a prison, kept in pr_state. */
+enum prison_state {
+ PRISON_STATE_INVALID = 0, /* New prison, not ready to be seen */
+ PRISON_STATE_ALIVE, /* Current prison, visible to all */
+ PRISON_STATE_DYING /* Removed, but holding resources; optionally visible */
+};
/*
* Flags for jail_set and jail_get.
@@ -99,7 +101,7 @@
#define JAIL_UPDATE 0x02 /* Update parameters of existing jail */
#define JAIL_ATTACH 0x04 /* Attach to jail upon creation */
#define JAIL_DYING 0x08 /* Allow getting a dying jail */
-#define JAIL_SET_MASK 0x0f
+#define JAIL_SET_MASK 0x0f /* JAIL_DYING is deprecated/ignored here */
#define JAIL_GET_MASK 0x08
#define JAIL_SYS_DISABLE 0
@@ -155,7 +157,9 @@
* (m) locked by pr_mtx
* (p) locked by pr_mtx, and also at least shared allprison_lock required
* to update
- * (r) atomic via refcount(9), pr_mtx required to decrement to zero
+ * (q) locked by both pr_mtx and allprison_lock
+ * (r) atomic via refcount(9), pr_mtx and allprison_lock required to
+ * decrement to zero
*/
struct prison {
TAILQ_ENTRY(prison) pr_list; /* (a) all prisons */
@@ -179,12 +183,13 @@
struct prison_racct *pr_prison_racct; /* (c) racct jail proxy */
void *pr_sparep[3];
int pr_childcount; /* (a) number of child jails */
- int pr_childmax; /* (p) maximum child jails */
+ int pr_childmax; /* (a) maximum child jails */
unsigned pr_allow; /* (p) PR_ALLOW_* flags */
int pr_securelevel; /* (p) securelevel */
int pr_enforce_statfs; /* (p) statfs permission */
int pr_devfs_rsnum; /* (p) devfs ruleset */
- int pr_spare[3];
+ enum prison_state pr_state; /* (q) state in life cycle */
+ int pr_spare[2];
int pr_osreldate; /* (c) kern.osreldate value */
unsigned long pr_hostid; /* (p) jail hostid */
char pr_name[MAXHOSTNAMELEN]; /* (p) admin jail name */
@@ -216,6 +221,7 @@
/* primary jail address. */
/* Internal flag bits */
+#define PR_REMOVE 0x01000000 /* In process of being removed */
#define PR_IP4 0x02000000 /* IPv4 restricted or disabled */
/* by this jail or an ancestor */
#define PR_IP6 0x04000000 /* IPv6 restricted or disabled */
@@ -333,6 +339,19 @@
; \
else
+/*
+ * As FOREACH_PRISON_DESCENDANT, but visit both preorder and postorder.
+ * The body runs with (descend) true on the way down into a prison and
+ * false on the way back up out of it; (ppr) itself is never visited.
+ * Clearing (descend) during a preorder visit skips that prison's subtree
+ * and its own postorder visit.
+ */
+#define FOREACH_PRISON_DESCENDANT_PRE_POST(ppr, cpr, descend) \
+ for ((cpr) = (ppr), (descend) = 1; \
+ ((cpr) = (descend) \
+ ? ((descend) = !LIST_EMPTY(&(cpr)->pr_children)) \
+ ? LIST_FIRST(&(cpr)->pr_children) \
+ : (cpr) \
+ : ((descend) = LIST_NEXT((cpr), pr_sibling) != NULL) \
+ ? LIST_NEXT((cpr), pr_sibling) \
+ : (cpr)->pr_parent) != (ppr);)
+
+
/*
* Attributes of the physical system, and the root of the jail tree.
*/
diff --git a/usr.sbin/jail/jail.8 b/usr.sbin/jail/jail.8
--- a/usr.sbin/jail/jail.8
+++ b/usr.sbin/jail/jail.8
@@ -25,7 +25,7 @@
.\"
.\" $FreeBSD$
.\"
-.Dd November 18, 2020
+.Dd January 25, 2021
.Dt JAIL 8
.Os
.Sh NAME
@@ -136,10 +136,6 @@
.Pp
Other available options are:
.Bl -tag -width indent
-.It Fl d
-Allow making changes to a dying jail, equivalent to the
-.Va allow.dying
-parameter.
.It Fl f Ar conf_file
Use configuration file
.Ar conf_file
@@ -207,6 +203,17 @@
.It Fl v
Print a message on every operation, such as running commands and
mounting filesystems.
+.It Fl d
+This is deprecated and is equivalent to the
+.Va allow.dying
+parameter, which is also deprecated.
+It used to allow making changes to a
+.Va dying
+jail.
+Now such jails are always replaced when a new jail is created with the same
+.Va jid
+or
+.Va name .
.El
.Pp
If no arguments are given after the options, the operation (except
@@ -903,9 +910,14 @@
.Pa /proc
directory.
.It Va allow.dying
-Allow making changes to a
+This is deprecated and has no effect.
+It used to allow making changes to a
.Va dying
jail.
+Now such jails are always replaced when a new jail is created with the same
+.Va jid
+or
+.Va name .
.It Va depend
Specify a jail (or jails) that this jail depends on.
When this jail is to be created, any jail(s) it depends on must already exist.
diff --git a/usr.sbin/jail/jail.c b/usr.sbin/jail/jail.c
--- a/usr.sbin/jail/jail.c
+++ b/usr.sbin/jail/jail.c
@@ -65,7 +65,7 @@
static void clear_persist(struct cfjail *j);
static int update_jail(struct cfjail *j);
static int rdtun_params(struct cfjail *j, int dofail);
-static void running_jid(struct cfjail *j, int dflag);
+static void running_jid(struct cfjail *j);
static void jail_quoted_warnx(const struct cfjail *j, const char *name_msg,
const char *noname_msg);
static int jailparam_set_note(const struct cfjail *j, struct jailparam *jp,
@@ -140,7 +140,7 @@
char *JidFile;
size_t sysvallen;
unsigned op, pi;
- int ch, docf, error, i, oldcl, sysval;
+ int ch, docf, error, i, oldcl, sysval, dying_warned;
int dflag, Rflag;
#if defined(INET) || defined(INET6)
char *cs, *ncs;
@@ -377,6 +377,7 @@
* operation on it. When that is done, the jail may be finished,
* or it may go back for the next step.
*/
+ dying_warned = 0;
while ((j = next_jail()))
{
if (j->flags & JF_FAILED) {
@@ -397,11 +398,13 @@
import_params(j) < 0)
continue;
}
+ if (j->intparams[IP_ALLOW_DYING] && !dying_warned) {
+ /*
+ * Warn once per run. The trailing space keeps the two
+ * adjacent literals from running together as "flagare".
+ */
+ warnx("%s", "the 'allow.dying' parameter and '-d' flag "
+ "are deprecated and have no effect.");
+ dying_warned = 1;
+ }
if (!j->jid)
- running_jid(j,
- (j->flags & (JF_SET | JF_DEPEND)) == JF_SET
- ? dflag || bool_param(j->intparams[IP_ALLOW_DYING])
- : 0);
+ running_jid(j);
if (finish_command(j))
continue;
@@ -613,11 +616,10 @@
int
create_jail(struct cfjail *j)
{
- struct iovec jiov[4];
struct stat st;
- struct jailparam *jp, *setparams, *setparams2, *sjp;
+ struct jailparam *jp, *setparams, *sjp;
const char *path;
- int dopersist, ns, jid, dying, didfail;
+ int dopersist, ns;
/*
* Check the jail's path, with a better error message than jail_set
@@ -657,57 +659,8 @@
*sjp++ = *jp;
ns = sjp - setparams;
- didfail = 0;
j->jid = jailparam_set_note(j, setparams, ns, JAIL_CREATE);
- if (j->jid < 0 && errno == EEXIST &&
- bool_param(j->intparams[IP_ALLOW_DYING]) &&
- int_param(j->intparams[KP_JID], &jid) && jid != 0) {
- /*
- * The jail already exists, but may be dying.
- * Make sure it is, in which case an update is appropriate.
- */
- jiov[0].iov_base = __DECONST(char *, "jid");
- jiov[0].iov_len = sizeof("jid");
- jiov[1].iov_base = &jid;
- jiov[1].iov_len = sizeof(jid);
- jiov[2].iov_base = __DECONST(char *, "dying");
- jiov[2].iov_len = sizeof("dying");
- jiov[3].iov_base = &dying;
- jiov[3].iov_len = sizeof(dying);
- if (jail_get(jiov, 4, JAIL_DYING) < 0) {
- /*
- * It could be that the jail just barely finished
- * dying, or it could be that the jid never existed
- * but the name does. In either case, another try
- * at creating the jail should do the right thing.
- */
- if (errno == ENOENT)
- j->jid = jailparam_set_note(j, setparams, ns,
- JAIL_CREATE);
- } else if (dying) {
- j->jid = jid;
- if (rdtun_params(j, 1) < 0) {
- j->jid = -1;
- didfail = 1;
- } else {
- sjp = setparams2 = alloca((j->njp + dopersist) *
- sizeof(struct jailparam));
- for (jp = setparams; jp < setparams + ns; jp++)
- if (!JP_RDTUN(jp) ||
- !strcmp(jp->jp_name, "jid"))
- *sjp++ = *jp;
- j->jid = jailparam_set_note(j, setparams2,
- sjp - setparams2, JAIL_UPDATE | JAIL_DYING);
- /*
- * Again, perhaps the jail just finished dying.
- */
- if (j->jid < 0 && errno == ENOENT)
- j->jid = jailparam_set_note(j,
- setparams, ns, JAIL_CREATE);
- }
- }
- }
- if (j->jid < 0 && !didfail) {
+ if (j->jid < 0) {
jail_warnx(j, "%s", jail_errmsg);
failed(j);
}
@@ -772,9 +725,7 @@
if (!JP_RDTUN(jp))
*++sjp = *jp;
- jid = jailparam_set_note(j, setparams, ns,
- bool_param(j->intparams[IP_ALLOW_DYING])
- ? JAIL_UPDATE | JAIL_DYING : JAIL_UPDATE);
+ jid = jailparam_set_note(j, setparams, ns, JAIL_UPDATE);
if (jid < 0) {
jail_warnx(j, "%s", jail_errmsg);
failed(j);
@@ -813,8 +764,7 @@
rtjp->jp_value = NULL;
}
rval = 0;
- if (jailparam_get(rtparams, nrt,
- bool_param(j->intparams[IP_ALLOW_DYING]) ? JAIL_DYING : 0) > 0) {
+ if (jailparam_get(rtparams, nrt, 0) > 0) {
rtjp = rtparams + 1;
for (jp = j->jp; rtjp < rtparams + nrt; jp++) {
if (JP_RDTUN(jp) && strcmp(jp->jp_name, "jid")) {
@@ -851,7 +801,7 @@
* Get the jail's jid if it is running.
*/
static void
-running_jid(struct cfjail *j, int dflag)
+running_jid(struct cfjail *j)
{
struct iovec jiov[2];
const char *pval;
@@ -877,7 +827,7 @@
j->jid = -1;
return;
}
- j->jid = jail_get(jiov, 2, dflag ? JAIL_DYING : 0);
+ j->jid = jail_get(jiov, 2, 0);
}
static void
@@ -906,10 +856,9 @@
jid = jailparam_set(jp, njp, flags);
if (verbose > 0) {
- jail_note(j, "jail_set(%s%s)",
+ jail_note(j, "jail_set(%s)",
(flags & (JAIL_CREATE | JAIL_UPDATE)) == JAIL_CREATE
- ? "JAIL_CREATE" : "JAIL_UPDATE",
- (flags & JAIL_DYING) ? " | JAIL_DYING" : "");
+ ? "JAIL_CREATE" : "JAIL_UPDATE");
for (i = 0; i < njp; i++) {
printf(" %s", jp[i].jp_name);
if (jp[i].jp_value == NULL)
File Metadata
Details
Attached
Mime Type
text/plain
Expires
Thu, Apr 16, 6:27 AM (7 h, 44 m)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
31584649
Default Alt Text
D28150.id82895.diff (52 KB)
Attached To
Mode
D28150: jail: Don't allow resurrection of dead jails
Attached
Detach File
Event Timeline
Log In to Comment