diff --git a/sys/kern/kern_jail.c b/sys/kern/kern_jail.c --- a/sys/kern/kern_jail.c +++ b/sys/kern/kern_jail.c @@ -106,6 +106,7 @@ .pr_path = "/", .pr_securelevel = -1, .pr_devfs_rsnum = 0, + .pr_state = PRISON_STATE_ALIVE, .pr_childmax = JAIL_MAX, .pr_hostuuid = DEFAULT_HOSTUUID, .pr_children = LIST_HEAD_INITIALIZER(prison0.pr_children), @@ -1007,7 +1008,8 @@ TAILQ_FOREACH(inspr, &allprison, pr_list) { if (inspr->pr_id == jid) { mtx_lock(&inspr->pr_mtx); - if (inspr->pr_ref > 0) { + if (inspr->pr_ref > 0 && + inspr->pr_state != PRISON_STATE_INVALID) { pr = inspr; inspr = NULL; } else @@ -1039,7 +1041,7 @@ */ mtx_unlock(&pr->pr_mtx); pr = NULL; - } else if (pr->pr_uref == 0) { + } else if (pr->pr_state == PRISON_STATE_DYING) { if (!(flags & JAIL_DYING)) { mtx_unlock(&pr->pr_mtx); error = ENOENT; @@ -1118,23 +1120,29 @@ deadpr = NULL; FOREACH_PRISON_CHILD(ppr, tpr) { if (tpr != pr && tpr->pr_ref > 0 && + tpr->pr_state != PRISON_STATE_INVALID && !strcmp(tpr->pr_name + pnamelen, namelc)) { if (pr == NULL && cuflags != JAIL_CREATE) { mtx_lock(&tpr->pr_mtx); if (tpr->pr_ref > 0) { - /* - * Use this jail - * for updates. - */ - if (tpr->pr_uref > 0) { + if (tpr->pr_state == + PRISON_STATE_ALIVE) + { + /* + * Use this jail + * for updates. + */ pr = tpr; break; } - deadpr = tpr; + else if (tpr->pr_state == + PRISON_STATE_DYING) + deadpr = tpr; } mtx_unlock(&tpr->pr_mtx); - } else if (tpr->pr_uref > 0) { + } else if (tpr->pr_state == + PRISON_STATE_ALIVE) { /* * Create, or update(jid): * name must not exist in an @@ -1183,16 +1191,16 @@ } /* If there's no prison to update, create a new one and link it in. */ - if (pr == NULL) { + created = pr == NULL; + if (created) { for (tpr = mypr; tpr != NULL; tpr = tpr->pr_parent) if (tpr->pr_childcount >= tpr->pr_childmax) { error = EPERM; vfs_opterror(opts, "prison limit exceeded"); goto done_unlock_list; } - created = 1; mtx_lock(&ppr->pr_mtx); - if (ppr->pr_ref == 0) { + if (ppr->pr_ref == 0 || ppr->pr_state == PRISON_STATE_INVALID) { mtx_unlock(&ppr->pr_mtx); error = ENOENT; vfs_opterror(opts, "jail \"%s\" not found", @@ -1201,24 +1209,38 @@ } ppr->pr_ref++; ppr->pr_uref++; + ppr->pr_state = PRISON_STATE_ALIVE; mtx_unlock(&ppr->pr_mtx); - pr = malloc(sizeof(*pr), M_PRISON, M_WAITOK | M_ZERO); if (jid == 0 && (jid = get_next_prid(&inspr)) == 0) { error = EAGAIN; vfs_opterror(opts, "no available jail IDs"); - free(pr, M_PRISON); prison_deref(ppr, PD_DEREF | PD_DEUREF | PD_LIST_XLOCKED); goto done_releroot; } + + /* + * Start the prison with a reference, matching the one added to + * existing prisons. + */ + pr = malloc(sizeof(*pr), M_PRISON, M_WAITOK | M_ZERO); + pr->pr_state = PRISON_STATE_INVALID; + pr->pr_ref = 1; + mtx_init(&pr->pr_mtx, "jail mutex", NULL, MTX_DEF | MTX_DUPOK); + TASK_INIT(&pr->pr_task, 0, prison_complete, pr); + LIST_INIT(&pr->pr_children); + + /* + * Link the prison into the allprison list in ID order, + * and into its parent's child list in no particular order. + */ pr->pr_id = jid; + pr->pr_parent = ppr; if (inspr != NULL) TAILQ_INSERT_BEFORE(inspr, pr, pr_list); else TAILQ_INSERT_TAIL(&allprison, pr, pr_list); - - pr->pr_parent = ppr; LIST_INSERT_HEAD(&ppr->pr_children, pr, pr_sibling); for (tpr = ppr; tpr != NULL; tpr = tpr->pr_parent) tpr->pr_childcount++; @@ -1286,10 +1308,6 @@ strlcpy(pr->pr_osrelease, osrelstr, sizeof(pr->pr_osrelease)); - LIST_INIT(&pr->pr_children); - mtx_init(&pr->pr_mtx, "jail mutex", NULL, MTX_DEF | MTX_DUPOK); - TASK_INIT(&pr->pr_task, 0, prison_complete, pr); - #ifdef VIMAGE /* Allocate a new vnet if specified. */ pr->pr_vnet = (pr_flags & PR_VNET) @@ -1301,18 +1319,17 @@ */ error = cpuset_create_root(ppr, &pr->pr_cpuset); if (error) { - prison_deref(pr, PD_LIST_XLOCKED); + prison_deref(pr, PD_DEREF | PD_LIST_XLOCKED); goto done_releroot; } - mtx_lock(&pr->pr_mtx); /* - * New prisons do not yet have a reference, because we do not - * want others to see the incomplete prison once the - * allprison_lock is downgraded. + * The new prison hasn't yet needed locking, because its + * invalid state prevents it from being used elsewhere. + * But lock it now to match existing prisons. */ + mtx_lock(&pr->pr_mtx); } else { - created = 0; /* * Grab a reference for existing prisons, to ensure they * continue to exist for the duration of the call. @@ -1435,7 +1452,7 @@ #ifdef VIMAGE (tpr != tppr && (tpr->pr_flags & PR_VNET)) || #endif - tpr->pr_uref == 0) { + tpr->pr_state != PRISON_STATE_ALIVE) { descend = 0; continue; } @@ -1503,7 +1520,7 @@ #ifdef VIMAGE (tpr != tppr && (tpr->pr_flags & PR_VNET)) || #endif - tpr->pr_uref == 0) { + tpr->pr_state != PRISON_STATE_ALIVE) { descend = 0; continue; } @@ -1573,9 +1590,7 @@ mtx_unlock(&pr->pr_mtx); error = osd_jail_call(pr, PR_METHOD_CHECK, opts); if (error != 0) { - prison_deref(pr, created - ? PD_LIST_XLOCKED - : PD_DEREF | PD_LIST_XLOCKED); + prison_deref(pr, PD_DEREF | PD_LIST_XLOCKED); goto done_releroot; } mtx_lock(&pr->pr_mtx); @@ -1679,6 +1694,7 @@ /* Try to keep a real-rooted full pathname. */ strlcpy(pr->pr_path, path, sizeof(pr->pr_path)); pr->pr_root = root; + root = NULL; } if (PR_HOST & ch_flags & ~pr_flags) { if (pr->pr_flags & PR_HOST) { @@ -1733,12 +1749,10 @@ prison_set_allow_locked(pr, tallow, 0); /* * Persistent prisons get an extra reference, and prisons losing their - * persist flag lose that reference. Only do this for existing prisons - * for now, so new ones will remain unseen until after the module - * handlers have completed. + * persist flag lose that reference. */ - born = pr->pr_uref == 0; - if (!created && (ch_flags & PR_PERSIST & (pr_flags ^ pr->pr_flags))) { + born = pr->pr_state != PRISON_STATE_ALIVE; + if (ch_flags & PR_PERSIST & (pr_flags ^ pr->pr_flags)) { if (pr_flags & PR_PERSIST) { pr->pr_ref++; pr->pr_uref++; @@ -1809,21 +1823,14 @@ sx_downgrade(&allprison_lock); if (born) { error = osd_jail_call(pr, PR_METHOD_CREATE, opts); - if (error) { - (void)osd_jail_call(pr, PR_METHOD_REMOVE, NULL); - prison_deref(pr, created - ? PD_LIST_SLOCKED - : PD_DEREF | PD_LIST_SLOCKED); - goto done_errmsg; - } + if (error) + goto done_remove; } error = osd_jail_call(pr, PR_METHOD_SET, opts); if (error) { if (born) - (void)osd_jail_call(pr, PR_METHOD_REMOVE, NULL); - prison_deref(pr, created - ? PD_LIST_SLOCKED - : PD_DEREF | PD_LIST_SLOCKED); + goto done_remove; + prison_deref(pr, PD_DEREF | PD_LIST_SLOCKED); goto done_errmsg; } @@ -1837,12 +1844,9 @@ vfs_opterror(opts, "attach failed"); if (born) { sx_slock(&allprison_lock); - slocked = PD_LIST_SLOCKED; - (void)osd_jail_call(pr, PR_METHOD_REMOVE, NULL); + goto done_remove; } - prison_deref(pr, created - ? slocked - : PD_DEREF | slocked); + prison_deref(pr, PD_DEREF); goto done_errmsg; } } @@ -1860,31 +1864,31 @@ td->td_retval[0] = pr->pr_id; /* - * Now that it is all there, drop the temporary reference from existing - * prisons. Or add a reference to newly created persistent prisons - * (which was not done earlier so that the prison would not be publicly - * visible). + * Now that everything is done, the prison is usually alive, though + * it might have been either new (invalid) or dying before. Drop + * the temporary reference before returning. */ - if (!created) - prison_deref(pr, PD_DEREF | slocked); - else { - if (pr_flags & PR_PERSIST) { - mtx_lock(&pr->pr_mtx); - pr->pr_ref++; - pr->pr_uref++; - mtx_unlock(&pr->pr_mtx); - } - if (slocked) - sx_sunlock(&allprison_lock); - } + mtx_lock(&pr->pr_mtx); + if (pr->pr_uref > 0) + pr->pr_state = PRISON_STATE_ALIVE; + prison_deref(pr, PD_DEREF | PD_LOCKED | slocked); goto done_free; done_deref_locked: - prison_deref(pr, created - ? PD_LOCKED | PD_LIST_XLOCKED - : PD_DEREF | PD_LOCKED | PD_LIST_XLOCKED); + prison_deref(pr, PD_DEREF | PD_LOCKED | PD_LIST_XLOCKED); goto done_releroot; + done_remove: + (void)osd_jail_call(pr, PR_METHOD_REMOVE, NULL); + mtx_lock(&pr->pr_mtx); + if (pr->pr_flags & PR_PERSIST) { + pr->pr_flags &= ~PR_PERSIST; + pr->pr_ref--; + prison_deref(pr, PD_DEREF | PD_DEUREF | PD_LOCKED | + PD_LIST_SLOCKED); + } else + prison_deref(pr, PD_DEREF | PD_LOCKED | PD_LIST_SLOCKED); + goto done_errmsg; done_unlock_list: sx_xunlock(&allprison_lock); done_releroot: @@ -2042,7 +2046,9 @@ if (pr->pr_id > jid && prison_ischild(mypr, pr)) { mtx_lock(&pr->pr_mtx); if (pr->pr_ref > 0 && - (pr->pr_uref > 0 || (flags & JAIL_DYING))) + (pr->pr_state == PRISON_STATE_ALIVE || + (pr->pr_state == PRISON_STATE_DYING && + (flags & JAIL_DYING)))) break; mtx_unlock(&pr->pr_mtx); } @@ -2060,7 +2066,8 @@ if (jid != 0) { pr = prison_find_child(mypr, jid); if (pr != NULL) { - if (pr->pr_uref == 0 && !(flags & JAIL_DYING)) { + if (pr->pr_state == PRISON_STATE_DYING && + !(flags & JAIL_DYING)) { mtx_unlock(&pr->pr_mtx); error = ENOENT; vfs_opterror(opts, "jail %d is dying", @@ -2084,7 +2091,8 @@ } pr = prison_find_name(mypr, name); if (pr != NULL) { - if (pr->pr_uref == 0 && !(flags & JAIL_DYING)) { + if (pr->pr_state == PRISON_STATE_DYING && + !(flags & JAIL_DYING)) { mtx_unlock(&pr->pr_mtx); error = ENOENT; vfs_opterror(opts, "jail \"%s\" is dying", @@ -2213,7 +2221,7 @@ if (error != 0 && error != ENOENT) goto done_deref; } - i = (pr->pr_uref == 0); + i = pr->pr_state == PRISON_STATE_DYING; error = vfs_setopt(opts, "dying", &i, sizeof(i)); if (error != 0 && error != ENOENT) goto done_deref; @@ -2422,11 +2430,8 @@ return (EINVAL); } - /* - * Do not allow a process to attach to a prison that is not - * considered to be "alive". - */ - if (pr->pr_uref == 0) { + /* Do not allow a process to attach to a prison that is not alive. */ + if (pr->pr_state != PRISON_STATE_ALIVE) { mtx_unlock(&pr->pr_mtx); sx_sunlock(&allprison_lock); return (EINVAL); @@ -2522,7 +2527,8 @@ TAILQ_FOREACH(pr, &allprison, pr_list) { if (pr->pr_id == prid) { mtx_lock(&pr->pr_mtx); - if (pr->pr_ref > 0) + if (pr->pr_ref > 0 && + pr->pr_state != PRISON_STATE_INVALID) return (pr); /* * Any active prison with the same ID would have @@ -2550,7 +2556,8 @@ FOREACH_PRISON_DESCENDANT(mypr, pr, descend) { if (pr->pr_id == prid) { mtx_lock(&pr->pr_mtx); - if (pr->pr_ref > 0) + if (pr->pr_ref > 0 && + pr->pr_state != PRISON_STATE_INVALID) return (pr); mtx_unlock(&pr->pr_mtx); } @@ -2575,8 +2582,9 @@ FOREACH_PRISON_DESCENDANT(mypr, pr, descend) { if (!strcmp(pr->pr_name + mylen, name)) { mtx_lock(&pr->pr_mtx); - if (pr->pr_ref > 0) { - if (pr->pr_uref > 0) + if (pr->pr_ref > 0 && + pr->pr_state != PRISON_STATE_INVALID) { + if (pr->pr_state == PRISON_STATE_ALIVE) return (pr); deadpr = pr; } @@ -2673,12 +2681,14 @@ ("prison_deref PD_DEUREF on a dead prison (jid=%d)", pr->pr_id)); pr->pr_uref--; - lasturef = pr->pr_uref == 0; - if (lasturef) - pr->pr_ref++; KASSERT(prison0.pr_uref != 0, ("prison0 pr_uref=0")); - } else - lasturef = 0; + } + lasturef = pr->pr_uref == 0 && + pr->pr_state == PRISON_STATE_ALIVE; + if (lasturef) { + pr->pr_ref++; + pr->pr_state = PRISON_STATE_DYING; + } if (flags & PD_DEREF) { KASSERT(pr->pr_ref > 0, ("prison_deref PD_DEREF on a dead prison (jid=%d)", @@ -3631,15 +3641,14 @@ cpr->pr_ip6s * sizeof(struct in6_addr)); } #endif - if (cpr->pr_ref == 0) { + if (cpr->pr_ref == 0 || cpr->pr_state == PRISON_STATE_INVALID) { mtx_unlock(&cpr->pr_mtx); continue; } bzero(xp, sizeof(*xp)); xp->pr_version = XPRISON_VERSION; xp->pr_id = cpr->pr_id; - xp->pr_state = cpr->pr_uref > 0 - ? PRISON_STATE_ALIVE : PRISON_STATE_DYING; + xp->pr_state = cpr->pr_state; strlcpy(xp->pr_path, prison_path(pr, cpr), sizeof(xp->pr_path)); strlcpy(xp->pr_host, cpr->pr_hostname, sizeof(xp->pr_host)); strlcpy(xp->pr_name, prison_name(pr, cpr), sizeof(xp->pr_name)); @@ -4290,6 +4299,7 @@ db_printf(" parent = %p\n", pr->pr_parent); db_printf(" ref = %d\n", pr->pr_ref); db_printf(" uref = %d\n", pr->pr_uref); + db_printf(" state = %d\n", pr->pr_state); db_printf(" path = %s\n", pr->pr_path); db_printf(" cpuset = %d\n", pr->pr_cpuset ? pr->pr_cpuset->cs_id : -1); diff --git a/sys/sys/jail.h b/sys/sys/jail.h --- a/sys/sys/jail.h +++ b/sys/sys/jail.h @@ -88,9 +88,11 @@ }; #define XPRISON_VERSION 3 -#define PRISON_STATE_INVALID 0 -#define PRISON_STATE_ALIVE 1 -#define PRISON_STATE_DYING 2 +enum prison_state { + PRISON_STATE_INVALID = 0, /* New prison, not ready to be seen */ + PRISON_STATE_ALIVE, /* Current prison, visible to all */ + PRISON_STATE_DYING /* Removed, but holding resources, */ +}; /* optionally visible. */ /* * Flags for jail_set and jail_get. @@ -183,7 +185,8 @@ int pr_securelevel; /* (p) securelevel */ int pr_enforce_statfs; /* (p) statfs permission */ int pr_devfs_rsnum; /* (p) devfs ruleset */ - int pr_spare[3]; + enum prison_state pr_state; /* (m) state in life cycle */ + int pr_spare[2]; int pr_osreldate; /* (c) kern.osreldate value */ unsigned long pr_hostid; /* (p) jail hostid */ char pr_name[MAXHOSTNAMELEN]; /* (p) admin jail name */