diff --git a/sys/kern/kern_jail.c b/sys/kern/kern_jail.c --- a/sys/kern/kern_jail.c +++ b/sys/kern/kern_jail.c @@ -106,6 +106,7 @@ .pr_path = "/", .pr_securelevel = -1, .pr_devfs_rsnum = 0, + .pr_state = PRISON_STATE_ALIVE, .pr_childmax = JAIL_MAX, .pr_hostuuid = DEFAULT_HOSTUUID, .pr_children = LIST_HEAD_INITIALIZER(prison0.pr_children), @@ -1008,7 +1009,8 @@ TAILQ_FOREACH(inspr, &allprison, pr_list) { if (inspr->pr_id == jid) { mtx_lock(&inspr->pr_mtx); - if (inspr->pr_ref > 0) { + if (inspr->pr_ref > 0 && + inspr->pr_state != PRISON_STATE_INVALID) { pr = inspr; drflags |= PD_LOCKED; inspr = NULL; @@ -1041,7 +1043,7 @@ error = ENOENT; vfs_opterror(opts, "jail %d not found", jid); goto done_deref; - } else if (pr->pr_uref == 0) { + } else if (pr->pr_state == PRISON_STATE_DYING) { if (!(flags & JAIL_DYING)) { error = ENOENT; vfs_opterror(opts, "jail %d is dying", @@ -1117,25 +1119,31 @@ deadpr = NULL; FOREACH_PRISON_CHILD(ppr, tpr) { if (tpr != pr && tpr->pr_ref > 0 && + tpr->pr_state != PRISON_STATE_INVALID && !strcmp(tpr->pr_name + pnamelen, namelc)) { if (pr == NULL && cuflags != JAIL_CREATE) { mtx_lock(&tpr->pr_mtx); if (tpr->pr_ref > 0) { - /* - * Use this jail - * for updates. - */ - if (tpr->pr_uref > 0) { + if (tpr->pr_state == + PRISON_STATE_ALIVE) + { + /* + * Use this jail + * for updates. 
+ */ pr = tpr; drflags |= PD_LOCKED; break; } - deadpr = tpr; + if (tpr->pr_state == + PRISON_STATE_DYING) + deadpr = tpr; } mtx_unlock(&tpr->pr_mtx); - } else if (tpr->pr_uref > 0) { + } else if (tpr->pr_state == + PRISON_STATE_ALIVE) { /* * Create, or update(jid): * name must not exist in an @@ -1192,7 +1200,7 @@ goto done_deref; } mtx_lock(&ppr->pr_mtx); - if (ppr->pr_ref == 0) { + if (ppr->pr_ref == 0 || ppr->pr_state == PRISON_STATE_INVALID) { mtx_unlock(&ppr->pr_mtx); error = ENOENT; vfs_opterror(opts, "jail \"%s\" not found", @@ -1201,6 +1209,7 @@ } ppr->pr_ref++; ppr->pr_uref++; + ppr->pr_state = PRISON_STATE_ALIVE; mtx_unlock(&ppr->pr_mtx); if (jid == 0 && (jid = get_next_prid(&inspr)) == 0) { @@ -1211,11 +1220,22 @@ goto done_deref; } + /* + * Start the prison with a reference, matching the one added + * to existing prisons. + */ pr = malloc(sizeof(*pr), M_PRISON, M_WAITOK | M_ZERO); + pr->pr_state = PRISON_STATE_INVALID; + pr->pr_ref = 1; + drflags |= PD_DEREF; LIST_INIT(&pr->pr_children); mtx_init(&pr->pr_mtx, "jail mutex", NULL, MTX_DEF | MTX_DUPOK); TASK_INIT(&pr->pr_task, 0, prison_complete, pr); + /* + * Link the prison into the allprison list in ID order, + * and into its parent's child list in no particular order. + */ pr->pr_id = jid; if (inspr != NULL) TAILQ_INSERT_BEFORE(inspr, pr, pr_list); @@ -1305,11 +1325,6 @@ mtx_lock(&pr->pr_mtx); drflags |= PD_LOCKED; - /* - * New prisons do not yet have a reference, because we do not - * want others to see the incomplete prison once the - * allprison_lock is downgraded. 
- */ } else { /* * Grab a reference for existing prisons, to ensure they @@ -1434,7 +1449,7 @@ #ifdef VIMAGE (tpr != tppr && (tpr->pr_flags & PR_VNET)) || #endif - tpr->pr_uref == 0) { + tpr->pr_state == PRISON_STATE_DYING) { descend = 0; continue; } @@ -1502,7 +1517,7 @@ #ifdef VIMAGE (tpr != tppr && (tpr->pr_flags & PR_VNET)) || #endif - tpr->pr_uref == 0) { + tpr->pr_state == PRISON_STATE_DYING) { descend = 0; continue; } @@ -1731,12 +1746,9 @@ prison_set_allow_locked(pr, tallow, 0); /* * Persistent prisons get an extra reference, and prisons losing their - * persist flag lose that reference. Only do this for existing prisons - * for now, so new ones will remain unseen until after the module - * handlers have completed. + * persist flag lose that reference. */ - born = pr->pr_uref == 0; - if (!created && (ch_flags & PR_PERSIST & (pr_flags ^ pr->pr_flags))) { + if (ch_flags & PR_PERSIST & (pr_flags ^ pr->pr_flags)) { if (pr_flags & PR_PERSIST) { pr->pr_ref++; pr->pr_uref++; @@ -1807,17 +1819,16 @@ /* Let the modules do their work. */ sx_downgrade(&allprison_lock); drflags = (drflags & ~PD_LIST_XLOCKED) | PD_LIST_SLOCKED; + born = pr->pr_state != PRISON_STATE_ALIVE; if (born) { error = osd_jail_call(pr, PR_METHOD_CREATE, opts); - if (error) { - (void)osd_jail_call(pr, PR_METHOD_REMOVE, NULL); - goto done_deref; - } + if (error) + goto done_remove; } error = osd_jail_call(pr, PR_METHOD_SET, opts); if (error) { if (born) - (void)osd_jail_call(pr, PR_METHOD_REMOVE, NULL); + goto done_remove; goto done_deref; } @@ -1827,11 +1838,9 @@ error = do_jail_attach(td, pr); drflags &= ~PD_LIST_SLOCKED; if (error) { - if (created) { - /* do_jail_attach has removed the prison. */ - pr = NULL; - } vfs_opterror(opts, "attach failed"); + if (born) + goto done_remove; goto done_deref; } } @@ -1846,25 +1855,31 @@ } #endif + /* + * Now that everything is done, the prison is usually alive, though + * it might have been either new (invalid) or dying before. 
+ */ + mtx_lock(&pr->pr_mtx); + drflags |= PD_LOCKED; + if (pr->pr_uref > 0) + pr->pr_state = PRISON_STATE_ALIVE; + td->td_retval[0] = pr->pr_id; + goto done_deref; - if (created) { - /* - * Add a reference to newly created persistent prisons - * (which was not done earlier so that the prison would - * not be publicly visible). - */ - if (pr_flags & PR_PERSIST) { - mtx_lock(&pr->pr_mtx); - drflags |= PD_LOCKED; - pr->pr_ref++; - pr->pr_uref++; - } else { - /* Non-persistent jails need no further changes. */ - pr = NULL; - } + done_remove: + /* + * Remove the persist flag from new (or resurrected) prisons, + * and call OSD remove methods. + */ + (void)osd_jail_call(pr, PR_METHOD_REMOVE, NULL); + mtx_lock(&pr->pr_mtx); + drflags |= PD_LOCKED; + if (pr->pr_flags & PR_PERSIST) { + pr->pr_flags &= ~PR_PERSIST; + pr->pr_ref--; + drflags |= PD_DEUREF; } - done_deref: /* Release any temporary prison holds and/or locks. */ if (pr != NULL) @@ -2030,7 +2045,9 @@ if (pr->pr_id > jid && prison_ischild(mypr, pr)) { mtx_lock(&pr->pr_mtx); if (pr->pr_ref > 0 && - (pr->pr_uref > 0 || (flags & JAIL_DYING))) + (pr->pr_state == PRISON_STATE_ALIVE || + (pr->pr_state == PRISON_STATE_DYING && + (flags & JAIL_DYING)))) break; mtx_unlock(&pr->pr_mtx); } @@ -2051,7 +2068,8 @@ pr = prison_find_child(mypr, jid); if (pr != NULL) { drflags |= PD_LOCKED; - if (pr->pr_uref == 0 && !(flags & JAIL_DYING)) { + if (pr->pr_state == PRISON_STATE_DYING && + !(flags & JAIL_DYING)) { error = ENOENT; vfs_opterror(opts, "jail %d is dying", jid); @@ -2075,7 +2093,8 @@ pr = prison_find_name(mypr, name); if (pr != NULL) { drflags |= PD_LOCKED; - if (pr->pr_uref == 0 && !(flags & JAIL_DYING)) { + if (pr->pr_state == PRISON_STATE_DYING && + !(flags & JAIL_DYING)) { error = ENOENT; vfs_opterror(opts, "jail \"%s\" is dying", name); @@ -2203,7 +2222,7 @@ if (error != 0 && error != ENOENT) goto done; } - i = (pr->pr_uref == 0); + i = pr->pr_state == PRISON_STATE_DYING; error = vfs_setopt(opts, "dying", &i, 
sizeof(i)); if (error != 0 && error != ENOENT) goto done; @@ -2314,7 +2333,8 @@ lpr = NULL; FOREACH_PRISON_DESCENDANT(pr, cpr, descend) { mtx_lock(&cpr->pr_mtx); - if (cpr->pr_ref > 0) { + if (cpr->pr_ref > 0 && + cpr->pr_state != PRISON_STATE_INVALID) { tpr = cpr; cpr->pr_ref++; } else { @@ -2414,11 +2434,8 @@ return (EINVAL); } - /* - * Do not allow a process to attach to a prison that is not - * considered to be "alive". - */ - if (pr->pr_uref == 0) { + /* Do not allow a process to attach to a prison that is not alive. */ + if (pr->pr_state != PRISON_STATE_ALIVE) { mtx_unlock(&pr->pr_mtx); sx_sunlock(&allprison_lock); return (EINVAL); @@ -2514,7 +2531,8 @@ TAILQ_FOREACH(pr, &allprison, pr_list) { if (pr->pr_id == prid) { mtx_lock(&pr->pr_mtx); - if (pr->pr_ref > 0) + if (pr->pr_ref > 0 && + pr->pr_state != PRISON_STATE_INVALID) return (pr); /* * Any active prison with the same ID would have @@ -2542,7 +2560,8 @@ FOREACH_PRISON_DESCENDANT(mypr, pr, descend) { if (pr->pr_id == prid) { mtx_lock(&pr->pr_mtx); - if (pr->pr_ref > 0) + if (pr->pr_ref > 0 && + pr->pr_state != PRISON_STATE_INVALID) return (pr); mtx_unlock(&pr->pr_mtx); } @@ -2568,9 +2587,10 @@ if (!strcmp(pr->pr_name + mylen, name)) { mtx_lock(&pr->pr_mtx); if (pr->pr_ref > 0) { - if (pr->pr_uref > 0) + if (pr->pr_state == PRISON_STATE_ALIVE) return (pr); - deadpr = pr; + if (pr->pr_state == PRISON_STATE_DYING) + deadpr = pr; } mtx_unlock(&pr->pr_mtx); } @@ -2665,14 +2685,16 @@ ("prison_deref PD_DEUREF on a dead prison (jid=%d)", pr->pr_id)); pr->pr_uref--; - lasturef = pr->pr_uref == 0; - if (lasturef) - pr->pr_ref++; KASSERT(prison0.pr_uref != 0, ("prison0 pr_uref=0")); - } else - lasturef = 0; + } + lasturef = pr->pr_uref == 0 && + pr->pr_state == PRISON_STATE_ALIVE; + if (lasturef) { + pr->pr_ref++; + pr->pr_state = PRISON_STATE_DYING; + } if (flags & PD_DEREF) { - KASSERT(pr->pr_ref > 0, + KASSERT(pr->pr_ref > (lasturef ? 
1 : 0), ("prison_deref PD_DEREF on a dead prison (jid=%d)", pr->pr_id)); pr->pr_ref--; @@ -2749,7 +2771,7 @@ /* Removing a prison frees a reference on its parent. */ pr = ppr; mtx_lock(&pr->pr_mtx); - flags = PD_DEREF | PD_DEUREF; + flags = PD_DEREF | PD_DEUREF | PD_LOCKED; } } @@ -3623,15 +3645,14 @@ cpr->pr_ip6s * sizeof(struct in6_addr)); } #endif - if (cpr->pr_ref == 0) { + if (cpr->pr_ref == 0 || cpr->pr_state == PRISON_STATE_INVALID) { mtx_unlock(&cpr->pr_mtx); continue; } bzero(xp, sizeof(*xp)); xp->pr_version = XPRISON_VERSION; xp->pr_id = cpr->pr_id; - xp->pr_state = cpr->pr_uref > 0 - ? PRISON_STATE_ALIVE : PRISON_STATE_DYING; + xp->pr_state = cpr->pr_state; strlcpy(xp->pr_path, prison_path(pr, cpr), sizeof(xp->pr_path)); strlcpy(xp->pr_host, cpr->pr_hostname, sizeof(xp->pr_host)); strlcpy(xp->pr_name, prison_name(pr, cpr), sizeof(xp->pr_name)); @@ -4282,6 +4303,7 @@ db_printf(" parent = %p\n", pr->pr_parent); db_printf(" ref = %d\n", pr->pr_ref); db_printf(" uref = %d\n", pr->pr_uref); + db_printf(" state = %d\n", pr->pr_state); db_printf(" path = %s\n", pr->pr_path); db_printf(" cpuset = %d\n", pr->pr_cpuset ? pr->pr_cpuset->cs_id : -1); diff --git a/sys/sys/jail.h b/sys/sys/jail.h --- a/sys/sys/jail.h +++ b/sys/sys/jail.h @@ -88,9 +88,12 @@ }; #define XPRISON_VERSION 3 -#define PRISON_STATE_INVALID 0 -#define PRISON_STATE_ALIVE 1 -#define PRISON_STATE_DYING 2 +enum prison_state { + PRISON_STATE_INVALID = 0, /* New prison, not ready to be seen */ + PRISON_STATE_ALIVE, /* Current prison, visible to all */ + PRISON_STATE_DYING /* Removed, but holding resources, */ + /* optionally visible. */ +}; /* * Flags for jail_set and jail_get. 
@@ -183,7 +186,8 @@ int pr_securelevel; /* (p) securelevel */ int pr_enforce_statfs; /* (p) statfs permission */ int pr_devfs_rsnum; /* (p) devfs ruleset */ - int pr_spare[3]; + enum prison_state pr_state; /* (m) state in life cycle */ + int pr_spare[2]; int pr_osreldate; /* (c) kern.osreldate value */ unsigned long pr_hostid; /* (p) jail hostid */ char pr_name[MAXHOSTNAMELEN]; /* (p) admin jail name */