Changeset View
Standalone View
sys/kern/kern_jail.c
Show First 20 Lines • Show All 131 Lines • ▼ Show 20 Lines | |||||
/* allprison, allprison_racct and lastprid are protected by allprison_lock. */ | /* allprison, allprison_racct and lastprid are protected by allprison_lock. */ | ||||
struct sx allprison_lock; | struct sx allprison_lock; | ||||
SX_SYSINIT(allprison_lock, &allprison_lock, "allprison"); | SX_SYSINIT(allprison_lock, &allprison_lock, "allprison"); | ||||
struct prisonlist allprison = TAILQ_HEAD_INITIALIZER(allprison); | struct prisonlist allprison = TAILQ_HEAD_INITIALIZER(allprison); | ||||
LIST_HEAD(, prison_racct) allprison_racct; | LIST_HEAD(, prison_racct) allprison_racct; | ||||
int lastprid = 0; | int lastprid = 0; | ||||
static int get_next_prid(struct prison **insprp); | static int get_next_prid(struct prison **insprp); | ||||
static int do_jail_attach(struct thread *td, struct prison *pr); | static int do_jail_attach(struct thread *td, struct prison *pr, int drflags); | ||||
static void prison_complete(void *context, int pending); | static void prison_complete(void *context, int pending); | ||||
static void prison_deref(struct prison *pr, int flags); | static void prison_deref(struct prison *pr, int flags); | ||||
static int prison_lock_xlock(struct prison *pr, int flags); | |||||
static void prison_free_not_last(struct prison *pr); | |||||
static void prison_set_allow_locked(struct prison *pr, unsigned flag, | static void prison_set_allow_locked(struct prison *pr, unsigned flag, | ||||
int enable); | int enable); | ||||
static char *prison_path(struct prison *pr1, struct prison *pr2); | static char *prison_path(struct prison *pr1, struct prison *pr2); | ||||
static void prison_remove_one(struct prison *pr); | static void prison_remove_one(struct prison *pr); | ||||
#ifdef RACCT | #ifdef RACCT | ||||
static void prison_racct_attach(struct prison *pr); | static void prison_racct_attach(struct prison *pr); | ||||
static void prison_racct_modify(struct prison *pr); | static void prison_racct_modify(struct prison *pr); | ||||
static void prison_racct_detach(struct prison *pr); | static void prison_racct_detach(struct prison *pr); | ||||
▲ Show 20 Lines • Show All 850 Lines • ▼ Show 20 Lines | if (jid < 0) { | ||||
vfs_opterror(opts, "negative jid"); | vfs_opterror(opts, "negative jid"); | ||||
goto done_deref; | goto done_deref; | ||||
} | } | ||||
/* | /* | ||||
* See if a requested jid already exists. Keep track of | * See if a requested jid already exists. Keep track of | ||||
* where it can be inserted later. | * where it can be inserted later. | ||||
*/ | */ | ||||
TAILQ_FOREACH(inspr, &allprison, pr_list) { | TAILQ_FOREACH(inspr, &allprison, pr_list) { | ||||
if (inspr->pr_id == jid) { | if (inspr->pr_id < jid) | ||||
mtx_lock(&inspr->pr_mtx); | continue; | ||||
if (prison_isvalid(inspr)) { | if (inspr->pr_id > jid) | ||||
break; | |||||
pr = inspr; | pr = inspr; | ||||
mtx_lock(&pr->pr_mtx); | |||||
drflags |= PD_LOCKED; | drflags |= PD_LOCKED; | ||||
inspr = NULL; | inspr = NULL; | ||||
} else | |||||
mtx_unlock(&inspr->pr_mtx); | |||||
break; | break; | ||||
} | } | ||||
if (inspr->pr_id > jid) | |||||
break; | |||||
} | |||||
if (pr != NULL) { | if (pr != NULL) { | ||||
ppr = pr->pr_parent; | ppr = pr->pr_parent; | ||||
/* Create: jid must not exist. */ | /* Create: jid must not exist. */ | ||||
if (cuflags == JAIL_CREATE) { | if (cuflags == JAIL_CREATE) { | ||||
/* | /* | ||||
* Even creators that cannot see the jail will | * Even creators that cannot see the jail will | ||||
* get EEXIST. | * get EEXIST. | ||||
*/ | */ | ||||
error = EEXIST; | error = EEXIST; | ||||
vfs_opterror(opts, "jail %d already exists", | vfs_opterror(opts, "jail %d already exists", | ||||
jid); | jid); | ||||
goto done_deref; | goto done_deref; | ||||
} | } | ||||
if (!prison_ischild(mypr, pr)) { | if (!prison_ischild(mypr, pr)) { | ||||
/* | /* | ||||
* Updaters get ENOENT if they cannot see the | * Updaters get ENOENT if they cannot see the | ||||
* jail. This is true even for CREATE | UPDATE, | * jail. This is true even for CREATE | UPDATE, | ||||
* which normally cannot give this error. | * which normally cannot give this error. | ||||
*/ | */ | ||||
error = ENOENT; | error = ENOENT; | ||||
vfs_opterror(opts, "jail %d not found", jid); | vfs_opterror(opts, "jail %d not found", jid); | ||||
goto done_deref; | goto done_deref; | ||||
} else if (!prison_isalive(pr)) { | } | ||||
if (!prison_isalive(pr)) { | |||||
if (!(flags & JAIL_DYING)) { | if (!(flags & JAIL_DYING)) { | ||||
error = ENOENT; | error = ENOENT; | ||||
vfs_opterror(opts, "jail %d is dying", | vfs_opterror(opts, "jail %d is dying", | ||||
jid); | jid); | ||||
goto done_deref; | goto done_deref; | ||||
} else if ((flags & JAIL_ATTACH) || | } | ||||
if ((flags & JAIL_ATTACH) || | |||||
(pr_flags & PR_PERSIST)) { | (pr_flags & PR_PERSIST)) { | ||||
/* | /* | ||||
* A dying jail might be resurrected | * A dying jail might be resurrected | ||||
* (via attach or persist), but first | * (via attach or persist), but first | ||||
* it must determine if another jail | * it must determine if another jail | ||||
* has claimed its name. Accomplish | * has claimed its name. Accomplish | ||||
* this by implicitly re-setting the | * this by implicitly re-setting the | ||||
* name. | * name. | ||||
▲ Show 20 Lines • Show All 57 Lines • ▼ Show 20 Lines | else { | ||||
mtx_unlock(&ppr->pr_mtx); | mtx_unlock(&ppr->pr_mtx); | ||||
*namelc = '.'; | *namelc = '.'; | ||||
} | } | ||||
namelc++; | namelc++; | ||||
} | } | ||||
if (namelc[0] != '\0') { | if (namelc[0] != '\0') { | ||||
pnamelen = | pnamelen = | ||||
(ppr == &prison0) ? 0 : strlen(ppr->pr_name) + 1; | (ppr == &prison0) ? 0 : strlen(ppr->pr_name) + 1; | ||||
name_again: | |||||
deadpr = NULL; | deadpr = NULL; | ||||
FOREACH_PRISON_CHILD(ppr, tpr) { | FOREACH_PRISON_CHILD(ppr, tpr) { | ||||
if (tpr != pr && | if (tpr != pr && | ||||
!strcmp(tpr->pr_name + pnamelen, namelc)) { | !strcmp(tpr->pr_name + pnamelen, namelc)) { | ||||
mtx_lock(&tpr->pr_mtx); | |||||
if (prison_isalive(tpr)) { | if (prison_isalive(tpr)) { | ||||
if (pr == NULL && | if (pr == NULL && | ||||
cuflags != JAIL_CREATE) { | cuflags != JAIL_CREATE) { | ||||
/* | /* | ||||
* Use this jail | * Use this jail | ||||
* for updates. | * for updates. | ||||
*/ | */ | ||||
pr = tpr; | pr = tpr; | ||||
mtx_lock(&pr->pr_mtx); | |||||
drflags |= PD_LOCKED; | drflags |= PD_LOCKED; | ||||
break; | break; | ||||
} | } | ||||
/* | /* | ||||
* Create, or update(jid): | * Create, or update(jid): | ||||
* name must not exist in an | * name must not exist in an | ||||
* active sibling jail. | * active sibling jail. | ||||
*/ | */ | ||||
error = EEXIST; | error = EEXIST; | ||||
mtx_unlock(&tpr->pr_mtx); | |||||
vfs_opterror(opts, | vfs_opterror(opts, | ||||
"jail \"%s\" already exists", | "jail \"%s\" already exists", | ||||
name); | name); | ||||
goto done_deref; | goto done_deref; | ||||
} | } | ||||
if (pr == NULL && | if (pr == NULL && | ||||
cuflags != JAIL_CREATE && | cuflags != JAIL_CREATE) { | ||||
prison_isvalid(tpr)) | |||||
deadpr = tpr; | deadpr = tpr; | ||||
mtx_unlock(&tpr->pr_mtx); | |||||
} | } | ||||
} | } | ||||
} | |||||
/* If no active jail is found, use a dying one. */ | /* If no active jail is found, use a dying one. */ | ||||
if (deadpr != NULL && pr == NULL) { | if (deadpr != NULL && pr == NULL) { | ||||
if (flags & JAIL_DYING) { | if (flags & JAIL_DYING) { | ||||
mtx_lock(&deadpr->pr_mtx); | |||||
if (!prison_isvalid(deadpr)) { | |||||
mtx_unlock(&deadpr->pr_mtx); | |||||
goto name_again; | |||||
} | |||||
pr = deadpr; | pr = deadpr; | ||||
mtx_lock(&pr->pr_mtx); | |||||
drflags |= PD_LOCKED; | drflags |= PD_LOCKED; | ||||
} else if (cuflags == JAIL_UPDATE) { | } else if (cuflags == JAIL_UPDATE) { | ||||
error = ENOENT; | error = ENOENT; | ||||
vfs_opterror(opts, | vfs_opterror(opts, | ||||
"jail \"%s\" is dying", name); | "jail \"%s\" is dying", name); | ||||
goto done_deref; | goto done_deref; | ||||
} | } | ||||
} | } | ||||
Show All 17 Lines | #endif | ||||
created = pr == NULL; | created = pr == NULL; | ||||
if (created) { | if (created) { | ||||
for (tpr = mypr; tpr != NULL; tpr = tpr->pr_parent) | for (tpr = mypr; tpr != NULL; tpr = tpr->pr_parent) | ||||
if (tpr->pr_childcount >= tpr->pr_childmax) { | if (tpr->pr_childcount >= tpr->pr_childmax) { | ||||
error = EPERM; | error = EPERM; | ||||
vfs_opterror(opts, "prison limit exceeded"); | vfs_opterror(opts, "prison limit exceeded"); | ||||
goto done_deref; | goto done_deref; | ||||
} | } | ||||
mtx_lock(&ppr->pr_mtx); | |||||
if (!prison_isvalid(ppr)) { | |||||
mtx_unlock(&ppr->pr_mtx); | |||||
error = ENOENT; | |||||
vfs_opterror(opts, "jail \"%s\" not found", | |||||
prison_name(mypr, ppr)); | |||||
goto done_deref; | |||||
} | |||||
prison_hold(ppr); | prison_hold(ppr); | ||||
if (refcount_acquire(&ppr->pr_uref)) | if (!refcount_acquire_if_not_zero(&ppr->pr_uref)) { | ||||
mtx_unlock(&ppr->pr_mtx); | |||||
else { | |||||
/* This brings the parent back to life. */ | /* This brings the parent back to life. */ | ||||
mtx_lock(&ppr->pr_mtx); | |||||
refcount_acquire(&ppr->pr_uref); | |||||
mtx_unlock(&ppr->pr_mtx); | mtx_unlock(&ppr->pr_mtx); | ||||
error = osd_jail_call(ppr, PR_METHOD_CREATE, opts); | error = osd_jail_call(ppr, PR_METHOD_CREATE, opts); | ||||
if (error) { | if (error) { | ||||
pr = ppr; | pr = ppr; | ||||
drflags |= PD_DEREF | PD_DEUREF; | drflags |= PD_DEREF | PD_DEUREF; | ||||
goto done_deref; | goto done_deref; | ||||
} | } | ||||
} | } | ||||
if (jid == 0 && (jid = get_next_prid(&inspr)) == 0) { | if (jid == 0 && (jid = get_next_prid(&inspr)) == 0) { | ||||
error = EAGAIN; | error = EAGAIN; | ||||
vfs_opterror(opts, "no available jail IDs"); | vfs_opterror(opts, "no available jail IDs"); | ||||
pr = ppr; | pr = ppr; | ||||
drflags |= PD_DEREF | PD_DEUREF; | drflags |= PD_DEREF | PD_DEUREF; | ||||
goto done_deref; | goto done_deref; | ||||
} | } | ||||
pr = malloc(sizeof(*pr), M_PRISON, M_WAITOK | M_ZERO); | pr = malloc(sizeof(*pr), M_PRISON, M_WAITOK | M_ZERO); | ||||
refcount_init(&pr->pr_ref, 0); | |||||
refcount_init(&pr->pr_uref, 0); | |||||
LIST_INIT(&pr->pr_children); | LIST_INIT(&pr->pr_children); | ||||
mtx_init(&pr->pr_mtx, "jail mutex", NULL, MTX_DEF | MTX_DUPOK); | mtx_init(&pr->pr_mtx, "jail mutex", NULL, MTX_DEF | MTX_DUPOK); | ||||
TASK_INIT(&pr->pr_task, 0, prison_complete, pr); | TASK_INIT(&pr->pr_task, 0, prison_complete, pr); | ||||
pr->pr_id = jid; | pr->pr_id = jid; | ||||
if (inspr != NULL) | if (inspr != NULL) | ||||
TAILQ_INSERT_BEFORE(inspr, pr, pr_list); | TAILQ_INSERT_BEFORE(inspr, pr, pr_list); | ||||
else | else | ||||
▲ Show 20 Lines • Show All 206 Lines • ▼ Show 20 Lines | |||||
#else | #else | ||||
tppr = &prison0; | tppr = &prison0; | ||||
#endif | #endif | ||||
FOREACH_PRISON_DESCENDANT(tppr, tpr, descend) { | FOREACH_PRISON_DESCENDANT(tppr, tpr, descend) { | ||||
if (tpr == pr || | if (tpr == pr || | ||||
#ifdef VIMAGE | #ifdef VIMAGE | ||||
(tpr != tppr && (tpr->pr_flags & PR_VNET)) || | (tpr != tppr && (tpr->pr_flags & PR_VNET)) || | ||||
#endif | #endif | ||||
refcount_load(&tpr->pr_uref) == 0) { | !prison_isalive(tpr)) { | ||||
descend = 0; | descend = 0; | ||||
continue; | continue; | ||||
} | } | ||||
if (!(tpr->pr_flags & PR_IP4_USER)) | if (!(tpr->pr_flags & PR_IP4_USER)) | ||||
continue; | continue; | ||||
descend = 0; | descend = 0; | ||||
if (tpr->pr_ip4 == NULL || | if (tpr->pr_ip4 == NULL || | ||||
(ip4s == 1 && tpr->pr_ip4s == 1)) | (ip4s == 1 && tpr->pr_ip4s == 1)) | ||||
▲ Show 20 Lines • Show All 51 Lines • ▼ Show 20 Lines | |||||
#else | #else | ||||
tppr = &prison0; | tppr = &prison0; | ||||
#endif | #endif | ||||
FOREACH_PRISON_DESCENDANT(tppr, tpr, descend) { | FOREACH_PRISON_DESCENDANT(tppr, tpr, descend) { | ||||
if (tpr == pr || | if (tpr == pr || | ||||
#ifdef VIMAGE | #ifdef VIMAGE | ||||
(tpr != tppr && (tpr->pr_flags & PR_VNET)) || | (tpr != tppr && (tpr->pr_flags & PR_VNET)) || | ||||
#endif | #endif | ||||
refcount_load(&tpr->pr_uref) == 0) { | !prison_isalive(tpr)) { | ||||
descend = 0; | descend = 0; | ||||
continue; | continue; | ||||
} | } | ||||
if (!(tpr->pr_flags & PR_IP6_USER)) | if (!(tpr->pr_flags & PR_IP6_USER)) | ||||
continue; | continue; | ||||
descend = 0; | descend = 0; | ||||
if (tpr->pr_ip6 == NULL || | if (tpr->pr_ip6 == NULL || | ||||
(ip6s == 1 && tpr->pr_ip6s == 1)) | (ip6s == 1 && tpr->pr_ip6s == 1)) | ||||
▲ Show 20 Lines • Show All 222 Lines • ▼ Show 20 Lines | #endif | ||||
* handlers have completed. | * handlers have completed. | ||||
*/ | */ | ||||
born = !prison_isalive(pr); | born = !prison_isalive(pr); | ||||
if (!created && (ch_flags & PR_PERSIST & (pr_flags ^ pr->pr_flags))) { | if (!created && (ch_flags & PR_PERSIST & (pr_flags ^ pr->pr_flags))) { | ||||
if (pr_flags & PR_PERSIST) { | if (pr_flags & PR_PERSIST) { | ||||
prison_hold(pr); | prison_hold(pr); | ||||
refcount_acquire(&pr->pr_uref); | refcount_acquire(&pr->pr_uref); | ||||
} else { | } else { | ||||
refcount_release(&pr->pr_ref); | |||||
drflags |= PD_DEUREF; | drflags |= PD_DEUREF; | ||||
prison_free_not_last(pr); | |||||
} | } | ||||
} | } | ||||
pr->pr_flags = (pr->pr_flags & ~ch_flags) | pr_flags; | pr->pr_flags = (pr->pr_flags & ~ch_flags) | pr_flags; | ||||
pr->pr_flags &= ~PR_REMOVE; | pr->pr_flags &= ~PR_REMOVE; | ||||
mtx_unlock(&pr->pr_mtx); | mtx_unlock(&pr->pr_mtx); | ||||
drflags &= ~PD_LOCKED; | drflags &= ~PD_LOCKED; | ||||
#ifdef RACCT | #ifdef RACCT | ||||
▲ Show 20 Lines • Show All 47 Lines • ▼ Show 20 Lines | #endif | ||||
redo_ip6 = 1; | redo_ip6 = 1; | ||||
} | } | ||||
} | } | ||||
mtx_unlock(&pr->pr_mtx); | mtx_unlock(&pr->pr_mtx); | ||||
} | } | ||||
#endif | #endif | ||||
/* Let the modules do their work. */ | /* Let the modules do their work. */ | ||||
sx_downgrade(&allprison_lock); | |||||
drflags = (drflags & ~PD_LIST_XLOCKED) | PD_LIST_SLOCKED; | |||||
if (born) { | if (born) { | ||||
error = osd_jail_call(pr, PR_METHOD_CREATE, opts); | error = osd_jail_call(pr, PR_METHOD_CREATE, opts); | ||||
if (error) { | if (error) { | ||||
(void)osd_jail_call(pr, PR_METHOD_REMOVE, NULL); | (void)osd_jail_call(pr, PR_METHOD_REMOVE, NULL); | ||||
goto done_deref; | goto done_deref; | ||||
} | } | ||||
} | } | ||||
error = osd_jail_call(pr, PR_METHOD_SET, opts); | error = osd_jail_call(pr, PR_METHOD_SET, opts); | ||||
if (error) { | if (error) { | ||||
if (born) | if (born) | ||||
(void)osd_jail_call(pr, PR_METHOD_REMOVE, NULL); | (void)osd_jail_call(pr, PR_METHOD_REMOVE, NULL); | ||||
goto done_deref; | goto done_deref; | ||||
} | } | ||||
/* Attach this process to the prison if requested. */ | /* Attach this process to the prison if requested. */ | ||||
if (flags & JAIL_ATTACH) { | if (flags & JAIL_ATTACH) { | ||||
mtx_lock(&pr->pr_mtx); | error = do_jail_attach(td, pr, prison_lock_xlock(pr, drflags)); | ||||
error = do_jail_attach(td, pr); | drflags &= ~(PD_LOCKED | PD_LIST_XLOCKED); | ||||
drflags &= ~PD_LIST_SLOCKED; | |||||
if (error) { | if (error) { | ||||
if (created) { | if (created) { | ||||
/* do_jail_attach has removed the prison. */ | /* do_jail_attach has removed the prison. */ | ||||
pr = NULL; | pr = NULL; | ||||
} | } | ||||
vfs_opterror(opts, "attach failed"); | vfs_opterror(opts, "attach failed"); | ||||
goto done_deref; | goto done_deref; | ||||
} | } | ||||
} | } | ||||
#ifdef RACCT | #ifdef RACCT | ||||
if (racct_enable && !created) { | if (racct_enable && !created) { | ||||
if (drflags & PD_LIST_SLOCKED) { | if (drflags & PD_LIST_XLOCKED) { | ||||
sx_sunlock(&allprison_lock); | sx_xunlock(&allprison_lock); | ||||
drflags &= ~PD_LIST_SLOCKED; | drflags &= ~PD_LIST_XLOCKED; | ||||
} | } | ||||
prison_racct_modify(pr); | prison_racct_modify(pr); | ||||
} | } | ||||
#endif | #endif | ||||
td->td_retval[0] = pr->pr_id; | td->td_retval[0] = pr->pr_id; | ||||
if (created) { | if (created) { | ||||
/* | /* | ||||
* Add a reference to newly created persistent prisons | * Add a reference to newly created persistent prisons | ||||
* (which was not done earlier so that the prison would | * (which was not done earlier so that the prison would | ||||
* not be publicly visible). | * not be publicly visible). | ||||
*/ | */ | ||||
if (pr_flags & PR_PERSIST) { | if (pr_flags & PR_PERSIST) { | ||||
mtx_lock(&pr->pr_mtx); | drflags = prison_lock_xlock(pr, drflags); | ||||
drflags |= PD_LOCKED; | |||||
refcount_acquire(&pr->pr_ref); | refcount_acquire(&pr->pr_ref); | ||||
refcount_acquire(&pr->pr_uref); | refcount_acquire(&pr->pr_uref); | ||||
} else { | } else { | ||||
/* Non-persistent jails need no further changes. */ | /* Non-persistent jails need no further changes. */ | ||||
pr = NULL; | pr = NULL; | ||||
} | } | ||||
} | } | ||||
▲ Show 20 Lines • Show All 60 Lines • ▼ Show 20 Lines | if (TAILQ_EMPTY(&allprison) || | ||||
/* | /* | ||||
* Take two passes through the allprison list: first starting | * Take two passes through the allprison list: first starting | ||||
* with the proposed jid, then ending with it. | * with the proposed jid, then ending with it. | ||||
*/ | */ | ||||
for (maxid = JAIL_MAX; maxid != 0; ) { | for (maxid = JAIL_MAX; maxid != 0; ) { | ||||
TAILQ_FOREACH(inspr, &allprison, pr_list) { | TAILQ_FOREACH(inspr, &allprison, pr_list) { | ||||
if (inspr->pr_id < jid) | if (inspr->pr_id < jid) | ||||
continue; | continue; | ||||
if (inspr->pr_id > jid || | if (inspr->pr_id > jid) { | ||||
refcount_load(&inspr->pr_ref) == 0) { | /* Found an opening. */ | ||||
/* | |||||
* Found an opening. This may be a gap | |||||
* in the list, or a dead jail with the | |||||
* same ID. | |||||
*/ | |||||
maxid = 0; | maxid = 0; | ||||
break; | break; | ||||
} | } | ||||
if (++jid > maxid) { | if (++jid > maxid) { | ||||
if (lastprid == maxid || lastprid == 0) | if (lastprid == maxid || lastprid == 0) | ||||
{ | { | ||||
/* | /* | ||||
* The entire legal range | * The entire legal range | ||||
▲ Show 20 Lines • Show All 72 Lines • ▼ Show 20 Lines | kern_jail_get(struct thread *td, struct uio *optuio, int flags) | ||||
/* | /* | ||||
* Find the prison specified by one of: lastjid, jid, name. | * Find the prison specified by one of: lastjid, jid, name. | ||||
*/ | */ | ||||
sx_slock(&allprison_lock); | sx_slock(&allprison_lock); | ||||
drflags = PD_LIST_SLOCKED; | drflags = PD_LIST_SLOCKED; | ||||
error = vfs_copyopt(opts, "lastjid", &jid, sizeof(jid)); | error = vfs_copyopt(opts, "lastjid", &jid, sizeof(jid)); | ||||
if (error == 0) { | if (error == 0) { | ||||
TAILQ_FOREACH(pr, &allprison, pr_list) { | TAILQ_FOREACH(pr, &allprison, pr_list) { | ||||
if (pr->pr_id > jid && prison_ischild(mypr, pr)) { | if (pr->pr_id > jid && | ||||
((flags & JAIL_DYING) || prison_isalive(pr)) && | |||||
prison_ischild(mypr, pr)) { | |||||
mtx_lock(&pr->pr_mtx); | mtx_lock(&pr->pr_mtx); | ||||
if ((flags & JAIL_DYING) | |||||
? prison_isvalid(pr) : prison_isalive(pr)) | |||||
break; | |||||
mtx_unlock(&pr->pr_mtx); | |||||
} | |||||
} | |||||
if (pr != NULL) { | |||||
drflags |= PD_LOCKED; | drflags |= PD_LOCKED; | ||||
goto found_prison; | goto found_prison; | ||||
} | } | ||||
} | |||||
error = ENOENT; | error = ENOENT; | ||||
vfs_opterror(opts, "no jail after %d", jid); | vfs_opterror(opts, "no jail after %d", jid); | ||||
goto done; | goto done; | ||||
} else if (error != ENOENT) | } else if (error != ENOENT) | ||||
goto done; | goto done; | ||||
error = vfs_copyopt(opts, "jid", &jid, sizeof(jid)); | error = vfs_copyopt(opts, "jid", &jid, sizeof(jid)); | ||||
if (error == 0) { | if (error == 0) { | ||||
▲ Show 20 Lines • Show All 239 Lines • ▼ Show 20 Lines | |||||
/* | /* | ||||
* struct jail_remove_args { | * struct jail_remove_args { | ||||
* int jid; | * int jid; | ||||
* }; | * }; | ||||
*/ | */ | ||||
int | int | ||||
sys_jail_remove(struct thread *td, struct jail_remove_args *uap) | sys_jail_remove(struct thread *td, struct jail_remove_args *uap) | ||||
{ | { | ||||
struct prison *pr, *cpr, *lpr, *tpr; | struct prison *pr, *cpr, *lpr; | ||||
int descend, error; | int descend, error; | ||||
error = priv_check(td, PRIV_JAIL_REMOVE); | error = priv_check(td, PRIV_JAIL_REMOVE); | ||||
if (error) | if (error) | ||||
return (error); | return (error); | ||||
sx_xlock(&allprison_lock); | sx_xlock(&allprison_lock); | ||||
pr = prison_find_child(td->td_ucred->cr_prison, uap->jid); | pr = prison_find_child(td->td_ucred->cr_prison, uap->jid); | ||||
if (pr == NULL) { | if (pr == NULL) { | ||||
sx_xunlock(&allprison_lock); | sx_xunlock(&allprison_lock); | ||||
return (EINVAL); | return (EINVAL); | ||||
} | } | ||||
/* Remove all descendants of this prison, then remove this prison. */ | /* Remove all descendants of this prison, then remove this prison. */ | ||||
prison_hold(pr); | prison_hold(pr); | ||||
if (!LIST_EMPTY(&pr->pr_children)) { | if (!LIST_EMPTY(&pr->pr_children)) { | ||||
mtx_unlock(&pr->pr_mtx); | mtx_unlock(&pr->pr_mtx); | ||||
lpr = NULL; | lpr = NULL; | ||||
FOREACH_PRISON_DESCENDANT(pr, cpr, descend) { | FOREACH_PRISON_DESCENDANT(pr, cpr, descend) { | ||||
mtx_lock(&cpr->pr_mtx); | |||||
if (prison_isvalid(cpr)) { | |||||
tpr = cpr; | |||||
prison_hold(cpr); | prison_hold(cpr); | ||||
} else { | |||||
/* Already removed - do not do it again. */ | |||||
tpr = NULL; | |||||
} | |||||
mtx_unlock(&cpr->pr_mtx); | |||||
if (lpr != NULL) { | if (lpr != NULL) { | ||||
mtx_lock(&lpr->pr_mtx); | mtx_lock(&lpr->pr_mtx); | ||||
prison_remove_one(lpr); | prison_remove_one(lpr); | ||||
sx_xlock(&allprison_lock); | sx_xlock(&allprison_lock); | ||||
} | } | ||||
lpr = tpr; | lpr = cpr; | ||||
} | } | ||||
if (lpr != NULL) { | if (lpr != NULL) { | ||||
mtx_lock(&lpr->pr_mtx); | mtx_lock(&lpr->pr_mtx); | ||||
prison_remove_one(lpr); | prison_remove_one(lpr); | ||||
sx_xlock(&allprison_lock); | sx_xlock(&allprison_lock); | ||||
} | } | ||||
mtx_lock(&pr->pr_mtx); | mtx_lock(&pr->pr_mtx); | ||||
} | } | ||||
Show All 12 Lines | prison_remove_one(struct prison *pr) | ||||
/* | /* | ||||
* Mark the prison as doomed, so it doesn't accidentally come back | * Mark the prison as doomed, so it doesn't accidentally come back | ||||
* to life. It may still be explicitly brought back by jail_set(2). | * to life. It may still be explicitly brought back by jail_set(2). | ||||
*/ | */ | ||||
pr->pr_flags |= PR_REMOVE; | pr->pr_flags |= PR_REMOVE; | ||||
/* If the prison was persistent, it is not anymore. */ | /* If the prison was persistent, it is not anymore. */ | ||||
if (pr->pr_flags & PR_PERSIST) { | if (pr->pr_flags & PR_PERSIST) { | ||||
refcount_release(&pr->pr_ref); | |||||
drflags |= PD_DEUREF; | drflags |= PD_DEUREF; | ||||
prison_free_not_last(pr); | |||||
pr->pr_flags &= ~PR_PERSIST; | pr->pr_flags &= ~PR_PERSIST; | ||||
} | } | ||||
/* | /* | ||||
* jail_remove added a reference. If that's the only one, remove | * jail_remove added a reference. If that's the only one, remove | ||||
* the prison now. refcount(9) doesn't guarantee the cache coherence | * the prison now. refcount(9) doesn't guarantee the cache coherence | ||||
* of non-zero counters, so force it here. | * of non-zero counters, so force it here. | ||||
*/ | */ | ||||
Show All 33 Lines | |||||
{ | { | ||||
struct prison *pr; | struct prison *pr; | ||||
int error; | int error; | ||||
error = priv_check(td, PRIV_JAIL_ATTACH); | error = priv_check(td, PRIV_JAIL_ATTACH); | ||||
if (error) | if (error) | ||||
return (error); | return (error); | ||||
/* | sx_slock(&allprison_lock); | ||||
* Start with exclusive hold on allprison_lock to ensure that a possible | |||||
* PR_METHOD_REMOVE call isn't concurrent with jail_set or jail_remove. | |||||
* But then immediately downgrade it since we don't need to stop | |||||
* readers. | |||||
*/ | |||||
sx_xlock(&allprison_lock); | |||||
sx_downgrade(&allprison_lock); | |||||
pr = prison_find_child(td->td_ucred->cr_prison, uap->jid); | pr = prison_find_child(td->td_ucred->cr_prison, uap->jid); | ||||
if (pr == NULL) { | if (pr == NULL) { | ||||
sx_sunlock(&allprison_lock); | sx_sunlock(&allprison_lock); | ||||
return (EINVAL); | return (EINVAL); | ||||
} | } | ||||
/* Do not allow a process to attach to a prison that is not alive. */ | /* Do not allow a process to attach to a prison that is not alive. */ | ||||
if (!prison_isalive(pr)) { | if (!prison_isalive(pr)) { | ||||
mtx_unlock(&pr->pr_mtx); | mtx_unlock(&pr->pr_mtx); | ||||
sx_sunlock(&allprison_lock); | sx_sunlock(&allprison_lock); | ||||
return (EINVAL); | return (EINVAL); | ||||
} | } | ||||
return (do_jail_attach(td, pr)); | return (do_jail_attach(td, pr, PD_LOCKED | PD_LIST_SLOCKED)); | ||||
} | } | ||||
static int | static int | ||||
do_jail_attach(struct thread *td, struct prison *pr) | do_jail_attach(struct thread *td, struct prison *pr, int drflags) | ||||
{ | { | ||||
struct proc *p; | struct proc *p; | ||||
struct ucred *newcred, *oldcred; | struct ucred *newcred, *oldcred; | ||||
int error; | int error; | ||||
mtx_assert(&pr->pr_mtx, MA_OWNED); | |||||
sx_assert(&allprison_lock, SX_LOCKED); | |||||
/* | /* | ||||
* XXX: Note that there is a slight race here if two threads | * XXX: Note that there is a slight race here if two threads | ||||
* in the same privileged process attempt to attach to two | * in the same privileged process attempt to attach to two | ||||
* different jails at the same time. It is important for | * different jails at the same time. It is important for | ||||
* user processes not to do this, or they might end up with | * user processes not to do this, or they might end up with | ||||
* a process root from one prison, but attached to the jail | * a process root from one prison, but attached to the jail | ||||
* of another. | * of another. | ||||
*/ | */ | ||||
refcount_acquire(&pr->pr_ref); | refcount_acquire(&pr->pr_ref); | ||||
refcount_acquire(&pr->pr_uref); | refcount_acquire(&pr->pr_uref); | ||||
drflags |= PD_DEREF | PD_DEUREF; | |||||
mtx_unlock(&pr->pr_mtx); | mtx_unlock(&pr->pr_mtx); | ||||
drflags &= ~PD_LOCKED; | |||||
/* Let modules do whatever they need to prepare for attaching. */ | /* Let modules do whatever they need to prepare for attaching. */ | ||||
error = osd_jail_call(pr, PR_METHOD_ATTACH, td); | error = osd_jail_call(pr, PR_METHOD_ATTACH, td); | ||||
if (error) { | if (error) { | ||||
prison_deref(pr, PD_DEREF | PD_DEUREF | PD_LIST_SLOCKED); | prison_deref(pr, drflags); | ||||
return (error); | return (error); | ||||
} | } | ||||
sx_sunlock(&allprison_lock); | sx_unlock(&allprison_lock); | ||||
drflags &= ~(PD_LIST_SLOCKED | PD_LIST_XLOCKED); | |||||
/* | /* | ||||
* Reparent the newly attached process to this jail. | * Reparent the newly attached process to this jail. | ||||
*/ | */ | ||||
p = td->td_proc; | p = td->td_proc; | ||||
error = cpuset_setproc_update_set(p, pr->pr_cpuset); | error = cpuset_setproc_update_set(p, pr->pr_cpuset); | ||||
if (error) | if (error) | ||||
goto e_revert_osd; | goto e_revert_osd; | ||||
Show All 19 Lines | #ifdef RACCT | ||||
racct_proc_ucred_changed(p, oldcred, newcred); | racct_proc_ucred_changed(p, oldcred, newcred); | ||||
crhold(newcred); | crhold(newcred); | ||||
#endif | #endif | ||||
PROC_UNLOCK(p); | PROC_UNLOCK(p); | ||||
#ifdef RCTL | #ifdef RCTL | ||||
rctl_proc_ucred_changed(p, newcred); | rctl_proc_ucred_changed(p, newcred); | ||||
crfree(newcred); | crfree(newcred); | ||||
#endif | #endif | ||||
prison_deref(oldcred->cr_prison, PD_DEREF | PD_DEUREF); | prison_deref(oldcred->cr_prison, drflags); | ||||
crfree(oldcred); | crfree(oldcred); | ||||
/* | /* | ||||
* If the prison was killed while changing credentials, die along | * If the prison was killed while changing credentials, die along | ||||
* with it. | * with it. | ||||
*/ | */ | ||||
if (!prison_isalive(pr)) { | if (!prison_isalive(pr)) { | ||||
PROC_LOCK(p); | PROC_LOCK(p); | ||||
kern_psignal(p, SIGKILL); | kern_psignal(p, SIGKILL); | ||||
PROC_UNLOCK(p); | PROC_UNLOCK(p); | ||||
} | } | ||||
return (0); | return (0); | ||||
e_unlock: | e_unlock: | ||||
VOP_UNLOCK(pr->pr_root); | VOP_UNLOCK(pr->pr_root); | ||||
e_revert_osd: | e_revert_osd: | ||||
/* Tell modules this thread is still in its old jail after all. */ | /* Tell modules this thread is still in its old jail after all. */ | ||||
sx_slock(&allprison_lock); | sx_slock(&allprison_lock); | ||||
drflags |= PD_LIST_SLOCKED; | |||||
(void)osd_jail_call(td->td_ucred->cr_prison, PR_METHOD_ATTACH, td); | (void)osd_jail_call(td->td_ucred->cr_prison, PR_METHOD_ATTACH, td); | ||||
prison_deref(pr, PD_DEREF | PD_DEUREF | PD_LIST_SLOCKED); | prison_deref(pr, drflags); | ||||
return (error); | return (error); | ||||
} | } | ||||
/* | /* | ||||
* Returns a locked prison instance, or NULL on failure. | * Returns a locked prison instance, or NULL on failure. | ||||
*/ | */ | ||||
struct prison * | struct prison * | ||||
prison_find(int prid) | prison_find(int prid) | ||||
{ | { | ||||
struct prison *pr; | struct prison *pr; | ||||
sx_assert(&allprison_lock, SX_LOCKED); | sx_assert(&allprison_lock, SX_LOCKED); | ||||
TAILQ_FOREACH(pr, &allprison, pr_list) { | TAILQ_FOREACH(pr, &allprison, pr_list) { | ||||
if (pr->pr_id == prid) { | if (pr->pr_id < prid) | ||||
continue; | |||||
if (pr->pr_id > prid) | |||||
break; | |||||
KASSERT(prison_isvalid(pr), ("Found invalid prison %p", pr)); | |||||
mtx_lock(&pr->pr_mtx); | mtx_lock(&pr->pr_mtx); | ||||
if (prison_isvalid(pr)) | |||||
return (pr); | return (pr); | ||||
/* | |||||
* Any active prison with the same ID would have | |||||
* been inserted before a dead one. | |||||
*/ | |||||
mtx_unlock(&pr->pr_mtx); | |||||
break; | |||||
} | } | ||||
if (pr->pr_id > prid) | |||||
break; | |||||
} | |||||
return (NULL); | return (NULL); | ||||
} | } | ||||
/* | /* | ||||
* Find a prison that is a descendant of mypr. Returns a locked prison or NULL. | * Find a prison that is a descendant of mypr. Returns a locked prison or NULL. | ||||
*/ | */ | ||||
struct prison * | struct prison * | ||||
prison_find_child(struct prison *mypr, int prid) | prison_find_child(struct prison *mypr, int prid) | ||||
{ | { | ||||
struct prison *pr; | struct prison *pr; | ||||
int descend; | int descend; | ||||
sx_assert(&allprison_lock, SX_LOCKED); | sx_assert(&allprison_lock, SX_LOCKED); | ||||
FOREACH_PRISON_DESCENDANT(mypr, pr, descend) { | FOREACH_PRISON_DESCENDANT(mypr, pr, descend) { | ||||
if (pr->pr_id == prid) { | if (pr->pr_id == prid) { | ||||
KASSERT(prison_isvalid(pr), | |||||
("Found invalid prison %p", pr)); | |||||
mtx_lock(&pr->pr_mtx); | mtx_lock(&pr->pr_mtx); | ||||
if (prison_isvalid(pr)) | |||||
return (pr); | return (pr); | ||||
mtx_unlock(&pr->pr_mtx); | |||||
} | } | ||||
} | } | ||||
return (NULL); | return (NULL); | ||||
} | } | ||||
/* | /* | ||||
* Look for the name relative to mypr. Returns a locked prison or NULL. | * Look for the name relative to mypr. Returns a locked prison or NULL. | ||||
*/ | */ | ||||
struct prison * | struct prison * | ||||
prison_find_name(struct prison *mypr, const char *name) | prison_find_name(struct prison *mypr, const char *name) | ||||
{ | { | ||||
struct prison *pr, *deadpr; | struct prison *pr, *deadpr; | ||||
size_t mylen; | size_t mylen; | ||||
int descend; | int descend; | ||||
sx_assert(&allprison_lock, SX_LOCKED); | sx_assert(&allprison_lock, SX_LOCKED); | ||||
mylen = (mypr == &prison0) ? 0 : strlen(mypr->pr_name) + 1; | mylen = (mypr == &prison0) ? 0 : strlen(mypr->pr_name) + 1; | ||||
again: | |||||
deadpr = NULL; | deadpr = NULL; | ||||
FOREACH_PRISON_DESCENDANT(mypr, pr, descend) { | FOREACH_PRISON_DESCENDANT(mypr, pr, descend) { | ||||
if (!strcmp(pr->pr_name + mylen, name)) { | if (!strcmp(pr->pr_name + mylen, name)) { | ||||
KASSERT(prison_isvalid(pr), | |||||
("Found invalid prison %p", pr)); | |||||
if (prison_isalive(pr)) { | |||||
mtx_lock(&pr->pr_mtx); | mtx_lock(&pr->pr_mtx); | ||||
bz: Looks like a long line in Phabricator. | |||||
Done Inline ActionsA lot of things do - it just depends on the browser window width. It's 79 columns in emacs, so I'll go with that :-). jamie: A lot of things do - it just depends on the browser window width. It's 79 columns in emacs, so… | |||||
if (prison_isalive(pr)) | |||||
return (pr); | return (pr); | ||||
if (prison_isvalid(pr)) | } | ||||
deadpr = pr; | deadpr = pr; | ||||
mtx_unlock(&pr->pr_mtx); | |||||
} | } | ||||
} | } | ||||
/* There was no valid prison - perhaps there was a dying one. */ | /* There was no valid prison - perhaps there was a dying one. */ | ||||
if (deadpr != NULL) { | if (deadpr != NULL) | ||||
mtx_lock(&deadpr->pr_mtx); | mtx_lock(&deadpr->pr_mtx); | ||||
if (!prison_isvalid(deadpr)) { | |||||
mtx_unlock(&deadpr->pr_mtx); | |||||
goto again; | |||||
} | |||||
} | |||||
return (deadpr); | return (deadpr); | ||||
} | } | ||||
/* | /* | ||||
* See if a prison has the specific flag set. The prison should be locked, | * See if a prison has the specific flag set. The prison should be locked, | ||||
* unless checking for flags that are only set at jail creation (such as | * unless checking for flags that are only set at jail creation (such as | ||||
* PR_IP4 and PR_IP6), or only the single bit is examined, without regard | * PR_IP4 and PR_IP6), or only the single bit is examined, without regard | ||||
* to any other prison data. | * to any other prison data. | ||||
Show All 37 Lines | KASSERT(was_valid, | ||||
("Trying to hold dead prison %p (jid=%d).", pr, pr->pr_id)); | ("Trying to hold dead prison %p (jid=%d).", pr, pr->pr_id)); | ||||
#else | #else | ||||
refcount_acquire(&pr->pr_ref); | refcount_acquire(&pr->pr_ref); | ||||
#endif | #endif | ||||
} | } | ||||
/* | /* | ||||
* Remove a prison reference. If that was the last reference, the | * Remove a prison reference. If that was the last reference, the | ||||
* prison will be removed (at a later time). Return with the prison | * prison will be removed (at a later time). | ||||
* unlocked. | |||||
*/ | */ | ||||
void | void | ||||
prison_free_locked(struct prison *pr) | prison_free_locked(struct prison *pr) | ||||
{ | { | ||||
int lastref; | |||||
mtx_assert(&pr->pr_mtx, MA_OWNED); | mtx_assert(&pr->pr_mtx, MA_OWNED); | ||||
/* | |||||
* Locking is no longer required, but unlock because the caller | |||||
* expects it. | |||||
*/ | |||||
mtx_unlock(&pr->pr_mtx); | |||||
prison_free(pr); | |||||
} | |||||
void | |||||
prison_free(struct prison *pr) | |||||
{ | |||||
Not Done Inline ActionsIf you unlock unconditionally leaving an assert is a good way to document the expectation for the caller. bz: If you unlock unconditionally leaving an assert is a good way to document the expectation for… | |||||
Done Inline ActionsYeah, good point. I can put that back. jamie: Yeah, good point. I can put that back. | |||||
KASSERT(refcount_load(&pr->pr_ref) > 0, | KASSERT(refcount_load(&pr->pr_ref) > 0, | ||||
("Trying to free dead prison %p (jid=%d).", | ("Trying to free dead prison %p (jid=%d).", | ||||
pr, pr->pr_id)); | pr, pr->pr_id)); | ||||
lastref = refcount_release(&pr->pr_ref); | if (!refcount_release_if_not_last(&pr->pr_ref)) { | ||||
mtx_unlock(&pr->pr_mtx); | |||||
if (lastref) { | |||||
/* | /* | ||||
* Don't remove the prison itself in this context, | * Don't remove the last reference in this context, | ||||
* in case there are locks held. | * in case there are locks held. | ||||
*/ | */ | ||||
taskqueue_enqueue(taskqueue_thread, &pr->pr_task); | taskqueue_enqueue(taskqueue_thread, &pr->pr_task); | ||||
} | } | ||||
} | } | ||||
void | static void | ||||
prison_free(struct prison *pr) | prison_free_not_last(struct prison *pr) | ||||
{ | { | ||||
#ifdef INVARIANTS | |||||
int lastref; | |||||
/* | |||||
* Locking is only required when releasing the last reference. | |||||
* This allows assurance that a locked prison will remain valid | |||||
* until it is unlocked. | |||||
*/ | |||||
KASSERT(refcount_load(&pr->pr_ref) > 0, | KASSERT(refcount_load(&pr->pr_ref) > 0, | ||||
("Trying to free dead prison %p (jid=%d).", | ("Trying to free dead prison %p (jid=%d).", | ||||
pr, pr->pr_id)); | pr, pr->pr_id)); | ||||
if (refcount_release_if_not_last(&pr->pr_ref)) | lastref = refcount_release(&pr->pr_ref); | ||||
return; | KASSERT(!lastref, | ||||
mtx_lock(&pr->pr_mtx); | ("prison_free_not_last freed last ref on prison %p (jid=%d).", | ||||
prison_free_locked(pr); | pr, pr->pr_id)); | ||||
#else | |||||
refcount_release(&pr>pr_ref); | |||||
#endif | |||||
} | } | ||||
/* | /* | ||||
* Hold a prison for user visibility, by incrementing pr_uref. | * Hold a prison for user visibility, by incrementing pr_uref. | ||||
* It is generally an error to hold a prison that isn't already | * It is generally an error to hold a prison that isn't already | ||||
* user-visible, except through the jail system calls. It is also | * user-visible, except through the jail system calls. It is also | ||||
* an error to hold an invalid prison. A prison record will remain | * an error to hold an invalid prison. A prison record will remain | ||||
* alive as long as it has at least one user reference, and will not | * alive as long as it has at least one user reference, and will not | ||||
* be set to the dying state was long as the prison mutex is held. | * be set to the dying state until the prison mutex and allprison_lock | ||||
* are both freed. | |||||
*/ | */ | ||||
void | void | ||||
prison_proc_hold(struct prison *pr) | prison_proc_hold(struct prison *pr) | ||||
{ | { | ||||
#ifdef INVARIANTS | #ifdef INVARIANTS | ||||
int was_alive = refcount_acquire_if_not_zero(&pr->pr_uref); | int was_alive = refcount_acquire_if_not_zero(&pr->pr_uref); | ||||
KASSERT(was_alive, | KASSERT(was_alive, | ||||
Show All 21 Lines | KASSERT(refcount_load(&pr->pr_uref) > 0, | ||||
("Trying to kill a process in a dead prison (jid=%d)", pr->pr_id)); | ("Trying to kill a process in a dead prison (jid=%d)", pr->pr_id)); | ||||
if (!refcount_release_if_not_last(&pr->pr_uref)) { | if (!refcount_release_if_not_last(&pr->pr_uref)) { | ||||
/* | /* | ||||
* Don't remove the last user reference in this context, | * Don't remove the last user reference in this context, | ||||
* which is expected to be a process that is not only locked, | * which is expected to be a process that is not only locked, | ||||
* but also half dead. Add a reference so any calls to | * but also half dead. Add a reference so any calls to | ||||
* prison_free() won't re-submit the task. | * prison_free() won't re-submit the task. | ||||
*/ | */ | ||||
refcount_acquire(&pr->pr_ref); | prison_hold(pr); | ||||
taskqueue_enqueue(taskqueue_thread, &pr->pr_task); | taskqueue_enqueue(taskqueue_thread, &pr->pr_task); | ||||
} | } | ||||
} | } | ||||
/* | /* | ||||
* Complete a call to either prison_free or prison_proc_free. | * Complete a call to either prison_free or prison_proc_free. | ||||
*/ | */ | ||||
static void | static void | ||||
prison_complete(void *context, int pending) | prison_complete(void *context, int pending) | ||||
{ | { | ||||
struct prison *pr = context; | struct prison *pr = context; | ||||
int drflags; | |||||
sx_xlock(&allprison_lock); | |||||
mtx_lock(&pr->pr_mtx); | |||||
/* | /* | ||||
* If this is completing a call to prison_proc_free, there will still | * This could be called to release the last reference, or the | ||||
* be a user reference held; clear that as well as the reference that | * last user reference; the existence of a user reference implies | ||||
* was added. No references are expected if this is completing a call | * the latter. There will always be a reference to remove, as | ||||
* to prison_free, but prison_deref is still called for the cleanup. | * prison_proc_free adds one. | ||||
*/ | */ | ||||
prison_deref(pr, refcount_load(&pr->pr_uref) > 0 | drflags = prison_lock_xlock(pr, PD_DEREF); | ||||
? PD_DEREF | PD_DEUREF | PD_LOCKED | PD_LIST_XLOCKED | if (refcount_load(&pr->pr_uref) > 0) | ||||
: PD_LOCKED | PD_LIST_XLOCKED); | drflags |= PD_DEUREF; | ||||
prison_deref(pr, drflags); | |||||
} | } | ||||
/* | /* | ||||
* Remove a prison reference and/or user reference (usually). | * Remove a prison reference and/or user reference (usually). | ||||
* This assumes context that allows sleeping (for allprison_lock), | * This assumes context that allows sleeping (for allprison_lock), | ||||
* with no non-sleeping locks held, except perhaps the prison itself. | * with no non-sleeping locks held, except perhaps the prison itself. | ||||
* If there are no more references, release and delist the prison. | * If there are no more references, release and delist the prison. | ||||
* On completion, the prison lock and the allprison lock are both | * On completion, the prison lock and the allprison lock are both | ||||
* unlocked. | * unlocked. | ||||
*/ | */ | ||||
static void | static void | ||||
prison_deref(struct prison *pr, int flags) | prison_deref(struct prison *pr, int flags) | ||||
{ | { | ||||
struct prisonlist freeprison; | struct prisonlist freeprison; | ||||
struct prison *rpr, *tpr; | struct prison *rpr, *ppr, *tpr; | ||||
int lastref, lasturef; | |||||
TAILQ_INIT(&freeprison); | TAILQ_INIT(&freeprison); | ||||
if (!(flags & PD_LOCKED)) | |||||
mtx_lock(&pr->pr_mtx); | |||||
/* | /* | ||||
* Release this prison as requested, which may cause its parent | * Release this prison as requested, which may cause its parent | ||||
* to be released, and then maybe its grandparent, etc. | * to be released, and then maybe its grandparent, etc. | ||||
*/ | */ | ||||
for (;;) { | for (;;) { | ||||
if (flags & PD_DEUREF) { | if (flags & PD_DEUREF) { | ||||
/* Drop a user reference. */ | |||||
KASSERT(refcount_load(&pr->pr_uref) > 0, | KASSERT(refcount_load(&pr->pr_uref) > 0, | ||||
("prison_deref PD_DEUREF on a dead prison (jid=%d)", | ("prison_deref PD_DEUREF on a dead prison (jid=%d)", | ||||
pr->pr_id)); | pr->pr_id)); | ||||
lasturef = refcount_release(&pr->pr_uref); | if (!refcount_release_if_not_last(&pr->pr_uref)) { | ||||
if (lasturef) | if (!(flags & PD_DEREF)) { | ||||
refcount_acquire(&pr->pr_ref); | prison_hold(pr); | ||||
KASSERT(refcount_load(&prison0.pr_uref) > 0, | flags |= PD_DEREF; | ||||
} | |||||
flags = prison_lock_xlock(pr, flags); | |||||
if (refcount_release(&pr->pr_uref)) { | |||||
/* | |||||
* When the last user references goes, | |||||
* this becomes a dying prison. | |||||
*/ | |||||
KASSERT( | |||||
refcount_load(&prison0.pr_uref) > 0, | |||||
("prison0 pr_uref=0")); | ("prison0 pr_uref=0")); | ||||
} else | mtx_unlock(&pr->pr_mtx); | ||||
lasturef = 0; | flags &= ~PD_LOCKED; | ||||
(void)osd_jail_call(pr, | |||||
PR_METHOD_REMOVE, NULL); | |||||
} | |||||
} | |||||
} | |||||
if (flags & PD_DEREF) { | if (flags & PD_DEREF) { | ||||
/* Drop a reference. */ | |||||
KASSERT(refcount_load(&pr->pr_ref) > 0, | KASSERT(refcount_load(&pr->pr_ref) > 0, | ||||
("prison_deref PD_DEREF on a dead prison (jid=%d)", | ("prison_deref PD_DEREF on a dead prison (jid=%d)", | ||||
pr->pr_id)); | pr->pr_id)); | ||||
lastref = refcount_release(&pr->pr_ref); | if (!refcount_release_if_not_last(&pr->pr_ref)) { | ||||
} | flags = prison_lock_xlock(pr, flags); | ||||
else | if (refcount_release(&pr->pr_ref)) { | ||||
lastref = refcount_load(&pr->pr_ref) == 0; | |||||
mtx_unlock(&pr->pr_mtx); | |||||
/* | /* | ||||
* Tell the modules if the last user reference was removed | * When the last reference goes, | ||||
* (even it sticks around in dying state). | * unlink the prison and set it aside. | ||||
*/ | */ | ||||
if (lasturef) { | KASSERT( | ||||
if (!(flags & (PD_LIST_SLOCKED | PD_LIST_XLOCKED))) { | refcount_load(&pr->pr_uref) == 0, | ||||
if (atomic_load_acq_int(&pr->pr_ref) > 1) { | ("prison_deref: last ref, " | ||||
sx_slock(&allprison_lock); | "but still has %d urefs (jid=%d)", | ||||
flags |= PD_LIST_SLOCKED; | pr->pr_uref, pr->pr_id)); | ||||
} else { | KASSERT( | ||||
sx_xlock(&allprison_lock); | refcount_load(&prison0.pr_ref) != 0, | ||||
flags |= PD_LIST_XLOCKED; | ("prison0 pr_ref=0")); | ||||
TAILQ_REMOVE(&allprison, pr, pr_list); | |||||
LIST_REMOVE(pr, pr_sibling); | |||||
TAILQ_INSERT_TAIL(&freeprison, pr, | |||||
pr_list); | |||||
for (ppr = pr->pr_parent; | |||||
ppr != NULL; | |||||
ppr = ppr->pr_parent) | |||||
ppr->pr_childcount--; | |||||
/* | |||||
* Removing a prison frees references | |||||
* from its parent. | |||||
*/ | |||||
mtx_unlock(&pr->pr_mtx); | |||||
flags &= ~PD_LOCKED; | |||||
pr = pr->pr_parent; | |||||
flags |= PD_DEREF | PD_DEUREF; | |||||
continue; | |||||
} | } | ||||
} | } | ||||
(void)osd_jail_call(pr, PR_METHOD_REMOVE, NULL); | |||||
mtx_lock(&pr->pr_mtx); | |||||
lastref = refcount_release(&pr->pr_ref); | |||||
mtx_unlock(&pr->pr_mtx); | |||||
} | } | ||||
if (!lastref) | |||||
break; | break; | ||||
if (flags & PD_LIST_SLOCKED) { | |||||
if (!sx_try_upgrade(&allprison_lock)) { | |||||
sx_sunlock(&allprison_lock); | |||||
sx_xlock(&allprison_lock); | |||||
} | } | ||||
flags &= ~PD_LIST_SLOCKED; | |||||
} else if (!(flags & PD_LIST_XLOCKED)) | |||||
sx_xlock(&allprison_lock); | |||||
flags |= PD_LIST_XLOCKED; | |||||
TAILQ_REMOVE(&allprison, pr, pr_list); | |||||
LIST_REMOVE(pr, pr_sibling); | |||||
TAILQ_INSERT_TAIL(&freeprison, pr, pr_list); | |||||
for (tpr = pr->pr_parent; tpr != NULL; tpr = tpr->pr_parent) | |||||
tpr->pr_childcount--; | |||||
/* Removing a prison frees a reference on its parent. */ | |||||
pr = pr->pr_parent; | |||||
mtx_lock(&pr->pr_mtx); | |||||
flags |= PD_DEREF | PD_DEUREF; | |||||
} | |||||
/* Release all the prison locks. */ | /* Release all the prison locks. */ | ||||
if (flags & PD_LOCKED) | |||||
mtx_unlock(&pr->pr_mtx); | |||||
if (flags & PD_LIST_SLOCKED) | if (flags & PD_LIST_SLOCKED) | ||||
sx_sunlock(&allprison_lock); | sx_sunlock(&allprison_lock); | ||||
else if (flags & PD_LIST_XLOCKED) | else if (flags & PD_LIST_XLOCKED) | ||||
sx_xunlock(&allprison_lock); | sx_xunlock(&allprison_lock); | ||||
/* | /* | ||||
* Finish removing any unreferenced prisons, which couldn't happen | * Finish removing any unreferenced prisons, which couldn't happen | ||||
* while allprison_lock was held (to avoid a LOR on vrele). | * while allprison_lock was held (to avoid a LOR on vrele). | ||||
Show All 14 Lines | |||||
#endif | #endif | ||||
if (rpr->pr_cpuset != NULL) | if (rpr->pr_cpuset != NULL) | ||||
cpuset_rel(rpr->pr_cpuset); | cpuset_rel(rpr->pr_cpuset); | ||||
osd_jail_exit(rpr); | osd_jail_exit(rpr); | ||||
#ifdef RACCT | #ifdef RACCT | ||||
if (racct_enable) | if (racct_enable) | ||||
prison_racct_detach(rpr); | prison_racct_detach(rpr); | ||||
#endif | #endif | ||||
TAILQ_REMOVE(&freeprison, rpr, pr_list); | |||||
free(rpr, M_PRISON); | free(rpr, M_PRISON); | ||||
} | } | ||||
} | } | ||||
/* | /* | ||||
* Given the current locking state in the flags, make sure allprison_lock | |||||
* is held exclusive, and the prison is locked. Return flags indicating | |||||
* the new state. | |||||
*/ | |||||
static int | |||||
prison_lock_xlock(struct prison *pr, int flags) | |||||
{ | |||||
if (!(flags & PD_LIST_XLOCKED)) { | |||||
/* | |||||
* Get allprison_lock, which may be an upgrade, | |||||
* and may require unlocking the prison. | |||||
*/ | |||||
if (flags & PD_LOCKED) { | |||||
mtx_unlock(&pr->pr_mtx); | |||||
flags &= ~PD_LOCKED; | |||||
} | |||||
if (flags & PD_LIST_SLOCKED) { | |||||
if (!sx_try_upgrade(&allprison_lock)) { | |||||
sx_sunlock(&allprison_lock); | |||||
sx_xlock(&allprison_lock); | |||||
} | |||||
flags &= ~PD_LIST_SLOCKED; | |||||
} else | |||||
sx_xlock(&allprison_lock); | |||||
flags |= PD_LIST_XLOCKED; | |||||
} | |||||
if (!(flags & PD_LOCKED)) { | |||||
Not Done Inline Actions Is this safe in all circumstances? Or may some callers have to "restart" to validate state? bz: Is this safe in all circumstances? Or may some callers have to "restart" to validate state?
I… | |||||
Done Inline ActionsSo far, yes. It's called mostly in prison_deref, which has long unlocked and relocked the prison mutex and shared allprison_lock as needed. Other callers are:
Generally speaking, callers do need to know that they don't really need these locks to stay unbroken, but it's static to kern_jail.c, so callers are generally expected to understand the jail locking ins and outs. jamie: So far, yes. It's called mostly in prison_deref, which has long unlocked and relocked the… | |||||
/* Lock the prison mutex. */ | |||||
mtx_lock(&pr->pr_mtx); | |||||
flags |= PD_LOCKED; | |||||
} | |||||
return flags; | |||||
} | |||||
/* | |||||
* Set or clear a permission bit in the pr_allow field, passing restrictions | * Set or clear a permission bit in the pr_allow field, passing restrictions | ||||
* (cleared permission) down to child jails. | * (cleared permission) down to child jails. | ||||
*/ | */ | ||||
void | void | ||||
prison_set_allow(struct ucred *cred, unsigned flag, int enable) | prison_set_allow(struct ucred *cred, unsigned flag, int enable) | ||||
{ | { | ||||
struct prison *pr; | struct prison *pr; | ||||
▲ Show 20 Lines • Show All 145 Lines • ▼ Show 20 Lines | prison_ischild(struct prison *pr1, struct prison *pr2) | ||||
for (pr2 = pr2->pr_parent; pr2 != NULL; pr2 = pr2->pr_parent) | for (pr2 = pr2->pr_parent; pr2 != NULL; pr2 = pr2->pr_parent) | ||||
if (pr1 == pr2) | if (pr1 == pr2) | ||||
return (1); | return (1); | ||||
return (0); | return (0); | ||||
} | } | ||||
/* | /* | ||||
* Return true if the prison is currently alive. A prison is alive if it is | * Return true if the prison is currently alive. A prison is alive if it | ||||
* valid and holds user references, and it isn't being removed. | * holds user references and it isn't being removed. | ||||
*/ | */ | ||||
bool | bool | ||||
prison_isalive(struct prison *pr) | prison_isalive(struct prison *pr) | ||||
{ | { | ||||
if (__predict_false(refcount_load(&pr->pr_ref) == 0)) | |||||
return (false); | |||||
if (__predict_false(refcount_load(&pr->pr_uref) == 0)) | if (__predict_false(refcount_load(&pr->pr_uref) == 0)) | ||||
return (false); | return (false); | ||||
if (__predict_false(pr->pr_flags & PR_REMOVE)) | if (__predict_false(pr->pr_flags & PR_REMOVE)) | ||||
return (false); | return (false); | ||||
return (true); | return (true); | ||||
} | } | ||||
/* | /* | ||||
* Return true if the prison is currently valid. A prison is valid if it has | * Return true if the prison is currently valid. A prison is valid if it has | ||||
* been fully created, and is not being destroyed. Note that dying prisons | * been fully created, and is not being destroyed. Note that dying prisons | ||||
* are still considered valid. | * are still considered valid. Invalid prisons won't be found under normal | ||||
* circumstances, as they're only put in that state by functions that have | |||||
* an exclusive hold on allprison_lock. | |||||
*/ | */ | ||||
bool | bool | ||||
prison_isvalid(struct prison *pr) | prison_isvalid(struct prison *pr) | ||||
{ | { | ||||
if (__predict_false(refcount_load(&pr->pr_ref) == 0)) | if (__predict_false(refcount_load(&pr->pr_ref) == 0)) | ||||
return (false); | return (false); | ||||
return (true); | return (true); | ||||
▲ Show 20 Lines • Show All 650 Lines • ▼ Show 20 Lines | if (cpr->pr_ip6s > 0) { | ||||
ip6 = realloc(ip6, ip6s * | ip6 = realloc(ip6, ip6s * | ||||
sizeof(struct in6_addr), M_TEMP, M_WAITOK); | sizeof(struct in6_addr), M_TEMP, M_WAITOK); | ||||
goto again; | goto again; | ||||
} | } | ||||
bcopy(cpr->pr_ip6, ip6, | bcopy(cpr->pr_ip6, ip6, | ||||
cpr->pr_ip6s * sizeof(struct in6_addr)); | cpr->pr_ip6s * sizeof(struct in6_addr)); | ||||
} | } | ||||
#endif | #endif | ||||
if (!prison_isvalid(cpr)) { | |||||
mtx_unlock(&cpr->pr_mtx); | |||||
continue; | |||||
} | |||||
bzero(xp, sizeof(*xp)); | bzero(xp, sizeof(*xp)); | ||||
xp->pr_version = XPRISON_VERSION; | xp->pr_version = XPRISON_VERSION; | ||||
xp->pr_id = cpr->pr_id; | xp->pr_id = cpr->pr_id; | ||||
xp->pr_state = prison_isalive(cpr) | xp->pr_state = prison_isalive(cpr) | ||||
? PRISON_STATE_ALIVE : PRISON_STATE_DYING; | ? PRISON_STATE_ALIVE : PRISON_STATE_DYING; | ||||
strlcpy(xp->pr_path, prison_path(pr, cpr), sizeof(xp->pr_path)); | strlcpy(xp->pr_path, prison_path(pr, cpr), sizeof(xp->pr_path)); | ||||
strlcpy(xp->pr_host, cpr->pr_hostname, sizeof(xp->pr_host)); | strlcpy(xp->pr_host, cpr->pr_hostname, sizeof(xp->pr_host)); | ||||
strlcpy(xp->pr_name, prison_name(pr, cpr), sizeof(xp->pr_name)); | strlcpy(xp->pr_name, prison_name(pr, cpr), sizeof(xp->pr_name)); | ||||
▲ Show 20 Lines • Show All 742 Lines • Show Last 20 Lines |
Looks like a long line in Phabricator.