diff --git a/lib/libjail/jail.c b/lib/libjail/jail.c --- a/lib/libjail/jail.c +++ b/lib/libjail/jail.c @@ -920,13 +920,21 @@ } desc; int mib[CTL_MAXNAME]; - /* The "lastjid" parameter isn't real. */ + /* + * Some pseudo-parameters don't show up in the sysctl + * parameter list. + */ name = jp->jp_name; - if (!strcmp(name, "lastjid")) { + if (!strcmp(name, "lastjid") || !strcmp(name, "root")) { jp->jp_valuelen = sizeof(int); jp->jp_ctltype = CTLTYPE_INT | CTLFLAG_WR; return (0); } + if (!strcmp(name, "desc")) { + jp->jp_valuelen = sizeof(int); + jp->jp_ctltype = CTLTYPE_INT | CTLFLAG_RW; + return (0); + } /* Find the sysctl that describes the parameter. */ mib[0] = 0; diff --git a/lib/libsys/cap_rights_limit.2 b/lib/libsys/cap_rights_limit.2 --- a/lib/libsys/cap_rights_limit.2 +++ b/lib/libsys/cap_rights_limit.2 @@ -45,6 +45,7 @@ .Sh DESCRIPTION When a file descriptor is created by a function such as .Xr fhopen 2 , +.Xr jail_set 2 , .Xr kqueue 2 , .Xr mq_open 2 , .Xr open 2 , @@ -134,6 +135,7 @@ .Xr accept4 2 , .Xr cap_enter 2 , .Xr fhopen 2 , +.Xr jail_set 2 , .Xr kqueue 2 , .Xr mq_open 2 , .Xr open 2 , diff --git a/lib/libsys/jail.2 b/lib/libsys/jail.2 --- a/lib/libsys/jail.2 +++ b/lib/libsys/jail.2 @@ -340,6 +340,25 @@ and .Fn jail_remove , except that they operate on the jail referred to by the passed descriptor. +.Pp +Jail descriptors are useful in +.Xr capsicum 4 +capability mode, which disallows global access to jails via +.Va jid +or +.Va name . +With appropriate capabilities set, the +.Va desc +parameter and associated flags provide a capability-safe way to access +jails. +Another useful pseudo-parameter for +.Xr capsicum 4 +is +.Va root , +which is a file descriptor analog to the +.Va path +parameter, allowing a new jail's root directory to be set without +needing the file system namespace that capsicum prohibits. .Sh RETURN VALUES If successful, .Fn jail , @@ -370,6 +389,8 @@ The version number of the argument is not correct. 
.It Bq Er EAGAIN No free JID could be found. +.It Bq Er ECAPMODE +The process is in capability mode. .El .Pp The @@ -397,7 +418,7 @@ or one of the addresses contained within it, points to an address outside the allocated address space of the process. .It Bq Er ENOENT -The jail referred to by a +The jail referred to by the .Va jid or .Va name @@ -405,16 +426,20 @@ .Dv JAIL_CREATE flag is not set. .It Bq Er ENOENT -The jail referred to by a +The jail referred to by the .Va jid parameter is not accessible by the process, because the process is in a different jail. .It Bq Er ENOENT -The jail referred to by a +The jail referred to by the .Va desc parameter has been removed. +.It Bq Er ENOENT +The directory referred to by the +.Va path +parameter does not exist. .It Bq Er EEXIST -The jail referred to by a +The jail referred to by the .Va jid or .Va name @@ -437,6 +462,12 @@ flags is not set. .It Bq Er ENAMETOOLONG A supplied string parameter is longer than allowed. +.It Bq Er ENOTDIR +The +.Va path +or +.Va root +parameter specifies something other than a directory. .It Bq Er EAGAIN There are no jail IDs left. .It Bq Er EMFILE @@ -457,6 +488,21 @@ .Dv JAIL_OWN_DESC flag set, because the system file table is full. +.It Bq Er ECAPMODE +Neither the +.Dv JAIL_USE_DESC +nor +.Dv JAIL_AT_DESC +flag is set and the process is in capability mode. +.It Bq Er ECAPMODE +A supplied parameter is not permitted in capability mode, such as +.Va jid +or +.Va path . +.It Bq Er ENOTCAPABLE +The process has insufficient rights for the operation(s) specified by the +.Dv JAIL_UPDATE , Dv JAIL_CREATE , Dv JAIL_ATTACH , or Dv JAIL_OWN_DESC +flags. .El .Pp The @@ -465,13 +511,13 @@ will fail if: .Bl -tag -width Er .It Bq Er ENOENT -The jail referred to by a +The jail referred to by the .Va jid or .Va name parameter does not exist. .It Bq Er ENOENT -The jail referred to by a +The jail referred to by the .Va jid is not accessible by the process, because the process is in a different jail. 
@@ -480,7 +526,7 @@ .Va lastjid parameter is greater than the highest current jail ID. .It Bq Er ENOENT -The jail referred to by a +The jail referred to by the .Va desc parameter has been removed .Pq even if the Dv JAIL_CREATE flag has been set . @@ -510,6 +556,18 @@ .Dv JAIL_OWN_DESC flag set, because the system file table is full. +.It Bq Er ECAPMODE +Neither the +.Dv JAIL_USE_DESC +nor +.Dv JAIL_AT_DESC +flag is set and the process is in capability mode. +.It Bq Er ECAPMODE +A requested parameter is not permitted in capability mode. +.It Bq Er ENOTCAPABLE +The +.Dv JAIL_OWN_DESC +flag is set and the process lacks the CAP_JAIL_REMOVE right. .El .Pp The @@ -525,6 +583,8 @@ The jail specified by .Fa jid does not exist. +.It Bq Er ECAPMODE +The process is in capability mode. .El .Pp The @@ -538,12 +598,14 @@ The .Fa fd argument is not a valid jail descriptor. -.It Bq Er EPERM -The jail descriptor was created by a user other than the super-user. .It Bq Er EINVAL The jail specified by .Fa jid has been removed. +.It Bq Er EPERM +The jail descriptor was created by a user other than the super-user. +.It Bq Er ENOTCAPABLE +The process has insufficient rights to perform the operation. .El .Pp Further @@ -559,6 +621,7 @@ .Xr chroot 2 manual page for details. .Sh SEE ALSO +.Xr capsicum 4 , .Xr chdir 2 , .Xr chroot 2 , .Xr jail 8 diff --git a/share/man/man4/rights.4 b/share/man/man4/rights.4 --- a/share/man/man4/rights.4 +++ b/share/man/man4/rights.4 @@ -335,6 +335,84 @@ The list of permitted ioctl commands can be further limited with the .Xr cap_ioctls_limit 2 system call. +.It Dv CAP_JAIL_GET +Permit +.Xr jail_get 2 +with the +.Dv JAIL_USE_DESC +flag. +.It Dv CAP_JAIL_SET +Permit +.Xr jail_set 2 +with the +.Dv JAIL_USE_DESC +and +.Dv JAIL_UPDATE +flags. +.It Dv CAP_JAIL_ATTACH +Permit +.Xr jail_attach_jd 2 , +and +.Xr jail_set 2 +with the +.Dv JAIL_ATTACH +flag. 
+.It Dv CAP_JAIL_REMOVE +Permit +.Xr jail_remove_jd 2 , +and either +.Xr jail_get 2 +or +.Xr jail_set 2 +with the +.Dv JAIL_OWN_DESC +flag. +.It Dv CAP_JAIL_GETAT +Permit +.Xr jail_get 2 +with the +.Dv JAIL_AT_DESC +flag. +.It Dv CAP_JAIL_SETAT +Permit +.Xr jail_set 2 +with the +.Dv JAIL_AT_DESC +and +.Dv JAIL_UPDATE +flags. +.It Dv CAP_JAIL_CREATEAT +Permit +.Xr jail_set 2 +with the +.Dv JAIL_AT_DESC +and +.Dv JAIL_CREATE +flags. +.It Dv CAP_JAIL_ATTACHAT +Permit +.Xr jail_attach_jd 2 , +following +.Xr jail_get 2 +or +.Xr jail_set 2 +with the +.Dv JAIL_AT_DESC +and +.Dv JAIL_GET_DESC +flags. +.It Dv CAP_JAIL_REMOVEAT +Permit +.Xr jail_remove_jd 2 , +following +.Xr jail_get 2 +or +.Xr jail_set 2 +with the +.Dv JAIL_AT_DESC +and +.Dv JAIL_GET_DESC +flags. .It Dv CAP_KQUEUE An alias to .Dv CAP_KQUEUE_CHANGE @@ -681,6 +759,10 @@ .Xr getsockname 2 , .Xr getsockopt 2 , .Xr ioctl 2 , +.Xr jail_attach_jd 2 , +.Xr jail_get 2 , +.Xr jail_set 2 , +.Xr jail_remove_jd 2 , .Xr kevent 2 , .Xr kqueue 2 , .Xr linkat 2 , diff --git a/sys/compat/freebsd32/freebsd32_sysent.c b/sys/compat/freebsd32/freebsd32_sysent.c --- a/sys/compat/freebsd32/freebsd32_sysent.c +++ b/sys/compat/freebsd32/freebsd32_sysent.c @@ -573,8 +573,8 @@ { .sy_narg = AS(unlinkat_args), .sy_call = (sy_call_t *)sys_unlinkat, .sy_auevent = AUE_UNLINKAT, .sy_flags = SYF_CAPENABLED, .sy_thrcnt = SY_THR_STATIC }, /* 503 = unlinkat */ { .sy_narg = AS(posix_openpt_args), .sy_call = (sy_call_t *)sys_posix_openpt, .sy_auevent = AUE_POSIX_OPENPT, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 504 = posix_openpt */ { .sy_narg = 0, .sy_call = (sy_call_t *)nosys, .sy_auevent = AUE_NULL, .sy_flags = 0, .sy_thrcnt = SY_THR_ABSENT }, /* 505 = obsolete kgssapi */ - { .sy_narg = AS(freebsd32_jail_get_args), .sy_call = (sy_call_t *)freebsd32_jail_get, .sy_auevent = AUE_JAIL_GET, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 506 = freebsd32_jail_get */ - { .sy_narg = AS(freebsd32_jail_set_args), .sy_call = (sy_call_t 
*)freebsd32_jail_set, .sy_auevent = AUE_JAIL_SET, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 507 = freebsd32_jail_set */ + { .sy_narg = AS(freebsd32_jail_get_args), .sy_call = (sy_call_t *)freebsd32_jail_get, .sy_auevent = AUE_JAIL_GET, .sy_flags = SYF_CAPENABLED, .sy_thrcnt = SY_THR_STATIC }, /* 506 = freebsd32_jail_get */ + { .sy_narg = AS(freebsd32_jail_set_args), .sy_call = (sy_call_t *)freebsd32_jail_set, .sy_auevent = AUE_JAIL_SET, .sy_flags = SYF_CAPENABLED, .sy_thrcnt = SY_THR_STATIC }, /* 507 = freebsd32_jail_set */ { .sy_narg = AS(jail_remove_args), .sy_call = (sy_call_t *)sys_jail_remove, .sy_auevent = AUE_JAIL_REMOVE, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 508 = jail_remove */ { compat12(AS(freebsd12_closefrom_args),closefrom), .sy_auevent = AUE_CLOSEFROM, .sy_flags = SYF_CAPENABLED, .sy_thrcnt = SY_THR_STATIC }, /* 509 = freebsd12 closefrom */ { .sy_narg = AS(freebsd32___semctl_args), .sy_call = (sy_call_t *)lkmressys, .sy_auevent = AUE_NULL, .sy_flags = 0, .sy_thrcnt = SY_THR_ABSENT }, /* 510 = freebsd32___semctl */ @@ -664,6 +664,6 @@ { .sy_narg = AS(inotify_rm_watch_args), .sy_call = (sy_call_t *)sys_inotify_rm_watch, .sy_auevent = AUE_INOTIFY, .sy_flags = SYF_CAPENABLED, .sy_thrcnt = SY_THR_STATIC }, /* 594 = inotify_rm_watch */ { .sy_narg = AS(getgroups_args), .sy_call = (sy_call_t *)sys_getgroups, .sy_auevent = AUE_GETGROUPS, .sy_flags = SYF_CAPENABLED, .sy_thrcnt = SY_THR_STATIC }, /* 595 = getgroups */ { .sy_narg = AS(setgroups_args), .sy_call = (sy_call_t *)sys_setgroups, .sy_auevent = AUE_SETGROUPS, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 596 = setgroups */ - { .sy_narg = AS(jail_attach_jd_args), .sy_call = (sy_call_t *)sys_jail_attach_jd, .sy_auevent = AUE_JAIL_ATTACH, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 597 = jail_attach_jd */ - { .sy_narg = AS(jail_remove_jd_args), .sy_call = (sy_call_t *)sys_jail_remove_jd, .sy_auevent = AUE_JAIL_REMOVE, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 598 = jail_remove_jd 
*/ + { .sy_narg = AS(jail_attach_jd_args), .sy_call = (sy_call_t *)sys_jail_attach_jd, .sy_auevent = AUE_JAIL_ATTACH, .sy_flags = SYF_CAPENABLED, .sy_thrcnt = SY_THR_STATIC }, /* 597 = jail_attach_jd */ + { .sy_narg = AS(jail_remove_jd_args), .sy_call = (sy_call_t *)sys_jail_remove_jd, .sy_auevent = AUE_JAIL_REMOVE, .sy_flags = SYF_CAPENABLED, .sy_thrcnt = SY_THR_STATIC }, /* 598 = jail_remove_jd */ }; diff --git a/sys/compat/linux/linux_mib.c b/sys/compat/linux/linux_mib.c --- a/sys/compat/linux/linux_mib.c +++ b/sys/compat/linux/linux_mib.c @@ -394,11 +394,11 @@ } SYSCTL_JAIL_PARAM_SYS_NODE(linux, CTLFLAG_RW, "Jail Linux parameters"); -SYSCTL_JAIL_PARAM_STRING(_linux, osname, CTLFLAG_RW, LINUX_MAX_UTSNAME, - "Jail Linux kernel OS name"); -SYSCTL_JAIL_PARAM_STRING(_linux, osrelease, CTLFLAG_RW, LINUX_MAX_UTSNAME, - "Jail Linux kernel OS release"); -SYSCTL_JAIL_PARAM(_linux, oss_version, CTLTYPE_INT | CTLFLAG_RW, +SYSCTL_JAIL_PARAM_STRING(_linux, osname, CTLFLAG_RW | CTLFLAG_CAPRW, + LINUX_MAX_UTSNAME, "Jail Linux kernel OS name"); +SYSCTL_JAIL_PARAM_STRING(_linux, osrelease, CTLFLAG_RW | CTLFLAG_CAPRW, + LINUX_MAX_UTSNAME, "Jail Linux kernel OS release"); +SYSCTL_JAIL_PARAM(_linux, oss_version, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_CAPRW, "I", "Jail Linux OSS version"); static int diff --git a/sys/kern/init_sysent.c b/sys/kern/init_sysent.c --- a/sys/kern/init_sysent.c +++ b/sys/kern/init_sysent.c @@ -572,8 +572,8 @@ { .sy_narg = AS(unlinkat_args), .sy_call = (sy_call_t *)sys_unlinkat, .sy_auevent = AUE_UNLINKAT, .sy_flags = SYF_CAPENABLED, .sy_thrcnt = SY_THR_STATIC }, /* 503 = unlinkat */ { .sy_narg = AS(posix_openpt_args), .sy_call = (sy_call_t *)sys_posix_openpt, .sy_auevent = AUE_POSIX_OPENPT, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 504 = posix_openpt */ { .sy_narg = 0, .sy_call = (sy_call_t *)nosys, .sy_auevent = AUE_NULL, .sy_flags = 0, .sy_thrcnt = SY_THR_ABSENT }, /* 505 = obsolete kgssapi */ - { .sy_narg = AS(jail_get_args), .sy_call = (sy_call_t 
*)sys_jail_get, .sy_auevent = AUE_JAIL_GET, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 506 = jail_get */ - { .sy_narg = AS(jail_set_args), .sy_call = (sy_call_t *)sys_jail_set, .sy_auevent = AUE_JAIL_SET, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 507 = jail_set */ + { .sy_narg = AS(jail_get_args), .sy_call = (sy_call_t *)sys_jail_get, .sy_auevent = AUE_JAIL_GET, .sy_flags = SYF_CAPENABLED, .sy_thrcnt = SY_THR_STATIC }, /* 506 = jail_get */ + { .sy_narg = AS(jail_set_args), .sy_call = (sy_call_t *)sys_jail_set, .sy_auevent = AUE_JAIL_SET, .sy_flags = SYF_CAPENABLED, .sy_thrcnt = SY_THR_STATIC }, /* 507 = jail_set */ { .sy_narg = AS(jail_remove_args), .sy_call = (sy_call_t *)sys_jail_remove, .sy_auevent = AUE_JAIL_REMOVE, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 508 = jail_remove */ { compat12(AS(freebsd12_closefrom_args),closefrom), .sy_auevent = AUE_CLOSEFROM, .sy_flags = SYF_CAPENABLED, .sy_thrcnt = SY_THR_STATIC }, /* 509 = freebsd12 closefrom */ { .sy_narg = AS(__semctl_args), .sy_call = (sy_call_t *)lkmressys, .sy_auevent = AUE_NULL, .sy_flags = 0, .sy_thrcnt = SY_THR_ABSENT }, /* 510 = __semctl */ @@ -663,6 +663,6 @@ { .sy_narg = AS(inotify_rm_watch_args), .sy_call = (sy_call_t *)sys_inotify_rm_watch, .sy_auevent = AUE_INOTIFY, .sy_flags = SYF_CAPENABLED, .sy_thrcnt = SY_THR_STATIC }, /* 594 = inotify_rm_watch */ { .sy_narg = AS(getgroups_args), .sy_call = (sy_call_t *)sys_getgroups, .sy_auevent = AUE_GETGROUPS, .sy_flags = SYF_CAPENABLED, .sy_thrcnt = SY_THR_STATIC }, /* 595 = getgroups */ { .sy_narg = AS(setgroups_args), .sy_call = (sy_call_t *)sys_setgroups, .sy_auevent = AUE_SETGROUPS, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 596 = setgroups */ - { .sy_narg = AS(jail_attach_jd_args), .sy_call = (sy_call_t *)sys_jail_attach_jd, .sy_auevent = AUE_JAIL_ATTACH, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 597 = jail_attach_jd */ - { .sy_narg = AS(jail_remove_jd_args), .sy_call = (sy_call_t *)sys_jail_remove_jd, .sy_auevent = 
AUE_JAIL_REMOVE, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 598 = jail_remove_jd */ + { .sy_narg = AS(jail_attach_jd_args), .sy_call = (sy_call_t *)sys_jail_attach_jd, .sy_auevent = AUE_JAIL_ATTACH, .sy_flags = SYF_CAPENABLED, .sy_thrcnt = SY_THR_STATIC }, /* 597 = jail_attach_jd */ + { .sy_narg = AS(jail_remove_jd_args), .sy_call = (sy_call_t *)sys_jail_remove_jd, .sy_auevent = AUE_JAIL_REMOVE, .sy_flags = SYF_CAPENABLED, .sy_thrcnt = SY_THR_STATIC }, /* 598 = jail_remove_jd */ }; diff --git a/sys/kern/kern_descrip.c b/sys/kern/kern_descrip.c --- a/sys/kern/kern_descrip.c +++ b/sys/kern/kern_descrip.c @@ -1907,7 +1907,7 @@ /* * Fill the given filecaps structure with full rights. */ -static void +void filecaps_fill(struct filecaps *fcaps) { diff --git a/sys/kern/kern_jail.c b/sys/kern/kern_jail.c --- a/sys/kern/kern_jail.c +++ b/sys/kern/kern_jail.c @@ -38,6 +38,7 @@ #include #include #include +#include #include #include #include @@ -166,6 +167,7 @@ static void prison_set_allow_locked(struct prison *pr, unsigned flag, int enable); static char *prison_path(struct prison *pr1, struct prison *pr2); +static void prison_shift_capabilities(struct filecaps *fcaps); #ifdef RACCT static void prison_racct_attach(struct prison *pr); static void prison_racct_modify(struct prison *pr); @@ -990,7 +992,8 @@ int kern_jail_set(struct thread *td, struct uio *optuio, int flags) { - struct file *jfp_out; + struct file *jfp_out, *rootfp; + struct filecaps fcaps; struct nameidata nd; #ifdef INET struct prison_ip *ip4; @@ -1007,6 +1010,7 @@ char *g_path, *osrelstr; struct bool_flags *bf; struct jailsys_flags *jsf; + cap_rights_t rights; #if defined(INET) || defined(INET6) void *op; #endif @@ -1016,7 +1020,7 @@ int error, errmsg_len, errmsg_pos; int gotchildmax, gotenforce, gothid, gotrsnum, gotslevel; int deadid, jfd_in, jfd_out, jfd_pos, jid, jsys, len, level; - int childmax, osreldt, rsnum, slevel; + int childmax, osreldt, rootfd, rsnum, slevel; #ifdef INET int ip4s; bool 
redo_ip4; @@ -1029,12 +1033,15 @@ uint64_t pr_allow, ch_allow, pr_flags, ch_flags; uint64_t pr_allow_diff; unsigned tallow; + uint8_t rootfd_flags; char numbuf[12]; mypr = td->td_ucred->cr_prison; if (((flags & (JAIL_CREATE | JAIL_AT_DESC)) == JAIL_CREATE) && mypr->pr_childmax == 0) return (EPERM); + if (IN_CAPABILITY_MODE(td) && !(flags & (JAIL_USE_DESC | JAIL_AT_DESC))) + return (ECAPMODE); if (flags & ~JAIL_SET_MASK) return (EINVAL); if ((flags & (JAIL_USE_DESC | JAIL_AT_DESC)) == @@ -1072,49 +1079,6 @@ goto done_errmsg; } - error = vfs_copyopt(opts, "desc", &jfd_in, sizeof(jfd_in)); - if (error == ENOENT) { - if (flags & (JAIL_USE_DESC | JAIL_AT_DESC | JAIL_GET_DESC | - JAIL_OWN_DESC)) { - vfs_opterror(opts, "missing desc"); - goto done_errmsg; - } - jfd_in = -1; - } else if (error != 0) - goto done_free; - else { - if (!(flags & (JAIL_USE_DESC | JAIL_AT_DESC | JAIL_GET_DESC | - JAIL_OWN_DESC))) { - vfs_opterror(opts, "unexpected desc"); - goto done_errmsg; - } - if (flags & JAIL_AT_DESC) { - /* - * Look up and create jails based on the - * descriptor's prison. - */ - prison_free(mypr); - error = jaildesc_find(td, jfd_in, &mypr, NULL); - if (error != 0) { - vfs_opterror(opts, error == ENOENT ? - "descriptor to dead jail" : - "not a jail descriptor"); - goto done_errmsg; - } - if ((flags & JAIL_CREATE) && mypr->pr_childmax == 0) { - error = EPERM; - goto done_free; - } - } - if (flags & (JAIL_GET_DESC | JAIL_OWN_DESC)) { - /* Allocate a jail descriptor to return later. */ - error = jaildesc_alloc(td, &jfp_out, &jfd_out, - flags & JAIL_OWN_DESC); - if (error) - goto done_free; - } - } - /* * Delay the permission check if using a jail descriptor, * until we get the descriptor's credentials. 
@@ -1132,6 +1096,10 @@ jid = 0; else if (error != 0) goto done_free; + else if ((flags & JAIL_CREATE) && IN_CAPABILITY_MODE(td)) { + error = ECAPMODE; + goto done_free; + } error = vfs_copyopt(opts, "securelevel", &slevel, sizeof(slevel)); if (error == ENOENT) @@ -1447,6 +1415,10 @@ else if (error != 0) goto done_free; else { + if (IN_CAPABILITY_MODE(td)) { + error = ECAPMODE; + goto done_free; + } if (flags & JAIL_UPDATE) { error = EINVAL; vfs_opterror(opts, @@ -1475,19 +1447,158 @@ if (root->v_type != VDIR) { error = ENOTDIR; vput(root); + root = NULL; goto done_free; } VOP_UNLOCK(root); } + error = vfs_copyopt(opts, "root", &rootfd, sizeof(rootfd)); + if (error != 0) { + if (error != ENOENT) + goto done_free; + } else { + if (flags & JAIL_UPDATE) { + error = EINVAL; + vfs_opterror(opts, + "root cannot be changed after creation"); + goto done_errmsg; + } + if (path != NULL) { + error = EINVAL; + vfs_opterror(opts, + "path and root cannot both be set"); + goto done_errmsg; + } + /* + * Attachment includes chroot, so get the descriptor + * the same way fchroot(2) does. + */ + error = getvnode_path(td, rootfd, &cap_fchroot_rights, + &rootfd_flags, &rootfp); + if (error != 0) + goto done_free; + if ((rootfd_flags & UF_RESOLVE_BENEATH) != 0) { + fdrop(rootfp, td); + error = ENOTCAPABLE; + goto done_free; + } + root = rootfp->f_vnode; + vrefact(root); + fdrop(rootfp, td); + if (root->v_type != VDIR) { + error = ENOTDIR; + vrele(root); + root = NULL; + goto done_free; + } + error = vn_fullpath_global(root, &path, &g_path); + if (error == 0 && strlen(path) >= MAXPATHLEN) + error = ENAMETOOLONG; + if (error != 0) { + vrele(root); + root = NULL; + goto done_free; + } + } + + /* + * Read the jail descriptor passed in and/or create one to return. 
+ */ + pr = NULL; + filecaps_fill(&fcaps); + drflags = 0; + created = false; + maybe_changed = false; + error = vfs_copyopt(opts, "desc", &jfd_in, sizeof(jfd_in)); + if (error == ENOENT) { + if (flags & (JAIL_USE_DESC | JAIL_AT_DESC | JAIL_GET_DESC | + JAIL_OWN_DESC)) { + vfs_opterror(opts, "missing desc"); + goto done_errmsg; + } + jfd_in = -1; + } else if (error != 0) + goto done_free; + else { + if (!(flags & (JAIL_USE_DESC | JAIL_AT_DESC | JAIL_GET_DESC | + JAIL_OWN_DESC))) { + vfs_opterror(opts, "unexpected desc"); + goto done_errmsg; + } + if (flags & JAIL_USE_DESC) { + /* Get the jail from its descriptor. */ + rights = cap_jail_set_rights; + if (flags & JAIL_ATTACH) + cap_rights_set_one(&rights, CAP_JAIL_ATTACH); + if (flags & JAIL_OWN_DESC) + cap_rights_set_one(&rights, CAP_JAIL_REMOVE); + error = jaildesc_find(td, jfd_in, &rights, &pr, &jdcred, + &fcaps); + if (error != 0) { + if (error == ENOENT) + vfs_opterror(opts, + "descriptor to dead jail"); + goto done_deref; + } + drflags |= PD_DEREF; + error = priv_check_cred(jdcred, PRIV_JAIL_SET); + if (error == 0 && (flags & JAIL_ATTACH)) + error = priv_check_cred(jdcred, + PRIV_JAIL_ATTACH); + crfree(jdcred); + if (error) + goto done_deref; + } else if (flags & JAIL_AT_DESC) { + /* + * Look up and create jails based on the + * descriptor's prison. 
+ */ + cap_rights_init_zero(&rights); + if (flags & JAIL_UPDATE) + cap_rights_set_one(&rights, CAP_JAIL_SETAT); + if (flags & JAIL_CREATE) + cap_rights_set_one(&rights, CAP_JAIL_CREATEAT); + if (flags & JAIL_ATTACH) + cap_rights_set_one(&rights, CAP_JAIL_ATTACHAT); + if (flags & JAIL_OWN_DESC) + cap_rights_set_one(&rights, CAP_JAIL_REMOVEAT); + error = jaildesc_find(td, jfd_in, &rights, &tpr, NULL, + &fcaps); + if (error == ENOENT) { + vfs_opterror(opts, "descriptor to dead jail"); + goto done_errmsg; + } + if (error != 0) + goto done_free; + prison_free(mypr); + mypr = tpr; + if ((flags & JAIL_CREATE) && mypr->pr_childmax == 0) { + error = EPERM; + goto done_free; + } + } + if (flags & (JAIL_GET_DESC | JAIL_OWN_DESC)) { + /* Allocate a jail descriptor to return later. */ + if (flags & (JAIL_USE_DESC | JAIL_AT_DESC)) { + if (flags & JAIL_AT_DESC) + prison_shift_capabilities(&fcaps); + error = jaildesc_alloc(td, &jfp_out, &jfd_out, + &fcaps, flags & JAIL_OWN_DESC); + } else + error = jaildesc_alloc(td, &jfp_out, &jfd_out, + NULL, flags & JAIL_OWN_DESC); + if (error) + goto done_deref; + } + } + /* * Find the specified jail, or at least its parent. * This abuses the file error codes ENOENT and EEXIST. */ - pr = NULL; inspr = NULL; deadpr = NULL; - maybe_changed = false; if (cuflags == JAIL_CREATE && jid == 0 && name != NULL) { namelc = strrchr(name, '.'); jid = strtoul(namelc != NULL ? namelc + 1 : name, &p, 10); @@ -1495,7 +1606,7 @@ jid = 0; } sx_xlock(&allprison_lock); - drflags = PD_LIST_XLOCKED; + drflags |= PD_LIST_XLOCKED; ppr = mypr; if (!prison_isalive(ppr)) { /* This jail is dying. This process will surely follow. */ @@ -1503,21 +1614,7 @@ goto done_deref; } if (flags & JAIL_USE_DESC) { - /* Get the jail from its descriptor. */ - error = jaildesc_find(td, jfd_in, &pr, &jdcred); - if (error) { - vfs_opterror(opts, error == ENOENT ? 
- "descriptor to dead jail" : - "not a jail descriptor"); - goto done_deref; - } - drflags |= PD_DEREF; - error = priv_check_cred(jdcred, PRIV_JAIL_SET); - if (error == 0 && (flags & JAIL_ATTACH)) - error = priv_check_cred(jdcred, PRIV_JAIL_ATTACH); - crfree(jdcred); - if (error) - goto done_deref; + /* Use the jail from the descriptor. */ mtx_lock(&pr->pr_mtx); drflags |= PD_LOCKED; if (cuflags == JAIL_CREATE) { @@ -2281,6 +2378,7 @@ td->td_retval[0] = pr->pr_id; done_deref: + filecaps_free(&fcaps); /* * Report changes to kevent. This can happen even if the * system call fails, as changes might have been made before @@ -2474,15 +2572,19 @@ { struct bool_flags *bf; struct file *jfp_out; + struct filecaps fcaps; struct jailsys_flags *jsf; - struct prison *pr, *mypr; + struct prison *pr, *mypr, *tpr; struct vfsopt *opt; struct vfsoptlist *opts; + cap_rights_t rights; char *errmsg, *name; int drflags, error, errmsg_len, errmsg_pos, i, jid, len, pos; int jfd_in, jfd_out; unsigned f; + if (IN_CAPABILITY_MODE(td) && !(flags & (JAIL_USE_DESC | JAIL_AT_DESC))) + return (ECAPMODE); if (flags & ~JAIL_GET_MASK) return (EINVAL); if ((flags & (JAIL_USE_DESC | JAIL_AT_DESC)) == @@ -2501,18 +2603,21 @@ jfd_out = -1; /* - * Find the prison specified by one of: desc, lastjid, jid, name. + * Read the jail descriptor passed in and/or create one to return. 
*/ - sx_slock(&allprison_lock); - drflags = PD_LIST_SLOCKED; - + filecaps_fill(&fcaps); + drflags = 0; error = vfs_copyopt(opts, "desc", &jfd_in, sizeof(jfd_in)); if (error == ENOENT) { - if (flags & (JAIL_AT_DESC | JAIL_GET_DESC | JAIL_OWN_DESC)) { + if (flags & (JAIL_USE_DESC | JAIL_AT_DESC | JAIL_GET_DESC | + JAIL_OWN_DESC)) { vfs_opterror(opts, "missing desc"); goto done; } - } else if (error == 0) { + jfd_in = -1; + } else if (error != 0) + goto done; + else { if (!(flags & (JAIL_USE_DESC | JAIL_AT_DESC | JAIL_GET_DESC | JAIL_OWN_DESC))) { vfs_opterror(opts, "unexpected desc"); @@ -2520,44 +2625,66 @@ } if (flags & JAIL_USE_DESC) { /* Get the jail from its descriptor. */ - error = jaildesc_find(td, jfd_in, &pr, NULL); - if (error) { - vfs_opterror(opts, error == ENOENT ? - "descriptor to dead jail" : - "not a jail descriptor"); + rights = cap_jail_get_rights; + if (flags & JAIL_OWN_DESC) + cap_rights_set_one(&rights, CAP_JAIL_REMOVE); + error = jaildesc_find(td, jfd_in, &rights, &pr, NULL, + &fcaps); + if (error != 0) { + if (error == ENOENT) + vfs_opterror(opts, + "descriptor to dead jail"); goto done; } drflags |= PD_DEREF; - mtx_lock(&pr->pr_mtx); - drflags |= PD_LOCKED; - if (!(prison_isalive(pr) || (flags & JAIL_DYING))) { - error = ENOENT; - vfs_opterror(opts, "jail %d is dying", - pr->pr_id); - goto done; - } - goto found_prison; - } - if (flags & JAIL_AT_DESC) { + } else if (flags & JAIL_AT_DESC) { /* Look up jails based on the descriptor's prison. */ - prison_free(mypr); - error = jaildesc_find(td, jfd_in, &mypr, NULL); + rights = cap_jail_getat_rights; + if (flags & JAIL_OWN_DESC) + cap_rights_set_one(&rights, CAP_JAIL_REMOVEAT); + error = jaildesc_find(td, jfd_in, &rights, &tpr, NULL, + &fcaps); if (error != 0) { - vfs_opterror(opts, error == ENOENT ? 
- "descriptor to dead jail" : - "not a jail descriptor"); + if (error == ENOENT) + vfs_opterror(opts, + "descriptor to dead jail"); goto done; } + prison_free(mypr); + mypr = tpr; } if (flags & (JAIL_GET_DESC | JAIL_OWN_DESC)) { /* Allocate a jail descriptor to return later. */ - error = jaildesc_alloc(td, &jfp_out, &jfd_out, - flags & JAIL_OWN_DESC); + if (flags & (JAIL_USE_DESC | JAIL_AT_DESC)) { + if (flags & JAIL_AT_DESC) + prison_shift_capabilities(&fcaps); + error = jaildesc_alloc(td, &jfp_out, &jfd_out, + &fcaps, flags & JAIL_OWN_DESC); + } else + error = jaildesc_alloc(td, &jfp_out, &jfd_out, + NULL, flags & JAIL_OWN_DESC); if (error) goto done; } - } else - goto done; + } + + /* + * Find the prison specified by one of: desc, lastjid, jid, name. + */ + sx_slock(&allprison_lock); + drflags |= PD_LIST_SLOCKED; + + if (flags & JAIL_USE_DESC) { + /* Use the jail from the descriptor. */ + mtx_lock(&pr->pr_mtx); + drflags |= PD_LOCKED; + if (!(prison_isalive(pr) || (flags & JAIL_DYING))) { + error = ENOENT; + vfs_opterror(opts, "jail %d is dying", pr->pr_id); + goto done; + } + goto found_prison; + } error = vfs_copyopt(opts, "lastjid", &jid, sizeof(jid)); if (error == 0) { @@ -2820,6 +2947,7 @@ else if (drflags & PD_LIST_XLOCKED) sx_xunlock(&allprison_lock); /* Clean up other resources. 
*/ + filecaps_free(&fcaps); if (jfp_out != NULL) (void)fdrop(jfp_out, td); if (error && jfd_out >= 0) @@ -2882,7 +3010,8 @@ struct ucred *jdcred; int error; - error = jaildesc_find(td, uap->fd, &pr, &jdcred); + error = jaildesc_find(td, uap->fd, &cap_jail_remove_rights, &pr, + &jdcred, NULL); if (error) return (error); error = priv_check_cred(jdcred, PRIV_JAIL_REMOVE); @@ -2961,7 +3090,8 @@ sx_slock(&allprison_lock); drflags = PD_LIST_SLOCKED; - error = jaildesc_find(td, uap->fd, &pr, &jdcred); + error = jaildesc_find(td, uap->fd, &cap_jail_attach_rights, &pr, + &jdcred, NULL); if (error) goto fail; drflags |= PD_DEREF; @@ -4934,110 +5064,115 @@ * CTLFLAG_RDTUN in the following indicates jail parameters that can be set at * jail creation time but cannot be changed in an existing jail. */ -SYSCTL_JAIL_PARAM(, jid, CTLTYPE_INT | CTLFLAG_RDTUN, "I", "Jail ID"); -SYSCTL_JAIL_PARAM(, desc, CTLTYPE_INT | CTLFLAG_RW, "I", "Jail descriptor"); -SYSCTL_JAIL_PARAM(, parent, CTLTYPE_INT | CTLFLAG_RD, "I", "Jail parent ID"); -SYSCTL_JAIL_PARAM_STRING(, name, CTLFLAG_RW, MAXHOSTNAMELEN, "Jail name"); -SYSCTL_JAIL_PARAM_STRING(, path, CTLFLAG_RDTUN, MAXPATHLEN, "Jail root path"); -SYSCTL_JAIL_PARAM(, securelevel, CTLTYPE_INT | CTLFLAG_RW, +SYSCTL_JAIL_PARAM(, jid, CTLTYPE_INT | CTLFLAG_RDTUN | CTLFLAG_CAPRW, "I", + "Jail ID"); +SYSCTL_JAIL_PARAM(, parent, CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_CAPRW, "I", + "Jail parent ID"); +SYSCTL_JAIL_PARAM_STRING(, name, CTLFLAG_RW | CTLFLAG_CAPRW, MAXHOSTNAMELEN, + "Jail name"); +SYSCTL_JAIL_PARAM_STRING(, path, CTLFLAG_RDTUN | CTLFLAG_CAPRW, MAXPATHLEN, + "Jail root path"); +SYSCTL_JAIL_PARAM(, securelevel, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_CAPRW, "I", "Jail secure level"); -SYSCTL_JAIL_PARAM(, osreldate, CTLTYPE_INT | CTLFLAG_RDTUN, "I", +SYSCTL_JAIL_PARAM(, osreldate, CTLTYPE_INT | CTLFLAG_RDTUN | CTLFLAG_CAPRW, "I", "Jail value for kern.osreldate and uname -K"); -SYSCTL_JAIL_PARAM_STRING(, osrelease, CTLFLAG_RDTUN, OSRELEASELEN, - "Jail 
value for kern.osrelease and uname -r"); -SYSCTL_JAIL_PARAM(, enforce_statfs, CTLTYPE_INT | CTLFLAG_RW, +SYSCTL_JAIL_PARAM_STRING(, osrelease, CTLFLAG_RDTUN | CTLFLAG_CAPRW, + OSRELEASELEN, "Jail value for kern.osrelease and uname -r"); +SYSCTL_JAIL_PARAM(, enforce_statfs, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_CAPRW, "I", "Jail cannot see all mounted file systems"); -SYSCTL_JAIL_PARAM(, devfs_ruleset, CTLTYPE_INT | CTLFLAG_RW, +SYSCTL_JAIL_PARAM(, devfs_ruleset, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_CAPRW, "I", "Ruleset for in-jail devfs mounts"); -SYSCTL_JAIL_PARAM(, persist, CTLTYPE_INT | CTLFLAG_RW, +SYSCTL_JAIL_PARAM(, persist, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_CAPRW, "B", "Jail persistence"); #ifdef VIMAGE -SYSCTL_JAIL_PARAM(, vnet, CTLTYPE_INT | CTLFLAG_RDTUN, +SYSCTL_JAIL_PARAM(, vnet, CTLTYPE_INT | CTLFLAG_RDTUN | CTLFLAG_CAPRW, "E,jailsys", "Virtual network stack"); #endif -SYSCTL_JAIL_PARAM(, dying, CTLTYPE_INT | CTLFLAG_RD, +SYSCTL_JAIL_PARAM(, dying, CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_CAPRW, "B", "Jail is in the process of shutting down"); SYSCTL_JAIL_PARAM_NODE(children, "Number of child jails"); -SYSCTL_JAIL_PARAM(_children, cur, CTLTYPE_INT | CTLFLAG_RD, +SYSCTL_JAIL_PARAM(_children, cur, CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_CAPRW, "I", "Current number of child jails"); -SYSCTL_JAIL_PARAM(_children, max, CTLTYPE_INT | CTLFLAG_RW, +SYSCTL_JAIL_PARAM(_children, max, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_CAPRW, "I", "Maximum number of child jails"); SYSCTL_JAIL_PARAM_SYS_NODE(host, CTLFLAG_RW, "Jail host info"); -SYSCTL_JAIL_PARAM_STRING(_host, hostname, CTLFLAG_RW, MAXHOSTNAMELEN, - "Jail hostname"); -SYSCTL_JAIL_PARAM_STRING(_host, domainname, CTLFLAG_RW, MAXHOSTNAMELEN, - "Jail NIS domainname"); -SYSCTL_JAIL_PARAM_STRING(_host, hostuuid, CTLFLAG_RW, HOSTUUIDLEN, - "Jail host UUID"); -SYSCTL_JAIL_PARAM(_host, hostid, CTLTYPE_ULONG | CTLFLAG_RW, +SYSCTL_JAIL_PARAM_STRING(_host, hostname, CTLFLAG_RW | CTLFLAG_CAPRW, + MAXHOSTNAMELEN, "Jail hostname"); 
+SYSCTL_JAIL_PARAM_STRING(_host, domainname, CTLFLAG_RW | CTLFLAG_CAPRW, + MAXHOSTNAMELEN, "Jail NIS domainname"); +SYSCTL_JAIL_PARAM_STRING(_host, hostuuid, CTLFLAG_RW | CTLFLAG_CAPRW, + HOSTUUIDLEN, "Jail host UUID"); +SYSCTL_JAIL_PARAM(_host, hostid, CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_CAPRW, "LU", "Jail host ID"); SYSCTL_JAIL_PARAM_NODE(cpuset, "Jail cpuset"); -SYSCTL_JAIL_PARAM(_cpuset, id, CTLTYPE_INT | CTLFLAG_RD, "I", "Jail cpuset ID"); +SYSCTL_JAIL_PARAM(_cpuset, id, CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_CAPRW, "I", + "Jail cpuset ID"); #ifdef INET SYSCTL_JAIL_PARAM_SYS_NODE(ip4, CTLFLAG_RDTUN, "Jail IPv4 address virtualization"); -SYSCTL_JAIL_PARAM_STRUCT(_ip4, addr, CTLFLAG_RW, sizeof(struct in_addr), - "S,in_addr,a", "Jail IPv4 addresses"); -SYSCTL_JAIL_PARAM(_ip4, saddrsel, CTLTYPE_INT | CTLFLAG_RW, +SYSCTL_JAIL_PARAM_STRUCT(_ip4, addr, CTLFLAG_RW | CTLFLAG_CAPRW, + sizeof(struct in_addr), "S,in_addr,a", "Jail IPv4 addresses"); +SYSCTL_JAIL_PARAM(_ip4, saddrsel, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_CAPRW, "B", "Do (not) use IPv4 source address selection rather than the " "primary jail IPv4 address."); #endif #ifdef INET6 SYSCTL_JAIL_PARAM_SYS_NODE(ip6, CTLFLAG_RDTUN, "Jail IPv6 address virtualization"); -SYSCTL_JAIL_PARAM_STRUCT(_ip6, addr, CTLFLAG_RW, sizeof(struct in6_addr), - "S,in6_addr,a", "Jail IPv6 addresses"); -SYSCTL_JAIL_PARAM(_ip6, saddrsel, CTLTYPE_INT | CTLFLAG_RW, +SYSCTL_JAIL_PARAM_STRUCT(_ip6, addr, CTLFLAG_RW | CTLFLAG_CAPRW, + sizeof(struct in6_addr), "S,in6_addr,a", "Jail IPv6 addresses"); +SYSCTL_JAIL_PARAM(_ip6, saddrsel, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_CAPRW, "B", "Do (not) use IPv6 source address selection rather than the " "primary jail IPv6 address."); #endif SYSCTL_JAIL_PARAM_NODE(allow, "Jail permission flags"); -SYSCTL_JAIL_PARAM(_allow, set_hostname, CTLTYPE_INT | CTLFLAG_RW, - "B", "Jail may set hostname"); -SYSCTL_JAIL_PARAM(_allow, sysvipc, CTLTYPE_INT | CTLFLAG_RW, +SYSCTL_JAIL_PARAM(_allow, set_hostname, CTLTYPE_INT | 
CTLFLAG_RW | + CTLFLAG_CAPRW, "B", "Jail may set hostname"); +SYSCTL_JAIL_PARAM(_allow, sysvipc, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_CAPRW, "B", "Jail may use SYSV IPC"); -SYSCTL_JAIL_PARAM(_allow, raw_sockets, CTLTYPE_INT | CTLFLAG_RW, +SYSCTL_JAIL_PARAM(_allow, raw_sockets, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_CAPRW, "B", "Jail may create raw sockets"); -SYSCTL_JAIL_PARAM(_allow, chflags, CTLTYPE_INT | CTLFLAG_RW, +SYSCTL_JAIL_PARAM(_allow, chflags, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_CAPRW, "B", "Jail may alter system file flags"); -SYSCTL_JAIL_PARAM(_allow, quotas, CTLTYPE_INT | CTLFLAG_RW, +SYSCTL_JAIL_PARAM(_allow, quotas, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_CAPRW, "B", "Jail may set file quotas"); -SYSCTL_JAIL_PARAM(_allow, socket_af, CTLTYPE_INT | CTLFLAG_RW, +SYSCTL_JAIL_PARAM(_allow, socket_af, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_CAPRW, "B", "Jail may create sockets other than just UNIX/IPv4/IPv6/route"); -SYSCTL_JAIL_PARAM(_allow, mlock, CTLTYPE_INT | CTLFLAG_RW, +SYSCTL_JAIL_PARAM(_allow, mlock, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_CAPRW, "B", "Jail may lock (unlock) physical pages in memory"); -SYSCTL_JAIL_PARAM(_allow, reserved_ports, CTLTYPE_INT | CTLFLAG_RW, - "B", "Jail may bind sockets to reserved ports"); -SYSCTL_JAIL_PARAM(_allow, read_msgbuf, CTLTYPE_INT | CTLFLAG_RW, +SYSCTL_JAIL_PARAM(_allow, reserved_ports, CTLTYPE_INT | CTLFLAG_RW | + CTLFLAG_CAPRW, "B", "Jail may bind sockets to reserved ports"); +SYSCTL_JAIL_PARAM(_allow, read_msgbuf, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_CAPRW, "B", "Jail may read the kernel message buffer"); -SYSCTL_JAIL_PARAM(_allow, unprivileged_proc_debug, CTLTYPE_INT | CTLFLAG_RW, - "B", "Unprivileged processes may use process debugging facilities"); +SYSCTL_JAIL_PARAM(_allow, unprivileged_proc_debug, + CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_CAPRW, "B", + "Unprivileged processes may use process debugging facilities"); SYSCTL_JAIL_PARAM(_allow, unprivileged_parent_tampering, - CTLTYPE_INT | CTLFLAG_RW, "B", + CTLTYPE_INT | 
CTLFLAG_RW | CTLFLAG_CAPRW, "B", "Unprivileged parent jail processes may tamper with same-uid processes" " (signal/debug/cpuset)"); -SYSCTL_JAIL_PARAM(_allow, suser, CTLTYPE_INT | CTLFLAG_RW, +SYSCTL_JAIL_PARAM(_allow, suser, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_CAPRW, "B", "Processes in jail with uid 0 have privilege"); #ifdef VIMAGE -SYSCTL_JAIL_PARAM(_allow, nfsd, CTLTYPE_INT | CTLFLAG_RW, +SYSCTL_JAIL_PARAM(_allow, nfsd, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_CAPRW, "B", "Mountd/nfsd may run in the jail"); #endif -SYSCTL_JAIL_PARAM(_allow, extattr, CTLTYPE_INT | CTLFLAG_RW, +SYSCTL_JAIL_PARAM(_allow, extattr, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_CAPRW, "B", "Jail may set system-level filesystem extended attributes"); -SYSCTL_JAIL_PARAM(_allow, adjtime, CTLTYPE_INT | CTLFLAG_RW, +SYSCTL_JAIL_PARAM(_allow, adjtime, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_CAPRW, "B", "Jail may adjust system time"); -SYSCTL_JAIL_PARAM(_allow, settime, CTLTYPE_INT | CTLFLAG_RW, +SYSCTL_JAIL_PARAM(_allow, settime, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_CAPRW, "B", "Jail may set system time"); -SYSCTL_JAIL_PARAM(_allow, routing, CTLTYPE_INT | CTLFLAG_RW, +SYSCTL_JAIL_PARAM(_allow, routing, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_CAPRW, "B", "Jail may modify routing table"); SYSCTL_JAIL_PARAM_SUBNODE(allow, mount, "Jail mount/unmount permission flags"); -SYSCTL_JAIL_PARAM(_allow_mount, , CTLTYPE_INT | CTLFLAG_RW, +SYSCTL_JAIL_PARAM(_allow_mount, , CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_CAPRW, "B", "Jail may mount/unmount jail-friendly file systems in general"); /* @@ -5177,6 +5312,35 @@ #endif } +/* + * When a jail returns a new descriptor after being accessed via + * JAIL_AT_DESC, change the rights attached to that descriptor to + * reflect that what was before an "at" jail (accessed with + * JAIL_AT_DESC, controlled with such capabilities as CAP_JAIL_GETAT) + * is now a "current" jail (accessed with JAIL_USE_DESC, controlled + * with such capabilities as CAP_JAIL_GET). 
+ */ +static void +prison_shift_capabilities(struct filecaps *fcaps) +{ + if (cap_rights_contains(&fcaps->fc_rights, &cap_jail_getat_rights)) + cap_rights_set_one(&fcaps->fc_rights, CAP_JAIL_GET); + else + cap_rights_clear(&fcaps->fc_rights, CAP_JAIL_GET); + if (cap_rights_contains(&fcaps->fc_rights, &cap_jail_setat_rights)) + cap_rights_set_one(&fcaps->fc_rights, CAP_JAIL_SET); + else + cap_rights_clear(&fcaps->fc_rights, CAP_JAIL_SET); + if (cap_rights_contains(&fcaps->fc_rights, &cap_jail_attachat_rights)) + cap_rights_set_one(&fcaps->fc_rights, CAP_JAIL_ATTACH); + else + cap_rights_clear(&fcaps->fc_rights, CAP_JAIL_ATTACH); + if (cap_rights_contains(&fcaps->fc_rights, &cap_jail_removeat_rights)) + cap_rights_set_one(&fcaps->fc_rights, CAP_JAIL_REMOVE); + else + cap_rights_clear(&fcaps->fc_rights, CAP_JAIL_REMOVE); +} + #ifdef RACCT void prison_racct_foreach(void (*callback)(struct racct *racct, diff --git a/sys/kern/kern_jaildesc.c b/sys/kern/kern_jaildesc.c --- a/sys/kern/kern_jaildesc.c +++ b/sys/kern/kern_jaildesc.c @@ -72,20 +72,20 @@ }; /* - * Given a jail descriptor number, return its prison and/or its - * credential. They are returned held, and will need to be released - * by the caller. + * Given a jail descriptor number, return its prison, its credential, + * and/or its capabilities. They are returned held, and will need to + * be released by the caller. 
*/ int -jaildesc_find(struct thread *td, int fd, struct prison **prp, - struct ucred **ucredp) +jaildesc_find(struct thread *td, int fd, const cap_rights_t *rights, + struct prison **prp, struct ucred **ucredp, struct filecaps *fcaps) { struct file *fp; struct jaildesc *jd; struct prison *pr; int error; - error = fget(td, fd, &cap_no_rights, &fp); + error = fget_cap(td, fd, rights, NULL, &fp, fcaps); if (error != 0) return (error); if (fp->f_type != DTYPE_JAILDESC) { @@ -109,6 +109,8 @@ *ucredp = crhold(fp->f_cred); out: fdrop(fp, td); + if (error != 0 && fcaps != NULL) + filecaps_free(fcaps); return (error); } @@ -118,7 +120,8 @@ * number. */ int -jaildesc_alloc(struct thread *td, struct file **fpp, int *fdp, int owning) +jaildesc_alloc(struct thread *td, struct file **fpp, int *fdp, + struct filecaps *fcaps, int owning) { struct file *fp; struct jaildesc *jd; @@ -130,7 +133,7 @@ return (error); } jd = malloc(sizeof(*jd), M_JAILDESC, M_WAITOK | M_ZERO); - error = falloc_caps(td, &fp, fdp, 0, NULL); + error = falloc_caps(td, &fp, fdp, 0, fcaps); if (error != 0) { free(jd, M_JAILDESC); return (error); diff --git a/sys/kern/subr_capability.c b/sys/kern/subr_capability.c --- a/sys/kern/subr_capability.c +++ b/sys/kern/subr_capability.c @@ -79,6 +79,20 @@ const cap_rights_t cap_inotify_rm_rights = CAP_RIGHTS_INITIALIZER(CAP_INOTIFY_RM); const cap_rights_t cap_ioctl_rights = CAP_RIGHTS_INITIALIZER(CAP_IOCTL); +const cap_rights_t cap_jail_attach_rights = + CAP_RIGHTS_INITIALIZER(CAP_JAIL_ATTACH); +const cap_rights_t cap_jail_attachat_rights = + CAP_RIGHTS_INITIALIZER(CAP_JAIL_ATTACHAT); +const cap_rights_t cap_jail_get_rights = CAP_RIGHTS_INITIALIZER(CAP_JAIL_GET); +const cap_rights_t cap_jail_getat_rights = + CAP_RIGHTS_INITIALIZER(CAP_JAIL_GETAT); +const cap_rights_t cap_jail_remove_rights = + CAP_RIGHTS_INITIALIZER(CAP_JAIL_REMOVE); +const cap_rights_t cap_jail_removeat_rights = + CAP_RIGHTS_INITIALIZER(CAP_JAIL_REMOVEAT); +const cap_rights_t cap_jail_set_rights = 
CAP_RIGHTS_INITIALIZER(CAP_JAIL_SET); +const cap_rights_t cap_jail_setat_rights = + CAP_RIGHTS_INITIALIZER(CAP_JAIL_SETAT); const cap_rights_t cap_listen_rights = CAP_RIGHTS_INITIALIZER(CAP_LISTEN); const cap_rights_t cap_linkat_source_rights = CAP_RIGHTS_INITIALIZER(CAP_LINKAT_SOURCE); diff --git a/sys/kern/syscalls.master b/sys/kern/syscalls.master --- a/sys/kern/syscalls.master +++ b/sys/kern/syscalls.master @@ -2772,14 +2772,14 @@ ); } 505 AUE_NULL OBSOL kgssapi -506 AUE_JAIL_GET STD { +506 AUE_JAIL_GET STD|CAPENABLED { int jail_get( _In_reads_(iovcnt) _Contains_long_ptr_ struct iovec *iovp, unsigned int iovcnt, int flags ); } -507 AUE_JAIL_SET STD { +507 AUE_JAIL_SET STD|CAPENABLED { int jail_set( _In_reads_(iovcnt) _Contains_long_ptr_ struct iovec *iovp, unsigned int iovcnt, @@ -3383,12 +3383,12 @@ _In_reads_(gidsetsize) const gid_t *gidset ); } -597 AUE_JAIL_ATTACH STD { +597 AUE_JAIL_ATTACH STD|CAPENABLED { int jail_attach_jd( int fd ); } -598 AUE_JAIL_REMOVE STD { +598 AUE_JAIL_REMOVE STD|CAPENABLED { int jail_remove_jd( int fd ); diff --git a/sys/sys/caprights.h b/sys/sys/caprights.h --- a/sys/sys/caprights.h +++ b/sys/sys/caprights.h @@ -82,6 +82,14 @@ extern const cap_rights_t cap_inotify_add_rights; extern const cap_rights_t cap_inotify_rm_rights; extern const cap_rights_t cap_ioctl_rights; +extern const cap_rights_t cap_jail_attach_rights; +extern const cap_rights_t cap_jail_attachat_rights; +extern const cap_rights_t cap_jail_get_rights; +extern const cap_rights_t cap_jail_getat_rights; +extern const cap_rights_t cap_jail_remove_rights; +extern const cap_rights_t cap_jail_removeat_rights; +extern const cap_rights_t cap_jail_set_rights; +extern const cap_rights_t cap_jail_setat_rights; extern const cap_rights_t cap_linkat_source_rights; extern const cap_rights_t cap_linkat_target_rights; extern const cap_rights_t cap_listen_rights; diff --git a/sys/sys/capsicum.h b/sys/sys/capsicum.h --- a/sys/sys/capsicum.h +++ b/sys/sys/capsicum.h @@ -283,11 +283,37 
@@ #define CAP_INOTIFY_ADD CAPRIGHT(1, 0x0000000000200000ULL) #define CAP_INOTIFY_RM CAPRIGHT(1, 0x0000000000400000ULL) +/* Jail operations. */ +/* Allows for jail_get(JAIL_USE_DESC). */ +#define CAP_JAIL_GET CAPRIGHT(1, 0x0000000000800000ULL) +/* Allows for jail_set(JAIL_USE_DESC | JAIL_UPDATE). */ +#define CAP_JAIL_SET CAPRIGHT(1, 0x0000000001000000ULL) +/* Allows for jail_attach_jd(2) and jail_set(JAIL_ATTACH). */ +#define CAP_JAIL_ATTACH CAPRIGHT(1, 0x0000000002000000ULL) +/* Allows for jail_remove_jd(2) and jail_get/set(JAIL_OWN_DESC). */ +#define CAP_JAIL_REMOVE CAPRIGHT(1, 0x0000000004000000ULL) +/* Allows for jail_get(JAIL_AT_DESC). */ +#define CAP_JAIL_GETAT CAPRIGHT(1, 0x0000000008000000ULL) +/* Allows for jail_set(JAIL_AT_DESC | JAIL_UPDATE). */ +#define CAP_JAIL_SETAT CAPRIGHT(1, 0x0000000010000000ULL) +/* Allows for jail_set(JAIL_AT_DESC | JAIL_CREATE). */ +#define CAP_JAIL_CREATEAT CAPRIGHT(1, 0x0000000020000000ULL) +/* + * Allows for jail_attach_jd(2) following + * jail_get(JAIL_AT_DESC | JAIL_GET_DESC). + */ +#define CAP_JAIL_ATTACHAT CAPRIGHT(1, 0x0000000040000000ULL) +/* + * Allows for jail_remove_jd(2) following + * jail_get(JAIL_AT_DESC | JAIL_GET_DESC). + */ +#define CAP_JAIL_REMOVEAT CAPRIGHT(1, 0x0000000080000000ULL) + /* All used bits for index 1. */ -#define CAP_ALL1 CAPRIGHT(1, 0x00000000007FFFFFULL) +#define CAP_ALL1 CAPRIGHT(1, 0x00000000FFFFFFFFULL) /* Available bits for index 1. */ -#define CAP_UNUSED1_22 CAPRIGHT(1, 0x0000000000800000ULL) +#define CAP_UNUSED1_33 CAPRIGHT(1, 0x0000000100000000ULL) /* ...
*/ #define CAP_UNUSED1_57 CAPRIGHT(1, 0x0100000000000000ULL) diff --git a/sys/sys/filedesc.h b/sys/sys/filedesc.h --- a/sys/sys/filedesc.h +++ b/sys/sys/filedesc.h @@ -241,6 +241,7 @@ bool filecaps_copy(const struct filecaps *src, struct filecaps *dst, bool locked); void filecaps_move(struct filecaps *src, struct filecaps *dst); +void filecaps_fill(struct filecaps *fcaps); void filecaps_free(struct filecaps *fcaps); int closef(struct file *fp, struct thread *td); diff --git a/sys/sys/jail.h b/sys/sys/jail.h --- a/sys/sys/jail.h +++ b/sys/sys/jail.h @@ -399,7 +399,14 @@ extern struct sx allprison_lock; /* - * Sysctls to describe jail parameters. + * Sysctls to describe jail parameters. In this context, + * CTLFLAG_RDTUN indicates jail parameters that can be set at jail + * creation time but cannot be changed in an existing jail. + * CTLFLAG_CAPRW indicates parameters that can be accessed (but not + * necessarily set) in capability mode; this is necessary because + * libjail "writes" to string parameter sysctls to query their size. + * It is up to kern_jail_set and modules' PR_METHOD_CHECK to actually + * restrict writes as appropriate. 
*/ SYSCTL_DECL(_security_jail); SYSCTL_DECL(_security_jail_param); diff --git a/sys/sys/jaildesc.h b/sys/sys/jaildesc.h --- a/sys/sys/jaildesc.h +++ b/sys/sys/jaildesc.h @@ -75,9 +75,12 @@ #define JDF_REMOVED 0x00000002 /* jail was removed */ #define JDF_OWNING 0x00000004 /* closing descriptor removes jail */ -int jaildesc_find(struct thread *td, int fd, struct prison **prp, - struct ucred **ucredp); -int jaildesc_alloc(struct thread *td, struct file **fpp, int *fdp, int owning); +struct filecaps; + +int jaildesc_find(struct thread *td, int fd, const cap_rights_t *rights, + struct prison **prp, struct ucred **ucredp, struct filecaps *fcaps); +int jaildesc_alloc(struct thread *td, struct file **fpp, int *fdp, + struct filecaps *fcaps, int owning); void jaildesc_set_prison(struct file *jd, struct prison *pr); void jaildesc_prison_cleanup(struct prison *pr); void jaildesc_knote(struct prison *pr, long hint);