diff --git a/lib/libc/sys/Symbol.map b/lib/libc/sys/Symbol.map --- a/lib/libc/sys/Symbol.map +++ b/lib/libc/sys/Symbol.map @@ -425,6 +425,8 @@ }; FBSD_1.8 { + jail_attach_jd; + jail_remove_jd; kcmp; }; diff --git a/sys/bsm/audit_kevents.h b/sys/bsm/audit_kevents.h --- a/sys/bsm/audit_kevents.h +++ b/sys/bsm/audit_kevents.h @@ -662,6 +662,8 @@ #define AUE_AIO_READV 43268 /* FreeBSD-specific. */ #define AUE_FSPACECTL 43269 /* FreeBSD-specific. */ #define AUE_TIMERFD 43270 /* FreeBSD/Linux. */ +#define AUE_JAIL_ATTACH_JD 43271 /* FreeBSD-specific. */ +#define AUE_JAIL_REMOVE_JD 43272 /* FreeBSD-specific. */ /* * Darwin BSM uses a number of AUE_O_* definitions, which are aliased to the diff --git a/sys/compat/freebsd32/freebsd32_syscall.h b/sys/compat/freebsd32/freebsd32_syscall.h --- a/sys/compat/freebsd32/freebsd32_syscall.h +++ b/sys/compat/freebsd32/freebsd32_syscall.h @@ -507,4 +507,6 @@ #define FREEBSD32_SYS_freebsd32_timerfd_gettime 586 #define FREEBSD32_SYS_freebsd32_timerfd_settime 587 #define FREEBSD32_SYS_kcmp 588 -#define FREEBSD32_SYS_MAXSYSCALL 589 +#define FREEBSD32_SYS_jail_attach_jd 589 +#define FREEBSD32_SYS_jail_remove_jd 590 +#define FREEBSD32_SYS_MAXSYSCALL 591 diff --git a/sys/compat/freebsd32/freebsd32_syscalls.c b/sys/compat/freebsd32/freebsd32_syscalls.c --- a/sys/compat/freebsd32/freebsd32_syscalls.c +++ b/sys/compat/freebsd32/freebsd32_syscalls.c @@ -594,4 +594,6 @@ "freebsd32_timerfd_gettime", /* 586 = freebsd32_timerfd_gettime */ "freebsd32_timerfd_settime", /* 587 = freebsd32_timerfd_settime */ "kcmp", /* 588 = kcmp */ + "jail_attach_jd", /* 589 = jail_attach_jd */ + "jail_remove_jd", /* 590 = jail_remove_jd */ }; diff --git a/sys/compat/freebsd32/freebsd32_sysent.c b/sys/compat/freebsd32/freebsd32_sysent.c --- a/sys/compat/freebsd32/freebsd32_sysent.c +++ b/sys/compat/freebsd32/freebsd32_sysent.c @@ -650,4 +650,6 @@ { .sy_narg = AS(freebsd32_timerfd_gettime_args), .sy_call = (sy_call_t *)freebsd32_timerfd_gettime, .sy_auevent = 
AUE_TIMERFD, .sy_flags = SYF_CAPENABLED, .sy_thrcnt = SY_THR_STATIC }, /* 586 = freebsd32_timerfd_gettime */ { .sy_narg = AS(freebsd32_timerfd_settime_args), .sy_call = (sy_call_t *)freebsd32_timerfd_settime, .sy_auevent = AUE_TIMERFD, .sy_flags = SYF_CAPENABLED, .sy_thrcnt = SY_THR_STATIC }, /* 587 = freebsd32_timerfd_settime */ { .sy_narg = AS(kcmp_args), .sy_call = (sy_call_t *)sys_kcmp, .sy_auevent = AUE_NULL, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 588 = kcmp */ + { .sy_narg = AS(jail_attach_jd_args), .sy_call = (sy_call_t *)sys_jail_attach_jd, .sy_auevent = AUE_JAIL_ATTACH_JD, .sy_flags = SYF_CAPENABLED, .sy_thrcnt = SY_THR_STATIC }, /* 589 = jail_attach_jd */ + { .sy_narg = AS(jail_remove_jd_args), .sy_call = (sy_call_t *)sys_jail_remove_jd, .sy_auevent = AUE_JAIL_REMOVE_JD, .sy_flags = SYF_CAPENABLED, .sy_thrcnt = SY_THR_STATIC }, /* 590 = jail_remove_jd */ }; diff --git a/sys/compat/freebsd32/freebsd32_systrace_args.c b/sys/compat/freebsd32/freebsd32_systrace_args.c --- a/sys/compat/freebsd32/freebsd32_systrace_args.c +++ b/sys/compat/freebsd32/freebsd32_systrace_args.c @@ -3368,6 +3368,20 @@ *n_args = 5; break; } + /* jail_attach_jd */ + case 589: { + struct jail_attach_jd_args *p = params; + iarg[a++] = p->fd; /* int */ + *n_args = 1; + break; + } + /* jail_remove_jd */ + case 590: { + struct jail_remove_jd_args *p = params; + iarg[a++] = p->fd; /* int */ + *n_args = 1; + break; + } default: *n_args = 0; break; @@ -9100,6 +9114,26 @@ break; }; break; + /* jail_attach_jd */ + case 589: + switch (ndx) { + case 0: + p = "int"; + break; + default: + break; + }; + break; + /* jail_remove_jd */ + case 590: + switch (ndx) { + case 0: + p = "int"; + break; + default: + break; + }; + break; default: break; }; @@ -10983,6 +11017,16 @@ if (ndx == 0 || ndx == 1) p = "int"; break; + /* jail_attach_jd */ + case 589: + if (ndx == 0 || ndx == 1) + p = "int"; + break; + /* jail_remove_jd */ + case 590: + if (ndx == 0 || ndx == 1) + p = "int"; + break; default: 
break; }; diff --git a/sys/conf/files b/sys/conf/files --- a/sys/conf/files +++ b/sys/conf/files @@ -3766,6 +3766,7 @@ kern/kern_idle.c standard kern/kern_intr.c standard kern/kern_jail.c standard +kern/kern_jaildesc.c standard kern/kern_kcov.c optional kcov \ compile-with "${NORMAL_C:N-fsanitize*} ${NORMAL_C:M-fsanitize=kernel-memory}" kern/kern_khelp.c standard diff --git a/sys/kern/init_sysent.c b/sys/kern/init_sysent.c --- a/sys/kern/init_sysent.c +++ b/sys/kern/init_sysent.c @@ -649,4 +649,6 @@ { .sy_narg = AS(timerfd_gettime_args), .sy_call = (sy_call_t *)sys_timerfd_gettime, .sy_auevent = AUE_TIMERFD, .sy_flags = SYF_CAPENABLED, .sy_thrcnt = SY_THR_STATIC }, /* 586 = timerfd_gettime */ { .sy_narg = AS(timerfd_settime_args), .sy_call = (sy_call_t *)sys_timerfd_settime, .sy_auevent = AUE_TIMERFD, .sy_flags = SYF_CAPENABLED, .sy_thrcnt = SY_THR_STATIC }, /* 587 = timerfd_settime */ { .sy_narg = AS(kcmp_args), .sy_call = (sy_call_t *)sys_kcmp, .sy_auevent = AUE_NULL, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 588 = kcmp */ + { .sy_narg = AS(jail_attach_jd_args), .sy_call = (sy_call_t *)sys_jail_attach_jd, .sy_auevent = AUE_JAIL_ATTACH_JD, .sy_flags = SYF_CAPENABLED, .sy_thrcnt = SY_THR_STATIC }, /* 589 = jail_attach_jd */ + { .sy_narg = AS(jail_remove_jd_args), .sy_call = (sy_call_t *)sys_jail_remove_jd, .sy_auevent = AUE_JAIL_REMOVE_JD, .sy_flags = SYF_CAPENABLED, .sy_thrcnt = SY_THR_STATIC }, /* 590 = jail_remove_jd */ }; diff --git a/sys/kern/kern_descrip.c b/sys/kern/kern_descrip.c --- a/sys/kern/kern_descrip.c +++ b/sys/kern/kern_descrip.c @@ -5037,6 +5037,8 @@ return ("eventfd"); case DTYPE_TIMERFD: return ("timerfd"); + case DTYPE_JAILDESC: + return ("jail"); default: return ("unkn"); } diff --git a/sys/kern/kern_jail.c b/sys/kern/kern_jail.c --- a/sys/kern/kern_jail.c +++ b/sys/kern/kern_jail.c @@ -39,6 +39,7 @@ #include #include #include +#include #include #include #include @@ -48,6 +49,7 @@ #include #include #include +#include #include #include 
#include @@ -970,6 +972,8 @@ int kern_jail_set(struct thread *td, struct uio *optuio, int flags) { + struct file *jfp; + struct jaildesc *desc; struct nameidata nd; #ifdef INET struct prison_ip *ip4; @@ -980,6 +984,7 @@ struct vfsopt *opt; struct vfsoptlist *opts; struct prison *pr, *deadpr, *dinspr, *inspr, *mypr, *ppr, *tpr; + struct ucred *jdcred; struct vnode *root; char *domain, *errmsg, *host, *name, *namelc, *p, *path, *uuid; char *g_path, *osrelstr; @@ -993,7 +998,7 @@ int created, cuflags, descend, drflags, enforce; int error, errmsg_len, errmsg_pos; int gotchildmax, gotenforce, gothid, gotrsnum, gotslevel; - int deadid, jid, jsys, len, level; + int deadid, jfd_in, jfd_out, jid, jsys, len, level; int childmax, osreldt, rsnum, slevel; #ifdef INET int ip4s; @@ -1008,11 +1013,6 @@ unsigned tallow; char numbuf[12]; - error = priv_check(td, PRIV_JAIL_SET); - if (!error && (flags & JAIL_ATTACH)) - error = priv_check(td, PRIV_JAIL_ATTACH); - if (error) - return (error); mypr = td->td_ucred->cr_prison; if ((flags & JAIL_CREATE) && mypr->pr_childmax == 0) return (EPERM); @@ -1039,6 +1039,9 @@ ip6 = NULL; #endif g_path = NULL; + jfp = NULL; + desc = NULL; + jfd_out = -1; cuflags = flags & (JAIL_CREATE | JAIL_UPDATE); if (!cuflags) { @@ -1047,6 +1050,30 @@ goto done_errmsg; } + error = vfs_copyopt(opts, "desc", &jfd_in, sizeof(jfd_in)); + if (error == ENOENT) + jfd_in = -1; + else if (error != 0) + goto done_free; + else if (jfd_in < 0) { + /* Allocate a jail descriptor to return later. */ + error = jaildesc_alloc(td, &jfp, &jfd_out, &desc); + if (error) + goto done_free; + } + + /* + * Delay the permission check if there's a jail descriptor, + * until we get the descriptor's credentials. 
+ */ + if (jfd_in < 0) { + error = priv_check(td, PRIV_JAIL_SET); + if (!error && (flags & JAIL_ATTACH)) + error = priv_check(td, PRIV_JAIL_ATTACH); + if (error) + goto done_free; + } + error = vfs_copyopt(opts, "jid", &jid, sizeof(jid)); if (error == ENOENT) jid = 0; @@ -1421,7 +1448,41 @@ error = EAGAIN; goto done_deref; } - if (jid != 0) { + if (jfd_in >= 0) { + /* Get the jail from its descriptor. */ + error = jaildesc_find(td, jfd_in, NULL, &pr, &jdcred); + if (error) { + vfs_opterror(opts, "not a jail descriptor"); + goto done_deref; + } + drflags |= PD_LOCKED; + /* Check permissions using the descriptor's credentials. */ + error = priv_check_cred(jdcred, PRIV_JAIL_SET); + if (!error && (flags & JAIL_ATTACH)) + error = priv_check_cred(jdcred, PRIV_JAIL_ATTACH); + crfree(jdcred); + if (error) + goto done_deref; + if (cuflags == JAIL_CREATE) { + error = EEXIST; + vfs_opterror(opts, "jail %d already exists", + pr->pr_id); + goto done_deref; + } + if (!prison_isalive(pr)) { + /* While a jid can be resurrected, the prison + * itself cannot. + */ + error = ENOENT; + vfs_opterror(opts, "jail %d is dying", jid); + goto done_deref; + } + if (jid != 0 && jid != pr->pr_id) { + error = EINVAL; + vfs_opterror(opts, "cannot change jid"); + goto done_deref; + } + } else if (jid != 0) { if (jid < 0) { error = EINVAL; vfs_opterror(opts, "negative jid"); @@ -1555,7 +1616,7 @@ } } } - /* Update: must provide a jid or name. */ + /* Update: must provide a desc, jid, or name. */ else if (cuflags == JAIL_UPDATE && pr == NULL) { error = ENOENT; vfs_opterror(opts, "update specified no jail"); @@ -1835,6 +1896,21 @@ goto done_deref; } + /* + * Set the jail descriptor if one was requested. This is the + * only parameter that is returned to the caller (except the + * error message). 
+ */ + if (jfd_out >= 0) { + error = vfs_setopt(opts, "desc", &jfd_out, sizeof(jfd_out)); + if (error != 0) + goto done_deref; + jaildesc_set_prison(jfp, desc, pr); + desc = NULL; + fdrop(jfp, td); + jfp = NULL; + } + /* * Let modules check their parameters. This requires unlocking and * then re-locking the prison, but this is still a valid state as long @@ -2158,6 +2234,13 @@ #ifdef INET6 prison_ip_free(ip6); #endif + /* Clean up other resources. */ + if (jfp != NULL) + fdrop(jfp, td); + if (desc != NULL) + jaildesc_free(desc); + if (error && jfd_out >= 0) + (void)kern_close(td, jfd_out); if (g_path != NULL) free(g_path, M_TEMP); vfs_freeopts(opts); @@ -2305,12 +2388,15 @@ kern_jail_get(struct thread *td, struct uio *optuio, int flags) { struct bool_flags *bf; + struct file *jfp; + struct jaildesc *desc; struct jailsys_flags *jsf; struct prison *pr, *mypr; struct vfsopt *opt; struct vfsoptlist *opts; char *errmsg, *name; int drflags, error, errmsg_len, errmsg_pos, i, jid, len, pos; + int jfd_in, jfd_out; unsigned f; if (flags & ~JAIL_GET_MASK) @@ -2323,12 +2409,41 @@ errmsg_pos = vfs_getopt_pos(opts, "errmsg"); mypr = td->td_ucred->cr_prison; pr = NULL; + jfp = NULL; + desc = NULL; + jfd_out = -1; /* - * Find the prison specified by one of: lastjid, jid, name. + * Find the prison specified by one of: desc, lastjid, jid, name. */ sx_slock(&allprison_lock); drflags = PD_LIST_SLOCKED; + error = vfs_copyopt(opts, "desc", &jfd_in, sizeof(jfd_in)); + if (error == 0) { + if (jfd_in >= 0) { + /* Get the jail from its descriptor. */ + error = jaildesc_find(td, jfd_in, NULL, &pr, NULL); + if (error) { + vfs_opterror(opts, "not a jail descriptor"); + goto done; + } + drflags |= PD_LOCKED; + if (!(prison_isalive(pr) || (flags & JAIL_DYING))) { + error = ENOENT; + vfs_opterror(opts, "jail %d is dying", + pr->pr_id); + goto done; + } + goto found_prison; + } else { + /* Allocate a jail descriptor to return later. 
*/ + error = jaildesc_alloc(td, &jfp, &jfd_out, &desc); + if (error) + goto done; + } + } else if (error != ENOENT) + goto done; + error = vfs_copyopt(opts, "lastjid", &jid, sizeof(jid)); if (error == 0) { TAILQ_FOREACH(pr, &allprison, pr_list) { @@ -2400,6 +2515,15 @@ prison_hold(pr); drflags |= PD_DEREF; td->td_retval[0] = pr->pr_id; + if (jfd_out >= 0) { + error = vfs_setopt(opts, "desc", &jfd_out, sizeof(jfd_out)); + if (error != 0) + goto done; + jaildesc_set_prison(jfp, desc, pr); + desc = NULL; + fdrop(jfp, td); + jfp = NULL; + } error = vfs_setopt(opts, "jid", &pr->pr_id, sizeof(pr->pr_id)); if (error != 0 && error != ENOENT) goto done; @@ -2565,11 +2689,20 @@ } done: + /* Clean up jail descriptor bits. */ + if (jfp != NULL) + fdrop(jfp, td); + if (desc != NULL) + jaildesc_free(desc); + if (error && jfd_out >= 0) + (void)kern_close(td, jfd_out); /* Release any temporary prison holds and/or locks. */ if (pr != NULL) prison_deref(pr, drflags); else if (drflags & PD_LIST_SLOCKED) sx_sunlock(&allprison_lock); + else if (drflags & PD_LIST_XLOCKED) + sx_xunlock(&allprison_lock); if (error && errmsg_pos >= 0) { /* Write the error message back to userspace. */ vfs_getopt(opts, "errmsg", (void **)&errmsg, &errmsg_len); @@ -2620,6 +2753,41 @@ return (0); } +/* + * struct jail_remove_jd_args { + * int fd; + * }; + */ +int +sys_jail_remove_jd(struct thread *td, struct jail_remove_jd_args *uap) +{ + struct prison *pr; + struct ucred *jdcred; + int error; + + sx_xlock(&allprison_lock); + error = jaildesc_find(td, uap->fd, NULL, &pr, &jdcred); + if (error) + goto fail_allprison; + error = priv_check_cred(jdcred, PRIV_JAIL_REMOVE); + crfree(jdcred); + if (error) + goto fail_prison; + if (!prison_isalive(pr)) { + /* Silently ignore already-dying prisons. 
*/ + goto fail_prison; + } + + prison_deref(pr, PD_KILL | PD_LOCKED | PD_LIST_XLOCKED); + return (0); + + fail_prison: + mtx_unlock(&pr->pr_mtx); + fail_allprison: + sx_xunlock(&allprison_lock); + return (error); +} + /* * struct jail_attach_args { * int jid; @@ -2652,6 +2820,42 @@ return (do_jail_attach(td, pr, PD_LOCKED | PD_LIST_SLOCKED)); } +/* + * struct jail_attach_fd_args { + * int fd; + * }; + */ +int +sys_jail_attach_jd(struct thread *td, struct jail_attach_jd_args *uap) +{ + struct prison *pr; + struct ucred *jdcred; + int error; + + sx_slock(&allprison_lock); + error = jaildesc_find(td, uap->fd, NULL, &pr, &jdcred); + if (error) + goto fail_allprison; + error = priv_check_cred(jdcred, PRIV_JAIL_ATTACH); + crfree(jdcred); + if (error) + goto fail_prison; + + /* Do not allow a process to attach to a prison that is not alive. */ + if (!prison_isalive(pr)) { + error = EINVAL; + goto fail_prison; + } + + return (do_jail_attach(td, pr, PD_LOCKED | PD_LIST_SLOCKED)); + + fail_prison: + mtx_unlock(&pr->pr_mtx); + fail_allprison: + sx_xunlock(&allprison_lock); + return (error); +} + static int do_jail_attach(struct thread *td, struct prison *pr, int drflags) { @@ -4542,6 +4746,7 @@ * jail creation time but cannot be changed in an existing jail. */ SYSCTL_JAIL_PARAM(, jid, CTLTYPE_INT | CTLFLAG_RDTUN, "I", "Jail ID"); +SYSCTL_JAIL_PARAM(, desc, CTLTYPE_INT | CTLFLAG_RW, "I", "Jail descriptor"); SYSCTL_JAIL_PARAM(, parent, CTLTYPE_INT | CTLFLAG_RD, "I", "Jail parent ID"); SYSCTL_JAIL_PARAM_STRING(, name, CTLFLAG_RW, MAXHOSTNAMELEN, "Jail name"); SYSCTL_JAIL_PARAM_STRING(, path, CTLFLAG_RDTUN, MAXPATHLEN, "Jail root path"); diff --git a/sys/kern/kern_jaildesc.c b/sys/kern/kern_jaildesc.c new file mode 100644 --- /dev/null +++ b/sys/kern/kern_jaildesc.c @@ -0,0 +1,177 @@ +/*- + * SPDX-License-Identifier: BSD-2-Clause + * + * Copyright (c) 2024 James Gritton. + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
MALLOC_DEFINE(M_JAILDESC, "jaildesc", "jail descriptors");

static fo_stat_t	jaildesc_stat;
static fo_close_t	jaildesc_close;
static fo_fill_kinfo_t	jaildesc_fill_kinfo;

/*
 * File operations for jail descriptors.  Only stat, close and fill_kinfo
 * have jail-specific handlers; every other operation is routed to the
 * generic invfo_* handlers.
 */
static struct fileops jaildesc_ops = {
	.fo_read = invfo_rdwr,
	.fo_write = invfo_rdwr,
	.fo_truncate = invfo_truncate,
	.fo_ioctl = invfo_ioctl,
	.fo_poll = invfo_poll,
	.fo_kqfilter = invfo_kqfilter,
	.fo_stat = jaildesc_stat,
	.fo_close = jaildesc_close,
	.fo_chmod = invfo_chmod,
	.fo_chown = invfo_chown,
	.fo_sendfile = invfo_sendfile,
	.fo_fill_kinfo = jaildesc_fill_kinfo,
	.fo_flags = DFLAG_PASSABLE,
};

/*
 * Given a jail descriptor, return its prison and optionally its credential.
 * The prison will be returned locked, and the credential returned held.
 *
 * Returns the error from fget() if the descriptor cannot be looked up, or
 * EBADF if the file is not a jail descriptor.  On failure *prp and *ucredp
 * are not written and no lock is taken.  The file reference obtained here
 * is always dropped before returning.
 *
 * NOTE(review): the callers visible in this change pass rights == NULL
 * straight through to fget(); confirm fget() tolerates a NULL rights
 * pointer, or pass an explicit empty rights set instead.
 */
int
jaildesc_find(struct thread *td, int fd, cap_rights_t *rights,
    struct prison **prp, struct ucred **ucredp)
{
	struct file *fp;
	struct prison *pr;
	int error;

	error = fget(td, fd, rights, &fp);
	if (error)
		return (error);
	if (fp->f_type != DTYPE_JAILDESC) {
		error = EBADF;
		goto out;
	}
	pr = ((struct jaildesc *)fp->f_data)->jd_prison;
	MPASS(pr != NULL);
	KASSERT(prison_isvalid(pr), ("jaildesc has invalid prison %p", pr));
	*prp = pr;
	prison_lock(pr);
	/* The credential is the one recorded in the file (fp->f_cred). */
	if (ucredp)
		*ucredp = crhold(fp->f_cred);
out:
	fdrop(fp, td);
	return (error);
}

/*
 * Allocate a new jail descriptor, not yet associated with a prison.
 *
 * On success *fpp, *fdp and *jdp receive the new file, its descriptor
 * number and the zeroed jaildesc.  The jaildesc is not yet attached to the
 * file; that happens in jaildesc_set_prison().  Until then the caller owns
 * the jaildesc and must release it with jaildesc_free() on failure.
 * On error the jaildesc is freed here and the outputs are left untouched.
 */
int
jaildesc_alloc(struct thread *td, struct file **fpp, int *fdp,
    struct jaildesc **jdp)
{
	struct file *fp;
	struct jaildesc *jd;
	int error;

	jd = malloc(sizeof(*jd), M_JAILDESC, M_WAITOK | M_ZERO);
	error = falloc_caps(td, &fp, fdp, 0, NULL);
	if (error) {
		free(jd, M_JAILDESC);
		return (error);
	}
	*fpp = fp;
	*jdp = jd;
	return (0);
}

/*
 * Associate a jail descriptor with its prison.
 *
 * The caller must hold the prison mutex (asserted below).  The descriptor
 * is linked onto the prison's pr_descs list and takes a hold on the
 * prison; that hold is released in jaildesc_free().  After this call the
 * jaildesc is owned by the file and is freed through jaildesc_close().
 */
void
jaildesc_set_prison(struct file *fp, struct jaildesc *jd, struct prison *pr)
{
	mtx_assert(&pr->pr_mtx, MA_OWNED);
	jd->jd_prison = pr;
	LIST_INSERT_HEAD(&pr->pr_descs, jd, jd_list);
	prison_hold(pr);
	/*
	 * Now the file is ready to be a jail descriptor.
	 */
	finit(fp, FREAD | FWRITE, DTYPE_JAILDESC, jd, &jaildesc_ops);
}


/*
 * Detach the jail descriptor from its associated prison and free it.
 *
 * A jaildesc that was never given a prison (jd_prison == NULL) is simply
 * freed.  Otherwise it is unlinked from the prison's list and the hold
 * taken in jaildesc_set_prison() is dropped.  The prison mutex is taken
 * around the unlink only if the current thread does not already own it,
 * so this may be called with or without the prison locked.
 */
void
jaildesc_free(struct jaildesc *jd)
{
	struct prison *pr;
	int locked;

	pr = jd->jd_prison;
	if (pr != NULL)
	{
		/* Remember whether the caller already holds the mutex. */
		locked = mtx_owned(&pr->pr_mtx);
		if (!locked)
			prison_lock(pr);
		LIST_REMOVE(jd, jd_list);
		prison_free(pr);
		if (!locked)
			prison_unlock(pr);
	}
	free(jd, M_JAILDESC);
}

/*
 * fo_close handler: detach the jaildesc from the file, reset the file to
 * badfileops, and free the jaildesc (if one was attached).
 */
static int
jaildesc_close(struct file *fp, struct thread *td)
{
	struct jaildesc *jd;

	jd = fp->f_data;
	/* Clear f_data first so the file no longer references jd. */
	finit(fp, 0, DTYPE_NONE, NULL, &badfileops);
	if (jd != NULL)
		jaildesc_free(jd);
	return (0);
}

/*
 * fo_stat handler: not implemented for jail descriptors; always EINVAL.
 */
static int
jaildesc_stat(struct file *fp, struct stat *sb, struct ucred *active_cred)
{
	return (EINVAL);
}

/*
 * fo_fill_kinfo handler: not implemented; always EINVAL.
 *
 * NOTE(review): this change also declares KF_TYPE_JAILDESC and a
 * kf_jail.kf_jid field in sys/user.h, but nothing here fills them in --
 * confirm whether that is intentionally deferred.
 */
static int
jaildesc_fill_kinfo(struct file *fp, struct kinfo_file *kif,
    struct filedesc *fdp)
{
	return (EINVAL);
}
b/sys/kern/systrace_args.c @@ -3455,6 +3455,20 @@ *n_args = 5; break; } + /* jail_attach_jd */ + case 589: { + struct jail_attach_jd_args *p = params; + iarg[a++] = p->fd; /* int */ + *n_args = 1; + break; + } + /* jail_remove_jd */ + case 590: { + struct jail_remove_jd_args *p = params; + iarg[a++] = p->fd; /* int */ + *n_args = 1; + break; + } default: *n_args = 0; break; @@ -9245,6 +9259,26 @@ break; }; break; + /* jail_attach_jd */ + case 589: + switch (ndx) { + case 0: + p = "int"; + break; + default: + break; + }; + break; + /* jail_remove_jd */ + case 590: + switch (ndx) { + case 0: + p = "int"; + break; + default: + break; + }; + break; default: break; }; @@ -11218,6 +11252,16 @@ if (ndx == 0 || ndx == 1) p = "int"; break; + /* jail_attach_jd */ + case 589: + if (ndx == 0 || ndx == 1) + p = "int"; + break; + /* jail_remove_jd */ + case 590: + if (ndx == 0 || ndx == 1) + p = "int"; + break; default: break; }; diff --git a/sys/sys/event.h b/sys/sys/event.h --- a/sys/sys/event.h +++ b/sys/sys/event.h @@ -45,7 +45,8 @@ #define EVFILT_USER (-11) /* User events */ #define EVFILT_SENDFILE (-12) /* attached to sendfile requests */ #define EVFILT_EMPTY (-13) /* empty send socket buf */ -#define EVFILT_SYSCOUNT 13 +#define EVFILT_JAIL (-14) /* attached to jail descriptors */ +#define EVFILT_SYSCOUNT 14 #if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L #define EV_SET(kevp_, a, b, c, d, e, f) do { \ diff --git a/sys/sys/file.h b/sys/sys/file.h --- a/sys/sys/file.h +++ b/sys/sys/file.h @@ -68,6 +68,7 @@ #define DTYPE_PROCDESC 12 /* process descriptor */ #define DTYPE_EVENTFD 13 /* eventfd */ #define DTYPE_TIMERFD 14 /* timerfd */ +#define DTYPE_JAILDESC 15 /* jail descriptor */ #ifdef _KERNEL diff --git a/sys/sys/jail.h b/sys/sys/jail.h --- a/sys/sys/jail.h +++ b/sys/sys/jail.h @@ -115,7 +115,9 @@ int jail_set(struct iovec *, unsigned int, int); int jail_get(struct iovec *, unsigned int, int); int jail_attach(int); +int jail_attach_jd(int); int 
jail_remove(int); +int jail_remove_jd(int); __END_DECLS #else /* _KERNEL */ @@ -141,6 +143,7 @@ #define DEFAULT_HOSTUUID "00000000-0000-0000-0000-000000000000" #define OSRELEASELEN 32 +struct jaildesc; struct racct; struct prison_racct; @@ -186,7 +189,8 @@ struct vnode *pr_root; /* (c) vnode to rdir */ struct prison_ip *pr_addrs[PR_FAMILY_MAX]; /* (p,n) IPs of jail */ struct prison_racct *pr_prison_racct; /* (c) racct jail proxy */ - void *pr_sparep[3]; + LIST_HEAD(, jaildesc) pr_descs; /* (a) attached descriptors */ + void *pr_sparep[2]; int pr_childcount; /* (a) number of child jails */ int pr_childmax; /* (p) maximum child jails */ unsigned pr_allow; /* (p) PR_ALLOW_* flags */ @@ -418,6 +422,8 @@ void getcredhostuuid(struct ucred *, char *, size_t); void getcredhostid(struct ucred *, unsigned long *); void getjailname(struct ucred *cred, char *name, size_t len); +int kern_jail_attach(struct thread *td, struct prison *pr); +int kern_jail_remove(struct prison *pr); void prison0_init(void); bool prison_allow(struct ucred *, unsigned); int prison_check(struct ucred *cred1, struct ucred *cred2); diff --git a/sys/sys/jaildesc.h b/sys/sys/jaildesc.h new file mode 100644 --- /dev/null +++ b/sys/sys/jaildesc.h @@ -0,0 +1,64 @@ +/*- + * SPDX-License-Identifier: BSD-2-Clause + * + * Copyright (c) 2024 James Gritton. + * All rights reserved. + * + * This software was developed at the University of Cambridge Computer + * Laboratory with support from a grant from Google, Inc. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. 
/*-
 * struct jaildesc describes a jail descriptor, which points to a struct
 * prison.  struct prison in turn has a linked list of struct jaildesc
 * (pr_descs), so one prison may be referenced by many descriptors.
 *
 * Locking key:
 *   (c) set on creation, remains unchanged
 *   (p) jd_prison->pr_mtx
 *
 * Note that "creation" for jd_prison means the point where the descriptor
 * is bound to its prison in jaildesc_set_prison(); a freshly allocated
 * jaildesc has jd_prison == NULL until then.
 */
struct jaildesc {
	LIST_ENTRY(jaildesc) jd_list;	/* (c,p) this prison's descs */
	struct prison	*jd_prison;	/* (c) the prison; NULL until bound */
};
sys_kcmp(struct thread *, struct kcmp_args *); +int sys_jail_attach_jd(struct thread *, struct jail_attach_jd_args *); +int sys_jail_remove_jd(struct thread *, struct jail_remove_jd_args *); #ifdef COMPAT_43 @@ -3255,6 +3263,8 @@ #define SYS_AUE_timerfd_gettime AUE_TIMERFD #define SYS_AUE_timerfd_settime AUE_TIMERFD #define SYS_AUE_kcmp AUE_NULL +#define SYS_AUE_jail_attach_jd AUE_JAIL_ATTACH_JD +#define SYS_AUE_jail_remove_jd AUE_JAIL_REMOVE_JD #undef PAD_ #undef PADL_ diff --git a/sys/sys/user.h b/sys/sys/user.h --- a/sys/sys/user.h +++ b/sys/sys/user.h @@ -263,6 +263,7 @@ #define KF_TYPE_DEV 12 #define KF_TYPE_EVENTFD 13 #define KF_TYPE_TIMERFD 14 +#define KF_TYPE_JAILDESC 15 #define KF_TYPE_UNKNOWN 255 #define KF_VTYPE_VNON 0 @@ -449,6 +450,9 @@ uint32_t kf_timerfd_flags; uint64_t kf_timerfd_addr; } kf_timerfd; + struct { + int32_t kf_jid; + } kf_jail; struct { uint64_t kf_kqueue_addr; int32_t kf_kqueue_count;