Page MenuHomeFreeBSD

D43696.id159468.diff
No OneTemporary

D43696.id159468.diff

diff --git a/lib/libjail/jail.c b/lib/libjail/jail.c
--- a/lib/libjail/jail.c
+++ b/lib/libjail/jail.c
@@ -75,8 +75,9 @@
jail_setv(int flags, ...)
{
va_list ap, tap;
- struct jailparam *jp;
- const char *name, *value;
+ struct jailparam *jp, *jp_desc;
+ const char *name;
+ char *value, *desc_value;
int njp, jid;
/* Create the parameter list and import the parameters. */
@@ -86,15 +87,24 @@
(void)va_arg(tap, char *);
va_end(tap);
jp = alloca(njp * sizeof(struct jailparam));
- for (njp = 0; (name = va_arg(ap, char *)) != NULL;) {
+ jp_desc = NULL;
+ desc_value = NULL;
+ for (njp = 0; (name = va_arg(ap, char *)) != NULL; njp++) {
value = va_arg(ap, char *);
if (jailparam_init(jp + njp, name) < 0)
goto error;
- if (jailparam_import(jp + njp++, value) < 0)
+ if (jailparam_import(jp + njp, value) < 0)
goto error;
+ if (!strcmp(name, "desc")
+ && (flags & (JAIL_GET_DESC | JAIL_OWN_DESC))) {
+ jp_desc = jp + njp;
+ desc_value = value;
+ }
}
va_end(ap);
jid = jailparam_set(jp, njp, flags);
+ if (jid > 0 && jp_desc != NULL)
+ sprintf(desc_value, "%d", *(int *)jp_desc->jp_value);
jailparam_free(jp, njp);
return (jid);
@@ -112,9 +122,10 @@
jail_getv(int flags, ...)
{
va_list ap, tap;
- struct jailparam *jp, *jp_lastjid, *jp_jid, *jp_name, *jp_key;
+ struct jailparam *jp, *jp_desc, *jp_lastjid, *jp_jid, *jp_name, *jp_key;
char *valarg, *value;
- const char *name, *key_value, *lastjid_value, *jid_value, *name_value;
+ const char *name, *key_value, *desc_value, *lastjid_value, *jid_value;
+ const char *name_value;
int njp, i, jid;
/* Create the parameter list and find the key. */
@@ -126,15 +137,19 @@
jp = alloca(njp * sizeof(struct jailparam));
va_copy(tap, ap);
- jp_lastjid = jp_jid = jp_name = NULL;
- lastjid_value = jid_value = name_value = NULL;
+ jp_desc = jp_lastjid = jp_jid = jp_name = NULL;
+ desc_value = lastjid_value = jid_value = name_value = NULL;
for (njp = 0; (name = va_arg(tap, char *)) != NULL; njp++) {
value = va_arg(tap, char *);
if (jailparam_init(jp + njp, name) < 0) {
va_end(tap);
goto error;
}
- if (!strcmp(jp[njp].jp_name, "lastjid")) {
+ if (!strcmp(jp[njp].jp_name, "desc")
+ && (flags & (JAIL_USE_DESC | JAIL_AT_DESC))) {
+ jp_desc = jp + njp;
+ desc_value = value;
+ } else if (!strcmp(jp[njp].jp_name, "lastjid")) {
jp_lastjid = jp + njp;
lastjid_value = value;
} else if (!strcmp(jp[njp].jp_name, "jid")) {
@@ -147,7 +162,10 @@
}
va_end(tap);
/* Import the key parameter. */
- if (jp_lastjid != NULL) {
+ if (jp_desc != NULL && (flags & JAIL_USE_DESC)) {
+ jp_key = jp_desc;
+ key_value = desc_value;
+ } else if (jp_lastjid != NULL) {
jp_key = jp_lastjid;
key_value = lastjid_value;
} else if (jp_jid != NULL && strtol(jid_value, NULL, 10) != 0) {
@@ -163,6 +181,9 @@
}
if (jailparam_import(jp_key, key_value) < 0)
goto error;
+ if (jp_desc != NULL && jp_desc != jp_key
+ && jailparam_import(jp_desc, desc_value) < 0)
+ goto error;
/* Get the jail and export the parameters. */
jid = jailparam_get(jp, njp, flags);
if (jid < 0)
@@ -571,7 +592,7 @@
jailparam_get(struct jailparam *jp, unsigned njp, int flags)
{
struct iovec *jiov;
- struct jailparam *jp_lastjid, *jp_jid, *jp_name, *jp_key;
+ struct jailparam *jp_desc, *jp_lastjid, *jp_jid, *jp_name, *jp_key;
int i, ai, ki, jid, arrays, sanity;
unsigned j;
@@ -580,10 +601,13 @@
* Find the key and any array parameters.
*/
jiov = alloca(sizeof(struct iovec) * 2 * (njp + 1));
- jp_lastjid = jp_jid = jp_name = NULL;
+ jp_desc = jp_lastjid = jp_jid = jp_name = NULL;
arrays = 0;
for (ai = j = 0; j < njp; j++) {
- if (!strcmp(jp[j].jp_name, "lastjid"))
+ if (!strcmp(jp[j].jp_name, "desc")
+ && (flags & (JAIL_USE_DESC | JAIL_AT_DESC)))
+ jp_desc = jp + j;
+ else if (!strcmp(jp[j].jp_name, "lastjid"))
jp_lastjid = jp + j;
else if (!strcmp(jp[j].jp_name, "jid"))
jp_jid = jp + j;
@@ -599,7 +623,9 @@
ai++;
}
}
- jp_key = jp_lastjid ? jp_lastjid :
+ jp_key = jp_desc && jp_desc->jp_valuelen == sizeof(int) &&
+ jp_desc->jp_value && (flags & JAIL_USE_DESC) ? jp_desc :
+ jp_lastjid ? jp_lastjid :
jp_jid && jp_jid->jp_valuelen == sizeof(int) &&
jp_jid->jp_value && *(int *)jp_jid->jp_value ? jp_jid : jp_name;
if (jp_key == NULL || jp_key->jp_value == NULL) {
@@ -622,6 +648,14 @@
jiov[ki].iov_len = JAIL_ERRMSGLEN;
ki++;
jail_errmsg[0] = 0;
+ if (jp_desc != NULL && jp_desc != jp_key) {
+ jiov[ki].iov_base = jp_desc->jp_name;
+ jiov[ki].iov_len = strlen(jp_desc->jp_name) + 1;
+ ki++;
+ jiov[ki].iov_base = jp_desc->jp_value;
+ jiov[ki].iov_len = jp_desc->jp_valuelen;
+ ki++;
+ }
if (arrays && jail_get(jiov, ki, flags) < 0) {
if (!jail_errmsg[0])
snprintf(jail_errmsg, sizeof(jail_errmsg),
@@ -649,7 +683,7 @@
jiov[ai].iov_base = jp[j].jp_value;
memset(jiov[ai].iov_base, 0, jiov[ai].iov_len);
ai++;
- } else if (jp + j != jp_key) {
+ } else if (jp + j != jp_key && jp + j != jp_desc) {
jiov[i].iov_base = jp[j].jp_name;
jiov[i].iov_len = strlen(jp[j].jp_name) + 1;
i++;
diff --git a/lib/libsys/Symbol.sys.map b/lib/libsys/Symbol.sys.map
--- a/lib/libsys/Symbol.sys.map
+++ b/lib/libsys/Symbol.sys.map
@@ -383,6 +383,8 @@
getrlimitusage;
inotify_add_watch_at;
inotify_rm_watch;
+ jail_attach_jd;
+ jail_remove_jd;
kcmp;
setcred;
};
diff --git a/lib/libsys/_libsys.h b/lib/libsys/_libsys.h
--- a/lib/libsys/_libsys.h
+++ b/lib/libsys/_libsys.h
@@ -468,6 +468,8 @@
typedef int (__sys_exterrctl_t)(u_int, u_int, void *);
typedef int (__sys_inotify_add_watch_at_t)(int, int, const char *, uint32_t);
typedef int (__sys_inotify_rm_watch_t)(int, int);
+typedef int (__sys_jail_attach_jd_t)(int);
+typedef int (__sys_jail_remove_jd_t)(int);
void __sys_exit(int rval);
int __sys_fork(void);
@@ -872,6 +874,8 @@
int __sys_exterrctl(u_int op, u_int flags, void * ptr);
int __sys_inotify_add_watch_at(int fd, int dfd, const char * path, uint32_t mask);
int __sys_inotify_rm_watch(int fd, int wd);
+int __sys_jail_attach_jd(int fd);
+int __sys_jail_remove_jd(int fd);
__END_DECLS
#endif /* __LIBSYS_H_ */
diff --git a/lib/libsys/syscalls.map b/lib/libsys/syscalls.map
--- a/lib/libsys/syscalls.map
+++ b/lib/libsys/syscalls.map
@@ -813,4 +813,8 @@
__sys_inotify_add_watch_at;
_inotify_rm_watch;
__sys_inotify_rm_watch;
+ _jail_attach_jd;
+ __sys_jail_attach_jd;
+ _jail_remove_jd;
+ __sys_jail_remove_jd;
};
diff --git a/sys/compat/freebsd32/freebsd32_syscall.h b/sys/compat/freebsd32/freebsd32_syscall.h
--- a/sys/compat/freebsd32/freebsd32_syscall.h
+++ b/sys/compat/freebsd32/freebsd32_syscall.h
@@ -513,4 +513,6 @@
#define FREEBSD32_SYS_exterrctl 592
#define FREEBSD32_SYS_inotify_add_watch_at 593
#define FREEBSD32_SYS_inotify_rm_watch 594
-#define FREEBSD32_SYS_MAXSYSCALL 595
+#define FREEBSD32_SYS_jail_attach_jd 595
+#define FREEBSD32_SYS_jail_remove_jd 596
+#define FREEBSD32_SYS_MAXSYSCALL 597
diff --git a/sys/compat/freebsd32/freebsd32_syscalls.c b/sys/compat/freebsd32/freebsd32_syscalls.c
--- a/sys/compat/freebsd32/freebsd32_syscalls.c
+++ b/sys/compat/freebsd32/freebsd32_syscalls.c
@@ -600,4 +600,6 @@
"exterrctl", /* 592 = exterrctl */
"inotify_add_watch_at", /* 593 = inotify_add_watch_at */
"inotify_rm_watch", /* 594 = inotify_rm_watch */
+ "jail_attach_jd", /* 595 = jail_attach_jd */
+ "jail_remove_jd", /* 596 = jail_remove_jd */
};
diff --git a/sys/compat/freebsd32/freebsd32_sysent.c b/sys/compat/freebsd32/freebsd32_sysent.c
--- a/sys/compat/freebsd32/freebsd32_sysent.c
+++ b/sys/compat/freebsd32/freebsd32_sysent.c
@@ -662,4 +662,6 @@
{ .sy_narg = AS(exterrctl_args), .sy_call = (sy_call_t *)sys_exterrctl, .sy_auevent = AUE_NULL, .sy_flags = SYF_CAPENABLED, .sy_thrcnt = SY_THR_STATIC }, /* 592 = exterrctl */
{ .sy_narg = AS(inotify_add_watch_at_args), .sy_call = (sy_call_t *)sys_inotify_add_watch_at, .sy_auevent = AUE_INOTIFY, .sy_flags = SYF_CAPENABLED, .sy_thrcnt = SY_THR_STATIC }, /* 593 = inotify_add_watch_at */
{ .sy_narg = AS(inotify_rm_watch_args), .sy_call = (sy_call_t *)sys_inotify_rm_watch, .sy_auevent = AUE_INOTIFY, .sy_flags = SYF_CAPENABLED, .sy_thrcnt = SY_THR_STATIC }, /* 594 = inotify_rm_watch */
+ { .sy_narg = AS(jail_attach_jd_args), .sy_call = (sy_call_t *)sys_jail_attach_jd, .sy_auevent = AUE_JAIL_ATTACH, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 595 = jail_attach_jd */
+ { .sy_narg = AS(jail_remove_jd_args), .sy_call = (sy_call_t *)sys_jail_remove_jd, .sy_auevent = AUE_JAIL_REMOVE, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 596 = jail_remove_jd */
};
diff --git a/sys/compat/freebsd32/freebsd32_systrace_args.c b/sys/compat/freebsd32/freebsd32_systrace_args.c
--- a/sys/compat/freebsd32/freebsd32_systrace_args.c
+++ b/sys/compat/freebsd32/freebsd32_systrace_args.c
@@ -3413,6 +3413,20 @@
*n_args = 2;
break;
}
+ /* jail_attach_jd */
+ case 595: {
+ struct jail_attach_jd_args *p = params;
+ iarg[a++] = p->fd; /* int */
+ *n_args = 1;
+ break;
+ }
+ /* jail_remove_jd */
+ case 596: {
+ struct jail_remove_jd_args *p = params;
+ iarg[a++] = p->fd; /* int */
+ *n_args = 1;
+ break;
+ }
default:
*n_args = 0;
break;
@@ -9222,6 +9236,26 @@
break;
};
break;
+ /* jail_attach_jd */
+ case 595:
+ switch (ndx) {
+ case 0:
+ p = "int";
+ break;
+ default:
+ break;
+ };
+ break;
+ /* jail_remove_jd */
+ case 596:
+ switch (ndx) {
+ case 0:
+ p = "int";
+ break;
+ default:
+ break;
+ };
+ break;
default:
break;
};
@@ -11130,6 +11164,16 @@
if (ndx == 0 || ndx == 1)
p = "int";
break;
+ /* jail_attach_jd */
+ case 595:
+ if (ndx == 0 || ndx == 1)
+ p = "int";
+ break;
+ /* jail_remove_jd */
+ case 596:
+ if (ndx == 0 || ndx == 1)
+ p = "int";
+ break;
default:
break;
};
diff --git a/sys/conf/files b/sys/conf/files
--- a/sys/conf/files
+++ b/sys/conf/files
@@ -3804,6 +3804,7 @@
kern/kern_idle.c standard
kern/kern_intr.c standard
kern/kern_jail.c standard
+kern/kern_jaildesc.c standard
kern/kern_jailmeta.c standard
kern/kern_kcov.c optional kcov \
compile-with "${NOSAN_C} ${MSAN_CFLAGS}"
diff --git a/sys/kern/init_sysent.c b/sys/kern/init_sysent.c
--- a/sys/kern/init_sysent.c
+++ b/sys/kern/init_sysent.c
@@ -661,4 +661,6 @@
{ .sy_narg = AS(exterrctl_args), .sy_call = (sy_call_t *)sys_exterrctl, .sy_auevent = AUE_NULL, .sy_flags = SYF_CAPENABLED, .sy_thrcnt = SY_THR_STATIC }, /* 592 = exterrctl */
{ .sy_narg = AS(inotify_add_watch_at_args), .sy_call = (sy_call_t *)sys_inotify_add_watch_at, .sy_auevent = AUE_INOTIFY, .sy_flags = SYF_CAPENABLED, .sy_thrcnt = SY_THR_STATIC }, /* 593 = inotify_add_watch_at */
{ .sy_narg = AS(inotify_rm_watch_args), .sy_call = (sy_call_t *)sys_inotify_rm_watch, .sy_auevent = AUE_INOTIFY, .sy_flags = SYF_CAPENABLED, .sy_thrcnt = SY_THR_STATIC }, /* 594 = inotify_rm_watch */
+ { .sy_narg = AS(jail_attach_jd_args), .sy_call = (sy_call_t *)sys_jail_attach_jd, .sy_auevent = AUE_JAIL_ATTACH, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 595 = jail_attach_jd */
+ { .sy_narg = AS(jail_remove_jd_args), .sy_call = (sy_call_t *)sys_jail_remove_jd, .sy_auevent = AUE_JAIL_REMOVE, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 596 = jail_remove_jd */
};
diff --git a/sys/kern/kern_descrip.c b/sys/kern/kern_descrip.c
--- a/sys/kern/kern_descrip.c
+++ b/sys/kern/kern_descrip.c
@@ -5250,6 +5250,8 @@
return ("eventfd");
case DTYPE_TIMERFD:
return ("timerfd");
+ case DTYPE_JAILDESC:
+ return ("jail");
default:
return ("unkn");
}
diff --git a/sys/kern/kern_event.c b/sys/kern/kern_event.c
--- a/sys/kern/kern_event.c
+++ b/sys/kern/kern_event.c
@@ -365,6 +365,7 @@
[~EVFILT_USER] = { &user_filtops, 1 },
[~EVFILT_SENDFILE] = { &null_filtops },
[~EVFILT_EMPTY] = { &file_filtops, 1 },
+ [~EVFILT_JAILDESC] = { &file_filtops, 1 },
};
/*
diff --git a/sys/kern/kern_jail.c b/sys/kern/kern_jail.c
--- a/sys/kern/kern_jail.c
+++ b/sys/kern/kern_jail.c
@@ -39,6 +39,7 @@
#include <sys/kernel.h>
#include <sys/systm.h>
#include <sys/errno.h>
+#include <sys/file.h>
#include <sys/sysproto.h>
#include <sys/malloc.h>
#include <sys/osd.h>
@@ -48,6 +49,7 @@
#include <sys/taskqueue.h>
#include <sys/fcntl.h>
#include <sys/jail.h>
+#include <sys/jaildesc.h>
#include <sys/linker.h>
#include <sys/lock.h>
#include <sys/mman.h>
@@ -979,6 +981,8 @@
int
kern_jail_set(struct thread *td, struct uio *optuio, int flags)
{
+ struct file *jfp_out;
+ struct jaildesc *desc_in;
struct nameidata nd;
#ifdef INET
struct prison_ip *ip4;
@@ -989,6 +993,7 @@
struct vfsopt *opt;
struct vfsoptlist *opts;
struct prison *pr, *deadpr, *dinspr, *inspr, *mypr, *ppr, *tpr;
+ struct ucred *jdcred;
struct vnode *root;
char *domain, *errmsg, *host, *name, *namelc, *p, *path, *uuid;
char *g_path, *osrelstr;
@@ -1002,7 +1007,7 @@
int created, cuflags, descend, drflags, enforce;
int error, errmsg_len, errmsg_pos;
int gotchildmax, gotenforce, gothid, gotrsnum, gotslevel;
- int deadid, jid, jsys, len, level;
+ int deadid, jfd_in, jfd_out, jfd_pos, jid, jsys, len, level;
int childmax, osreldt, rsnum, slevel;
#ifdef INET
int ip4s;
@@ -1017,17 +1022,26 @@
unsigned tallow;
char numbuf[12];
- error = priv_check(td, PRIV_JAIL_SET);
- if (!error && (flags & JAIL_ATTACH))
- error = priv_check(td, PRIV_JAIL_ATTACH);
- if (error)
- return (error);
mypr = td->td_ucred->cr_prison;
- if ((flags & JAIL_CREATE) && mypr->pr_childmax == 0)
+ if (((flags & (JAIL_CREATE | JAIL_AT_DESC)) == JAIL_CREATE)
+ && mypr->pr_childmax == 0)
return (EPERM);
if (flags & ~JAIL_SET_MASK)
return (EINVAL);
+ if ((flags & (JAIL_USE_DESC | JAIL_AT_DESC))
+ == (JAIL_USE_DESC | JAIL_AT_DESC))
+ return (EINVAL);
+ prison_hold(mypr);
+#ifdef INET
+ ip4 = NULL;
+#endif
+#ifdef INET6
+ ip6 = NULL;
+#endif
+ g_path = NULL;
+ jfp_out = NULL;
+ jfd_out = -1;
/*
* Check all the parameters before committing to anything. Not all
* errors can be caught early, but we may as well try. Also, this
@@ -1040,14 +1054,7 @@
*/
error = vfs_buildopts(optuio, &opts);
if (error)
- return (error);
-#ifdef INET
- ip4 = NULL;
-#endif
-#ifdef INET6
- ip6 = NULL;
-#endif
- g_path = NULL;
+ goto done_free;
cuflags = flags & (JAIL_CREATE | JAIL_UPDATE);
if (!cuflags) {
@@ -1056,6 +1063,72 @@
goto done_errmsg;
}
+ error = vfs_copyopt(opts, "desc", &jfd_in, sizeof(jfd_in));
+ if (error == ENOENT) {
+ if (flags & (JAIL_USE_DESC | JAIL_AT_DESC | JAIL_GET_DESC |
+ JAIL_OWN_DESC)) {
+ vfs_opterror(opts, "missing desc");
+ goto done_errmsg;
+ }
+ jfd_in = -1;
+ } else if (error != 0)
+ goto done_free;
+ else {
+ if (!(flags & (JAIL_USE_DESC | JAIL_AT_DESC | JAIL_GET_DESC |
+ JAIL_OWN_DESC))) {
+ vfs_opterror(opts, "unexpected desc");
+ goto done_errmsg;
+ }
+ if (flags & JAIL_AT_DESC) {
+ /*
+ * Look up and create jails based on the
+ * descriptor's prison.
+ */
+ prison_free(mypr);
+ error = jaildesc_find(td, jfd_in, &desc_in, &mypr,
+ NULL);
+ if (error != 0) {
+ vfs_opterror(opts, error == ENOENT
+ ? "descriptor to dead jail"
+ : "not a jail descriptor");
+ goto done_errmsg;
+ }
+ /*
+ * Check file permissions using the current
+ * credentials, and operation permissions
+ * using the descriptor's credentials.
+ */
+ error = vaccess(VREG, desc_in->jd_mode, desc_in->jd_uid,
+ desc_in->jd_gid, VEXEC, td->td_ucred);
+ JAILDESC_UNLOCK(desc_in);
+ if (error != 0)
+ goto done_free;
+ if ((flags & JAIL_CREATE) && mypr->pr_childmax == 0) {
+ error = EPERM;
+ goto done_free;
+ }
+ }
+ if (flags & (JAIL_GET_DESC | JAIL_OWN_DESC)) {
+ /* Allocate a jail descriptor to return later. */
+ error = jaildesc_alloc(td, &jfp_out, &jfd_out,
+ flags & JAIL_OWN_DESC);
+ if (error)
+ goto done_free;
+ }
+ }
+
+ /*
+ * Delay the permission check if using a jail descriptor,
+ * until we get the descriptor's credentials.
+ */
+ if (!(flags & JAIL_USE_DESC)) {
+ error = priv_check(td, PRIV_JAIL_SET);
+ if (error == 0 && (flags & JAIL_ATTACH))
+ error = priv_check(td, PRIV_JAIL_ATTACH);
+ if (error)
+ goto done_free;
+ }
+
error = vfs_copyopt(opts, "jid", &jid, sizeof(jid));
if (error == ENOENT)
jid = 0;
@@ -1430,7 +1503,57 @@
error = EAGAIN;
goto done_deref;
}
- if (jid != 0) {
+ if (flags & JAIL_USE_DESC) {
+ /* Get the jail from its descriptor. */
+ error = jaildesc_find(td, jfd_in, &desc_in, &pr, &jdcred);
+ if (error) {
+ vfs_opterror(opts, error == ENOENT
+ ? "descriptor to dead jail"
+ : "not a jail descriptor");
+ goto done_deref;
+ }
+ drflags |= PD_DEREF;
+ /*
+ * Check file permissions using the current credentials,
+ * and operation permissions using the descriptor's
+ * credentials.
+ */
+ error = vaccess(VREG, desc_in->jd_mode, desc_in->jd_uid,
+ desc_in->jd_gid, VWRITE, td->td_ucred);
+ if (error == 0 && (flags & JAIL_ATTACH))
+ error = vaccess(VREG, desc_in->jd_mode, desc_in->jd_uid,
+ desc_in->jd_gid, VEXEC, td->td_ucred);
+ JAILDESC_UNLOCK(desc_in);
+ if (error == 0)
+ error = priv_check_cred(jdcred, PRIV_JAIL_SET);
+ if (error == 0 && (flags & JAIL_ATTACH))
+ error = priv_check_cred(jdcred, PRIV_JAIL_ATTACH);
+ crfree(jdcred);
+ if (error)
+ goto done_deref;
+ mtx_lock(&pr->pr_mtx);
+ drflags |= PD_LOCKED;
+ if (cuflags == JAIL_CREATE) {
+ error = EEXIST;
+ vfs_opterror(opts, "jail %d already exists",
+ pr->pr_id);
+ goto done_deref;
+ }
+ if (!prison_isalive(pr)) {
+ /* While a jid can be resurrected, the prison
+ * itself cannot.
+ */
+ error = ENOENT;
+ vfs_opterror(opts, "jail %d is dying", pr->pr_id);
+ goto done_deref;
+ }
+ if (jid != 0 && jid != pr->pr_id) {
+ error = EINVAL;
+ vfs_opterror(opts, "cannot change jid");
+ goto done_deref;
+ }
+ jid = pr->pr_id;
+ } else if (jid != 0) {
if (jid < 0) {
error = EINVAL;
vfs_opterror(opts, "negative jid");
@@ -1564,7 +1687,7 @@
}
}
}
- /* Update: must provide a jid or name. */
+ /* Update: must provide a desc, jid, or name. */
else if (cuflags == JAIL_UPDATE && pr == NULL) {
error = ENOENT;
vfs_opterror(opts, "update specified no jail");
@@ -1716,8 +1839,10 @@
* Grab a reference for existing prisons, to ensure they
* continue to exist for the duration of the call.
*/
- prison_hold(pr);
- drflags |= PD_DEREF;
+ if (!(drflags & PD_DEREF)) {
+ prison_hold(pr);
+ drflags |= PD_DEREF;
+ }
#if defined(VIMAGE) && (defined(INET) || defined(INET6))
if ((pr->pr_flags & PR_VNET) &&
(ch_flags & (PR_IP4_USER | PR_IP6_USER))) {
@@ -2108,7 +2233,9 @@
if (created) {
drflags = prison_lock_xlock(pr, drflags);
pr->pr_state = PRISON_STATE_ALIVE;
- }
+ jaildesc_note(ppr, NOTE_JAIL_CREATE, pr->pr_id);
+ } else
+ jaildesc_note(pr, NOTE_JAIL_SET, 0);
/* Attach this process to the prison if requested. */
if (flags & JAIL_ATTACH) {
@@ -2140,6 +2267,31 @@
printf("Warning jail jid=%d: mountd/nfsd requires a separate"
" file system\n", pr->pr_id);
+ /*
+ * Now that the prison is fully created without error, set the
+ * jail descriptor if one was requested. This is the only
+ * parameter that is returned to the caller (except the error
+ * message).
+ */
+ if (jfd_out >= 0) {
+ if (!(drflags & PD_LOCKED)) {
+ mtx_lock(&pr->pr_mtx);
+ drflags |= PD_LOCKED;
+ }
+ jfd_pos = 2 * vfs_getopt_pos(opts, "desc") + 1;
+ if (optuio->uio_segflg == UIO_SYSSPACE)
+ *(int*)optuio->uio_iov[jfd_pos].iov_base = jfd_out;
+ else
+ (void)copyout(&jfd_out,
+ optuio->uio_iov[jfd_pos].iov_base, sizeof(jfd_out));
+ jaildesc_set_prison(jfp_out, pr);
+ }
+ /* Let kinfo listeners know about the prison. */
+ if (created)
+ jaildesc_note(ppr, NOTE_JAIL_CREATE, pr->pr_id);
+ else
+ jaildesc_note(pr, NOTE_JAIL_SET, 0);
+
drflags &= ~PD_KILL;
td->td_retval[0] = pr->pr_id;
@@ -2170,15 +2322,21 @@
}
}
done_free:
+ /* Clean up other resources. */
#ifdef INET
prison_ip_free(ip4);
#endif
#ifdef INET6
prison_ip_free(ip6);
#endif
+ if (jfp_out != NULL)
+ fdrop(jfp_out, td);
+ if (error && jfd_out >= 0)
+ (void)kern_close(td, jfd_out);
if (g_path != NULL)
free(g_path, M_TEMP);
vfs_freeopts(opts);
+ prison_free(mypr);
return (error);
}
@@ -2323,16 +2481,22 @@
kern_jail_get(struct thread *td, struct uio *optuio, int flags)
{
struct bool_flags *bf;
+ struct file *jfp_out;
+ struct jaildesc *desc_in;
struct jailsys_flags *jsf;
struct prison *pr, *mypr;
struct vfsopt *opt;
struct vfsoptlist *opts;
char *errmsg, *name;
int drflags, error, errmsg_len, errmsg_pos, i, jid, len, pos;
+ int jfd_in, jfd_out;
unsigned f;
if (flags & ~JAIL_GET_MASK)
return (EINVAL);
+ if ((flags & (JAIL_USE_DESC | JAIL_AT_DESC))
+ == (JAIL_USE_DESC | JAIL_AT_DESC))
+ return (EINVAL);
/* Get the parameter list. */
error = vfs_buildopts(optuio, &opts);
@@ -2340,13 +2504,81 @@
return (error);
errmsg_pos = vfs_getopt_pos(opts, "errmsg");
mypr = td->td_ucred->cr_prison;
+ prison_hold(mypr);
pr = NULL;
+ jfp_out = NULL;
+ jfd_out = -1;
/*
- * Find the prison specified by one of: lastjid, jid, name.
+ * Find the prison specified by one of: desc, lastjid, jid, name.
*/
sx_slock(&allprison_lock);
drflags = PD_LIST_SLOCKED;
+
+ error = vfs_copyopt(opts, "desc", &jfd_in, sizeof(jfd_in));
+ if (error == ENOENT) {
+ if (flags & (JAIL_AT_DESC | JAIL_GET_DESC | JAIL_OWN_DESC)) {
+ vfs_opterror(opts, "missing desc");
+ goto done;
+ }
+ } else if (error == 0) {
+ if (!(flags & (JAIL_USE_DESC | JAIL_AT_DESC | JAIL_GET_DESC |
+ JAIL_OWN_DESC))) {
+ vfs_opterror(opts, "unexpected desc");
+ goto done;
+ }
+ if (flags & JAIL_USE_DESC) {
+ /* Get the jail from its descriptor. */
+ error = jaildesc_find(td, jfd_in, &desc_in, &pr, NULL);
+ if (error) {
+ vfs_opterror(opts, error == ENOENT
+ ? "descriptor to dead jail"
+ : "not a jail descriptor");
+ goto done;
+ }
+ drflags |= PD_DEREF;
+ error = vaccess(VREG, desc_in->jd_mode, desc_in->jd_uid,
+ desc_in->jd_gid, VREAD, td->td_ucred);
+ JAILDESC_UNLOCK(desc_in);
+ if (error != 0)
+ goto done;
+ mtx_lock(&pr->pr_mtx);
+ drflags |= PD_LOCKED;
+ if (!(prison_isalive(pr) || (flags & JAIL_DYING))) {
+ error = ENOENT;
+ vfs_opterror(opts, "jail %d is dying",
+ pr->pr_id);
+ goto done;
+ }
+ goto found_prison;
+ }
+ if (flags & JAIL_AT_DESC) {
+ /* Look up jails based on the descriptor's prison. */
+ prison_free(mypr);
+ error = jaildesc_find(td, jfd_in, &desc_in, &mypr,
+ NULL);
+ if (error != 0) {
+ vfs_opterror(opts, error == ENOENT
+ ? "descriptor to dead jail"
+ : "not a jail descriptor");
+ goto done;
+ }
+ error = vaccess(VREG, desc_in->jd_mode, desc_in->jd_uid,
+ desc_in->jd_gid, VEXEC, td->td_ucred);
+ JAILDESC_UNLOCK(desc_in);
+ if (error != 0)
+ goto done;
+ }
+ if (flags & (JAIL_GET_DESC | JAIL_OWN_DESC)) {
+ /* Allocate a jail descriptor to return later. */
+ error = jaildesc_alloc(td, &jfp_out, &jfd_out,
+ flags & JAIL_OWN_DESC);
+ if (error)
+ goto done;
+ }
+ } else
+ goto done;
+
error = vfs_copyopt(opts, "lastjid", &jid, sizeof(jid));
if (error == 0) {
TAILQ_FOREACH(pr, &allprison, pr_list) {
@@ -2415,9 +2647,17 @@
found_prison:
/* Get the parameters of the prison. */
- prison_hold(pr);
- drflags |= PD_DEREF;
+ if (!(drflags & PD_DEREF)) {
+ prison_hold(pr);
+ drflags |= PD_DEREF;
+ }
td->td_retval[0] = pr->pr_id;
+ if (jfd_out >= 0) {
+ error = vfs_setopt(opts, "desc", &jfd_out, sizeof(jfd_out));
+ if (error != 0 && error != ENOENT)
+ goto done;
+ jaildesc_set_prison(jfp_out, pr);
+ }
error = vfs_setopt(opts, "jid", &pr->pr_id, sizeof(pr->pr_id));
if (error != 0 && error != ENOENT)
goto done;
@@ -2597,6 +2837,13 @@
prison_deref(pr, drflags);
else if (drflags & PD_LIST_SLOCKED)
sx_sunlock(&allprison_lock);
+ else if (drflags & PD_LIST_XLOCKED)
+ sx_xunlock(&allprison_lock);
+ /* Clean up other resources. */
+ if (jfp_out != NULL)
+ (void)fdrop(jfp_out, td);
+ if (error && jfd_out >= 0)
+ (void)kern_close(td, jfd_out);
if (error && errmsg_pos >= 0) {
/* Write the error message back to userspace. */
vfs_getopt(opts, "errmsg", (void **)&errmsg, &errmsg_len);
@@ -2613,6 +2860,7 @@
}
}
vfs_freeopts(opts);
+ prison_free(mypr);
return (error);
}
@@ -2637,14 +2885,54 @@
sx_xunlock(&allprison_lock);
return (EINVAL);
}
+ prison_hold(pr);
+ prison_remove(pr);
+ return (0);
+}
+
+/*
+ * struct jail_remove_jd_args {
+ * int fd;
+ * };
+ */
+int
+sys_jail_remove_jd(struct thread *td, struct jail_remove_jd_args *uap)
+{
+ struct prison *pr;
+ struct ucred *jdcred;
+ int error;
+
+ error = jaildesc_find(td, uap->fd, NULL, &pr, &jdcred);
+ if (error)
+ return (error);
+ error = priv_check_cred(jdcred, PRIV_JAIL_REMOVE);
+ crfree(jdcred);
+ if (error) {
+ prison_free(pr);
+ return (error);
+ }
+ sx_xlock(&allprison_lock);
+ mtx_lock(&pr->pr_mtx);
+ prison_remove(pr);
+ return (0);
+}
+
+/*
+ * Begin the removal process for a prison. The allprison lock should
+ * be held exclusively, and the prison should be both locked and held.
+ */
+void
+prison_remove(struct prison *pr)
+{
+ sx_assert(&allprison_lock, SA_XLOCKED);
+ mtx_assert(&pr->pr_mtx, MA_OWNED);
if (!prison_isalive(pr)) {
/* Silently ignore already-dying prisons. */
mtx_unlock(&pr->pr_mtx);
sx_xunlock(&allprison_lock);
- return (0);
+ return;
}
- prison_deref(pr, PD_KILL | PD_LOCKED | PD_LIST_XLOCKED);
- return (0);
+ prison_deref(pr, PD_KILL | PD_DEREF | PD_LOCKED | PD_LIST_XLOCKED);
}
/*
@@ -2679,6 +2967,53 @@
return (do_jail_attach(td, pr, PD_LOCKED | PD_LIST_SLOCKED));
}
+/*
+ * struct jail_attach_jd_args {
+ * int fd;
+ * };
+ */
+int
+sys_jail_attach_jd(struct thread *td, struct jail_attach_jd_args *uap)
+{
+ struct jaildesc *jd;
+ struct prison *pr;
+ struct ucred *jdcred;
+ int drflags, error;
+
+ sx_slock(&allprison_lock);
+ drflags = PD_LIST_SLOCKED;
+ error = jaildesc_find(td, uap->fd, &jd, &pr, &jdcred);
+ if (error)
+ goto fail;
+ drflags |= PD_DEREF;
+ /*
+ * Check file permissions using the current credentials, and
+ * operation permissions using the descriptor's credentials.
+ */
+ error = vaccess(VREG, jd->jd_mode, jd->jd_uid, jd->jd_gid, VEXEC,
+ td->td_ucred);
+ JAILDESC_UNLOCK(jd);
+ if (error == 0)
+ error = priv_check_cred(jdcred, PRIV_JAIL_ATTACH);
+ crfree(jdcred);
+ if (error)
+ goto fail;
+ mtx_lock(&pr->pr_mtx);
+ drflags |= PD_LOCKED;
+
+ /* Do not allow a process to attach to a prison that is not alive. */
+ if (!prison_isalive(pr)) {
+ error = EINVAL;
+ goto fail;
+ }
+
+ return (do_jail_attach(td, pr, drflags));
+
+ fail:
+ prison_deref(pr, drflags);
+ return (error);
+}
+
static int
do_jail_attach(struct thread *td, struct prison *pr, int drflags)
{
@@ -2697,9 +3032,12 @@
* a process root from one prison, but attached to the jail
* of another.
*/
- prison_hold(pr);
+ if (!(drflags & PD_DEREF)) {
+ prison_hold(pr);
+ drflags |= PD_DEREF;
+ }
refcount_acquire(&pr->pr_uref);
- drflags |= PD_DEREF | PD_DEUREF;
+ drflags |= PD_DEUREF;
mtx_unlock(&pr->pr_mtx);
drflags &= ~PD_LOCKED;
@@ -2709,6 +3047,7 @@
prison_deref(pr, drflags);
return (error);
}
+ jaildesc_note(pr, NOTE_JAIL_ATTACH, td->td_proc->p_pid);
sx_unlock(&allprison_lock);
drflags &= ~(PD_LIST_SLOCKED | PD_LIST_XLOCKED);
@@ -3176,6 +3515,7 @@
refcount_load(&prison0.pr_uref) > 0,
("prison0 pr_uref=0"));
pr->pr_state = PRISON_STATE_DYING;
+ jaildesc_remove_prison(pr);
mtx_unlock(&pr->pr_mtx);
flags &= ~PD_LOCKED;
prison_cleanup(pr);
@@ -3323,6 +3663,7 @@
continue;
prison_cleanup(cpr);
mtx_lock(&cpr->pr_mtx);
+ jaildesc_remove_prison(cpr);
cpr->pr_flags &= ~PR_REMOVE;
if (cpr->pr_flags & PR_PERSIST) {
cpr->pr_flags &= ~PR_PERSIST;
@@ -3359,6 +3700,7 @@
prison_cleanup(pr);
mtx_lock(&pr->pr_mtx);
+ jaildesc_remove_prison(pr);
if (pr->pr_flags & PR_PERSIST) {
pr->pr_flags &= ~PR_PERSIST;
prison_proc_free_not_last(pr);
@@ -4610,6 +4952,7 @@
* jail creation time but cannot be changed in an existing jail.
*/
SYSCTL_JAIL_PARAM(, jid, CTLTYPE_INT | CTLFLAG_RDTUN, "I", "Jail ID");
+SYSCTL_JAIL_PARAM(, desc, CTLTYPE_INT | CTLFLAG_RW, "I", "Jail descriptor");
SYSCTL_JAIL_PARAM(, parent, CTLTYPE_INT | CTLFLAG_RD, "I", "Jail parent ID");
SYSCTL_JAIL_PARAM_STRING(, name, CTLFLAG_RW, MAXHOSTNAMELEN, "Jail name");
SYSCTL_JAIL_PARAM_STRING(, path, CTLFLAG_RDTUN, MAXPATHLEN, "Jail root path");
diff --git a/sys/kern/kern_jaildesc.c b/sys/kern/kern_jaildesc.c
--- a/sys/kern/kern_jaildesc.c
+++ b/sys/kern/kern_jaildesc.c
@@ -0,0 +1,459 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause
+ *
+ * Copyright (c) 2024 James Gritton.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/param.h>
+#include <sys/fcntl.h>
+#include <sys/file.h>
+#include <sys/filedesc.h>
+#include <sys/kernel.h>
+#include <sys/jail.h>
+#include <sys/jaildesc.h>
+#include <sys/lock.h>
+#include <sys/malloc.h>
+#include <sys/mutex.h>
+#include <sys/poll.h>
+#include <sys/priv.h>
+#include <sys/stat.h>
+#include <sys/sysproto.h>
+#include <sys/systm.h>
+#include <sys/ucred.h>
+#include <sys/vnode.h>
+
+MALLOC_DEFINE(M_JAILDESC, "jaildesc", "jail descriptors");
+
+static fo_poll_t jaildesc_poll;
+static fo_kqfilter_t jaildesc_kqfilter;
+static fo_stat_t jaildesc_stat;
+static fo_close_t jaildesc_close;
+static fo_chmod_t jaildesc_chmod;
+static fo_chown_t jaildesc_chown;
+static fo_fill_kinfo_t jaildesc_fill_kinfo;
+static fo_cmp_t jaildesc_cmp;
+
+static struct fileops jaildesc_ops = {
+ .fo_read = invfo_rdwr,
+ .fo_write = invfo_rdwr,
+ .fo_truncate = invfo_truncate,
+ .fo_ioctl = invfo_ioctl,
+ .fo_poll = jaildesc_poll,
+ .fo_kqfilter = jaildesc_kqfilter,
+ .fo_stat = jaildesc_stat,
+ .fo_close = jaildesc_close,
+ .fo_chmod = jaildesc_chmod,
+ .fo_chown = jaildesc_chown,
+ .fo_sendfile = invfo_sendfile,
+ .fo_fill_kinfo = jaildesc_fill_kinfo,
+ .fo_cmp = jaildesc_cmp,
+ .fo_flags = DFLAG_PASSABLE,
+};
+
+/*
+ * Given a jail descriptor number, return the jaildesc, its prison,
+ * and its credential. The jaildesc will be returned locked, and
+ * prison and the credential will be returned held.
+ */
+int
+jaildesc_find(struct thread *td, int fd, struct jaildesc **jdp,
+ struct prison **prp, struct ucred **ucredp)
+{
+ struct file *fp;
+ struct jaildesc *jd;
+ struct prison *pr;
+ int error;
+
+ error = fget(td, fd, &cap_no_rights, &fp);
+ if (error != 0)
+ return (error);
+ if (fp->f_type != DTYPE_JAILDESC) {
+ error = EBADF;
+ goto out;
+ }
+ jd = fp->f_data;
+ JAILDESC_LOCK(jd);
+ pr = jd->jd_prison;
+ if (pr == NULL || !prison_isvalid(pr)) {
+ error = ENOENT;
+ JAILDESC_UNLOCK(jd);
+ goto out;
+ }
+ prison_hold(pr);
+ *prp = pr;
+ if (jdp != NULL)
+ *jdp = jd;
+ else
+ JAILDESC_UNLOCK(jd);
+ if (ucredp != NULL)
+ *ucredp = crhold(fp->f_cred);
+ out:
+ fdrop(fp, td);
+ return (error);
+}
+
+/*
+ * Allocate a new jail decriptor, not yet associated with a prison.
+ * Return the file pointer (with a reference held) and the descriptor
+ * number.
+ */
+int
+jaildesc_alloc(struct thread *td, struct file **fpp, int *fdp, int owning)
+{
+ struct file *fp;
+ struct jaildesc *jd;
+ int error;
+ mode_t mode;
+
+ if (owning) {
+ error = priv_check(td, PRIV_JAIL_REMOVE);
+ if (error != 0)
+ return (error);
+ mode = S_ISTXT;
+ } else
+ mode = 0;
+ jd = malloc(sizeof(*jd), M_JAILDESC, M_WAITOK | M_ZERO);
+ error = falloc_caps(td, &fp, fdp, 0, NULL);
+ finit(fp, priv_check_cred(fp->f_cred, PRIV_JAIL_SET) == 0
+ ? FREAD | FWRITE : FREAD, DTYPE_JAILDESC, jd, &jaildesc_ops);
+ if (error != 0) {
+ free(jd, M_JAILDESC);
+ return (error);
+ }
+ JAILDESC_LOCK_INIT(jd);
+ knlist_init_mtx(&jd->jd_selinfo.si_note, &jd->jd_lock);
+ jd->jd_uid = fp->f_cred->cr_uid;
+ jd->jd_gid = fp->f_cred->cr_gid;
+ jd->jd_mode = S_IFREG | S_IRUSR | S_IRGRP | S_IROTH | mode
+ | (priv_check(td, PRIV_JAIL_SET) == 0 ? S_IWUSR | S_IXUSR : 0)
+ | (priv_check(td, PRIV_JAIL_ATTACH) == 0 ? S_IXUSR : 0);
+ *fpp = fp;
+ return (0);
+}
+
+/*
+ * Assocate a jail descriptor with its prison.
+ */
+void
+jaildesc_set_prison(struct file *fp, struct prison *pr)
+{
+ struct jaildesc *jd;
+
+ mtx_assert(&pr->pr_mtx, MA_OWNED);
+ jd = fp->f_data;
+ JAILDESC_LOCK(jd);
+ jd->jd_prison = pr;
+ LIST_INSERT_HEAD(&pr->pr_descs, jd, jd_list);
+ prison_hold(pr);
+ JAILDESC_UNLOCK(jd);
+}
+
+/*
+ * Note prison removal, and detach the all jail descriptors from a prison.
+ */
+void
+jaildesc_remove_prison(struct prison *pr)
+{
+ struct jaildesc *jd;
+
+ mtx_assert(&pr->pr_mtx, MA_OWNED);
+ jaildesc_note(pr, NOTE_JAIL_REMOVE, 0);
+ while ((jd = LIST_FIRST(&pr->pr_descs))) {
+ JAILDESC_LOCK(jd);
+ LIST_REMOVE(jd, jd_list);
+ jd->jd_prison = NULL;
+ JAILDESC_UNLOCK(jd);
+ prison_free(pr);
+ }
+}
+
+/*
+ * Pass a note to all listening kqueues.
+ */
+void
+jaildesc_note(struct prison *pr, long hint, int64_t data)
+{
+ struct jaildesc *jd;
+ int prison_locked;
+
+ if (!LIST_EMPTY(&pr->pr_descs)) {
+ prison_locked = mtx_owned(&pr->pr_mtx);
+ if (!prison_locked)
+ prison_lock(pr);
+ LIST_FOREACH(jd, &pr->pr_descs, jd_list) {
+ JAILDESC_LOCK(jd);
+ jd->jd_event_data = data;
+ if (hint == NOTE_JAIL_REMOVE) {
+ jd->jd_flags |= JDF_REMOVED;
+ if (jd->jd_flags & JDF_SELECTED) {
+ jd->jd_flags &= ~JDF_SELECTED;
+ selwakeup(&jd->jd_selinfo);
+ }
+ }
+ KNOTE_LOCKED(&jd->jd_selinfo.si_note, hint);
+ JAILDESC_UNLOCK(jd);
+ }
+ if (!prison_locked)
+ prison_unlock(pr);
+ }
+}
+
+static int
+jaildesc_close(struct file *fp, struct thread *td)
+{
+ struct jaildesc *jd;
+ struct prison *pr;
+
+ jd = fp->f_data;
+ fp->f_data = NULL;
+ if (jd != NULL) {
+ JAILDESC_LOCK(jd);
+ pr = jd->jd_prison;
+ if (pr != NULL) {
+ /*
+ * Free or remove the associated prison.
+ * This requires a second check after re-
+ * ordering locks. This jaildesc can remain
+ * unlocked once we have a prison reference,
+ * because that prison is the only place that
+ * still points back to it.
+ */
+ prison_hold(pr);
+ JAILDESC_UNLOCK(jd);
+ if (jd->jd_mode & S_ISTXT) {
+ sx_xlock(&allprison_lock);
+ prison_lock(pr);
+ if (jd->jd_prison != NULL) {
+ /*
+ * Unlink the prison, but don't free
+ * it; that will be done as part of
+ * of prison_remove.
+ */
+ LIST_REMOVE(jd, jd_list);
+ prison_remove(pr);
+ } else {
+ prison_unlock(pr);
+ sx_xunlock(&allprison_lock);
+ }
+ } else {
+ prison_lock(pr);
+ if (jd->jd_prison != NULL) {
+ LIST_REMOVE(jd, jd_list);
+ prison_free(pr);
+ }
+ prison_unlock(pr);
+ }
+ prison_free(pr);
+ }
+ knlist_destroy(&jd->jd_selinfo.si_note);
+ JAILDESC_LOCK_DESTROY(jd);
+ free(jd, M_JAILDESC);
+ }
+ finit(fp, 0, DTYPE_NONE, NULL, &badfileops);
+ return (0);
+}
+
+static int
+jaildesc_poll(struct file *fp, int events, struct ucred *active_cred,
+ struct thread *td)
+{
+ struct jaildesc *jd;
+ int revents;
+
+ revents = 0;
+ jd = fp->f_data;
+ JAILDESC_LOCK(jd);
+ if (jd->jd_flags & JDF_REMOVED)
+ revents |= POLLHUP;
+ if (revents == 0) {
+ selrecord(td, &jd->jd_selinfo);
+ jd->jd_flags |= JDF_SELECTED;
+ }
+ JAILDESC_UNLOCK(jd);
+ return (revents);
+}
+
+static void
+jaildesc_kqops_detach(struct knote *kn)
+{
+ struct jaildesc *jd;
+
+ jd = kn->kn_fp->f_data;
+ knlist_remove(&jd->jd_selinfo.si_note, kn, 0);
+}
+
+static int
+jaildesc_kqops_event(struct knote *kn, long hint)
+{
+ struct jaildesc *jd;
+ u_int event;
+
+ jd = kn->kn_fp->f_data;
+ kn->kn_data = jd->jd_event_data;
+ if (hint == 0) {
+ /*
+ * Initial test after registration. Generate a
+ * NOTE_JAIL_REMOVE in case the prison already died
+ * before registration.
+ */
+ event = jd->jd_flags & JDF_REMOVED ? NOTE_JAIL_REMOVE : 0;
+ } else
+ event = (u_int)hint;
+
+ /* If the user is interested in this event, record it. */
+ if (kn->kn_sfflags & event)
+ kn->kn_fflags |= event;
+
+ /* Prison is gone, so flag the event as finished. */
+ if (event == NOTE_JAIL_REMOVE) {
+ kn->kn_flags |= EV_EOF | EV_ONESHOT;
+ if (kn->kn_fflags == 0)
+ kn->kn_flags |= EV_DROP;
+ return (1);
+ }
+
+ return (kn->kn_fflags != 0);
+}
+
+static const struct filterops jaildesc_kqops = {
+ .f_isfd = 1,
+ .f_detach = jaildesc_kqops_detach,
+ .f_event = jaildesc_kqops_event,
+};
+
+static int
+jaildesc_kqfilter(struct file *fp, struct knote *kn)
+{
+ struct jaildesc *jd;
+
+ jd = fp->f_data;
+ switch (kn->kn_filter) {
+ case EVFILT_JAILDESC:
+ kn->kn_fop = &jaildesc_kqops;
+ kn->kn_flags |= EV_CLEAR;
+ knlist_add(&jd->jd_selinfo.si_note, kn, 0);
+ return (0);
+ default:
+ return (EINVAL);
+ }
+}
+
+static int
+jaildesc_stat(struct file *fp, struct stat *sb, struct ucred *active_cred)
+{
+ struct jaildesc *jd;
+
+ bzero(sb, sizeof(struct stat));
+ jd = fp->f_data;
+ JAILDESC_LOCK(jd);
+ if (jd->jd_prison != NULL) {
+ sb->st_ino = jd->jd_prison ? jd->jd_prison->pr_id : 0;
+ sb->st_uid = jd->jd_uid;
+ sb->st_gid = jd->jd_gid;
+ sb->st_mode = jd->jd_mode;
+ } else
+ sb->st_mode = S_IFREG;
+ JAILDESC_UNLOCK(jd);
+ return (0);
+}
+
+static int
+jaildesc_chmod(struct file *fp, mode_t mode, struct ucred *active_cred,
+ struct thread *td)
+{
+ struct jaildesc *jd;
+ int error;
+
+ /* Reject permissions that the creator doesn't have. */
+ if (((mode & (S_IWUSR | S_IWGRP | S_IWOTH))
+ && priv_check_cred(fp->f_cred, PRIV_JAIL_SET) != 0)
+ || ((mode & (S_IXUSR | S_IXGRP | S_IXOTH))
+ && priv_check_cred(fp->f_cred, PRIV_JAIL_ATTACH) != 0
+ && priv_check_cred(fp->f_cred, PRIV_JAIL_SET) != 0)
+ || ((mode & S_ISTXT)
+ && priv_check_cred(fp->f_cred, PRIV_JAIL_REMOVE) != 0))
+ return (EPERM);
+ if (mode & (S_ISUID | S_ISGID))
+ return (EINVAL);
+ jd = fp->f_data;
+ JAILDESC_LOCK(jd);
+ error = vaccess(VREG, jd->jd_mode, jd->jd_uid, jd->jd_gid, VADMIN,
+ active_cred);
+ if (error == 0)
+ jd->jd_mode = S_IFREG | (mode & ALLPERMS);
+ JAILDESC_UNLOCK(jd);
+ return (error);
+}
+
+static int
+jaildesc_chown(struct file *fp, uid_t uid, gid_t gid, struct ucred *active_cred,
+ struct thread *td)
+{
+ struct jaildesc *jd;
+ int error;
+
+ error = 0;
+ jd = fp->f_data;
+ JAILDESC_LOCK(jd);
+ if (uid == (uid_t)-1)
+ uid = jd->jd_uid;
+ if (gid == (gid_t)-1)
+ gid = jd->jd_gid;
+ if ((uid != jd->jd_uid && uid != active_cred->cr_uid) ||
+ (gid != jd->jd_gid && !groupmember(gid, active_cred)))
+ error = priv_check_cred(active_cred, PRIV_VFS_CHOWN);
+ if (error == 0) {
+ jd->jd_uid = uid;
+ jd->jd_gid = gid;
+ }
+ JAILDESC_UNLOCK(jd);
+ return (error);
+}
+
+static int
+jaildesc_fill_kinfo(struct file *fp, struct kinfo_file *kif,
+ struct filedesc *fdp)
+{
+ return (EINVAL);
+}
+
+static int
+jaildesc_cmp(struct file *fp1, struct file *fp2, struct thread *td)
+{
+ struct jaildesc *jd1, *jd2;
+ int jid1, jid2;
+
+ if (fp2->f_type != DTYPE_JAILDESC)
+ return (3);
+ jd1 = fp1->f_data;
+ JAILDESC_LOCK(jd1);
+ jid1 = jd1->jd_prison ? (uintptr_t)jd1->jd_prison->pr_id : 0;
+ JAILDESC_UNLOCK(jd1);
+ jd2 = fp2->f_data;
+ JAILDESC_LOCK(jd2);
+ jid2 = jd2->jd_prison ? (uintptr_t)jd2->jd_prison->pr_id : 0;
+ JAILDESC_UNLOCK(jd2);
+ return (kcmp_cmp(jid1, jid2));
+}
diff --git a/sys/kern/syscalls.c b/sys/kern/syscalls.c
--- a/sys/kern/syscalls.c
+++ b/sys/kern/syscalls.c
@@ -600,4 +600,6 @@
"exterrctl", /* 592 = exterrctl */
"inotify_add_watch_at", /* 593 = inotify_add_watch_at */
"inotify_rm_watch", /* 594 = inotify_rm_watch */
+ "jail_attach_jd", /* 595 = jail_attach_jd */
+ "jail_remove_jd", /* 596 = jail_remove_jd */
};
diff --git a/sys/kern/syscalls.master b/sys/kern/syscalls.master
--- a/sys/kern/syscalls.master
+++ b/sys/kern/syscalls.master
@@ -3370,5 +3370,15 @@
int wd
);
}
+595 AUE_JAIL_ATTACH STD {
+ int jail_attach_jd(
+ int fd
+ );
+ }
+596 AUE_JAIL_REMOVE STD {
+ int jail_remove_jd(
+ int fd
+ );
+ }
; vim: syntax=off
diff --git a/sys/kern/systrace_args.c b/sys/kern/systrace_args.c
--- a/sys/kern/systrace_args.c
+++ b/sys/kern/systrace_args.c
@@ -3500,6 +3500,20 @@
*n_args = 2;
break;
}
+ /* jail_attach_jd */
+ case 595: {
+ struct jail_attach_jd_args *p = params;
+ iarg[a++] = p->fd; /* int */
+ *n_args = 1;
+ break;
+ }
+ /* jail_remove_jd */
+ case 596: {
+ struct jail_remove_jd_args *p = params;
+ iarg[a++] = p->fd; /* int */
+ *n_args = 1;
+ break;
+ }
default:
*n_args = 0;
break;
@@ -9367,6 +9381,26 @@
break;
};
break;
+ /* jail_attach_jd */
+ case 595:
+ switch (ndx) {
+ case 0:
+ p = "int";
+ break;
+ default:
+ break;
+ };
+ break;
+ /* jail_remove_jd */
+ case 596:
+ switch (ndx) {
+ case 0:
+ p = "int";
+ break;
+ default:
+ break;
+ };
+ break;
default:
break;
};
@@ -11365,6 +11399,16 @@
if (ndx == 0 || ndx == 1)
p = "int";
break;
+ /* jail_attach_jd */
+ case 595:
+ if (ndx == 0 || ndx == 1)
+ p = "int";
+ break;
+ /* jail_remove_jd */
+ case 596:
+ if (ndx == 0 || ndx == 1)
+ p = "int";
+ break;
default:
break;
};
diff --git a/sys/sys/event.h b/sys/sys/event.h
--- a/sys/sys/event.h
+++ b/sys/sys/event.h
@@ -45,7 +45,8 @@
#define EVFILT_USER (-11) /* User events */
#define EVFILT_SENDFILE (-12) /* attached to sendfile requests */
#define EVFILT_EMPTY (-13) /* empty send socket buf */
-#define EVFILT_SYSCOUNT 13
+#define EVFILT_JAILDESC (-14) /* attached to jail descriptors */
+#define EVFILT_SYSCOUNT 14
#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L
#define EV_SET(kevp_, a, b, c, d, e, f) do { \
@@ -216,6 +217,12 @@
#define NOTE_NSECONDS 0x00000008 /* data is nanoseconds */
#define NOTE_ABSTIME 0x00000010 /* timeout is absolute */
+/* additional flags for EVFILT_JAIL */
+#define NOTE_JAIL_SET 0x00000001 /* jail was modified */
+#define NOTE_JAIL_ATTACH 0x00000002 /* jail was attached to */
+#define NOTE_JAIL_REMOVE 0x00000004 /* jail was removed */
+#define NOTE_JAIL_CREATE 0x00000008 /* a child jail was created */
+
/* Flags for kqueuex(2) */
#define KQUEUE_CLOEXEC 0x00000001 /* close on exec */
diff --git a/sys/sys/file.h b/sys/sys/file.h
--- a/sys/sys/file.h
+++ b/sys/sys/file.h
@@ -72,6 +72,7 @@
#define DTYPE_EVENTFD 13 /* eventfd */
#define DTYPE_TIMERFD 14 /* timerfd */
#define DTYPE_INOTIFY 15 /* inotify descriptor */
+#define DTYPE_JAILDESC 16 /* jail descriptor */
#ifdef _KERNEL
diff --git a/sys/sys/jail.h b/sys/sys/jail.h
--- a/sys/sys/jail.h
+++ b/sys/sys/jail.h
@@ -99,8 +99,12 @@
#define JAIL_UPDATE 0x02 /* Update parameters of existing jail */
#define JAIL_ATTACH 0x04 /* Attach to jail upon creation */
#define JAIL_DYING 0x08 /* Allow getting a dying jail */
-#define JAIL_SET_MASK 0x0f /* JAIL_DYING is deprecated/ignored here */
-#define JAIL_GET_MASK 0x08
+#define JAIL_USE_DESC 0x10 /* Get/set jail in descriptor */
+#define JAIL_AT_DESC 0x20 /* Find/add jail under descriptor */
+#define JAIL_GET_DESC 0x40 /* Return a new jail descriptor */
+#define JAIL_OWN_DESC 0x80 /* Return a new owning jail descriptor */
+#define JAIL_SET_MASK 0xff /* JAIL_DYING is deprecated/ignored here */
+#define JAIL_GET_MASK 0xf8
#define JAIL_SYS_DISABLE 0
#define JAIL_SYS_NEW 1
@@ -115,7 +119,9 @@
int jail_set(struct iovec *, unsigned int, int);
int jail_get(struct iovec *, unsigned int, int);
int jail_attach(int);
+int jail_attach_jd(int);
int jail_remove(int);
+int jail_remove_jd(int);
__END_DECLS
#else /* _KERNEL */
@@ -144,6 +150,7 @@
#define JAIL_META_PRIVATE "meta"
#define JAIL_META_SHARED "env"
+struct jaildesc;
struct racct;
struct prison_racct;
@@ -189,7 +196,8 @@
struct vnode *pr_root; /* (c) vnode to rdir */
struct prison_ip *pr_addrs[PR_FAMILY_MAX]; /* (p,n) IPs of jail */
struct prison_racct *pr_prison_racct; /* (c) racct jail proxy */
- void *pr_sparep[3];
+ LIST_HEAD(, jaildesc) pr_descs; /* (a) attached descriptors */
+ void *pr_sparep[2];
int pr_childcount; /* (a) number of child jails */
int pr_childmax; /* (p) maximum child jails */
unsigned pr_allow; /* (p) PR_ALLOW_* flags */
@@ -452,6 +460,7 @@
void prison_proc_link(struct prison *, struct proc *);
void prison_proc_unlink(struct prison *, struct proc *);
void prison_proc_iterate(struct prison *, void (*)(struct proc *, void *), void *);
+void prison_remove(struct prison *);
void prison_set_allow(struct ucred *cred, unsigned flag, int enable);
bool prison_ischild(struct prison *, struct prison *);
bool prison_isalive(const struct prison *);
diff --git a/sys/sys/jaildesc.h b/sys/sys/jaildesc.h
--- a/sys/sys/jaildesc.h
+++ b/sys/sys/jaildesc.h
@@ -0,0 +1,90 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause
+ *
+ * Copyright (c) 2024 James Gritton.
+ * All rights reserved.
+ *
+ * This software was developed at the University of Cambridge Computer
+ * Laboratory with support from a grant from Google, Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#ifndef _SYS_JAILDESC_H_
+#define _SYS_JAILDESC_H_
+
+#ifdef _KERNEL
+
+#include <sys/selinfo.h>
+#include <sys/queue.h>
+#include <sys/_lock.h>
+#include <sys/_mutex.h>
+#include <sys/_types.h>
+
+struct prison;
+
+/*-
+ * struct jaildesc describes a jail descriptor, which points to a struct
+ * prison. struct prison in turn has a linked list of struct jaildesc.
+ *
+ * Locking key:
+ * (c) set on creation, remains unchanged
+ * (d) jd_lock
+ * (p) jd_prison->pr_mtx
+ */
+struct jaildesc {
+ LIST_ENTRY(jaildesc) jd_list; /* (d,p) this prison's descs */
+ struct prison *jd_prison; /* (d) the prison */
+ struct mtx jd_lock;
+ struct selinfo jd_selinfo; /* (d) event notification */
+ int64_t jd_event_data; /* (d) data to pass in kevent */
+ uid_t jd_uid; /* (d) nominal file owner */
+ gid_t jd_gid; /* (d) nominal file group */
+ mode_t jd_mode; /* (d) descriptor permissions */
+ unsigned jd_flags; /* (d) JDF_* flags */
+};
+
+/*
+ * Locking macros for the jaildesc.
+ */
+#define JAILDESC_LOCK_DESTROY(jd) mtx_destroy(&(jd)->jd_lock)
+#define JAILDESC_LOCK_INIT(jd) mtx_init(&(jd)->jd_lock, "jaildesc", \
+ NULL, MTX_DEF)
+#define JAILDESC_LOCK(jd) mtx_lock(&(jd)->jd_lock)
+#define JAILDESC_UNLOCK(jd) mtx_unlock(&(jd)->jd_lock)
+
+/*
+ * Flags for the jd_flags field
+ */
+#define JDF_SELECTED 0x00000001 /* issue selwakeup() */
+#define JDF_REMOVED 0x00000002 /* jail was removed */
+
+int jaildesc_find(struct thread *td, int fd, struct jaildesc **jdp,
+ struct prison **prp, struct ucred **ucredp);
+int jaildesc_alloc(struct thread *td, struct file **fpp, int *fdp, int owning);
+void jaildesc_set_prison(struct file *jd, struct prison *pr);
+void jaildesc_remove_prison(struct prison *pr);
+void jaildesc_note(struct prison *pr, long hint, int64_t data);
+
+#endif /* _KERNEL */
+
+#endif /* !_SYS_JAILDESC_H_ */
diff --git a/sys/sys/syscall.h b/sys/sys/syscall.h
--- a/sys/sys/syscall.h
+++ b/sys/sys/syscall.h
@@ -531,4 +531,6 @@
#define SYS_exterrctl 592
#define SYS_inotify_add_watch_at 593
#define SYS_inotify_rm_watch 594
-#define SYS_MAXSYSCALL 595
+#define SYS_jail_attach_jd 595
+#define SYS_jail_remove_jd 596
+#define SYS_MAXSYSCALL 597
diff --git a/sys/sys/syscall.mk b/sys/sys/syscall.mk
--- a/sys/sys/syscall.mk
+++ b/sys/sys/syscall.mk
@@ -436,4 +436,6 @@
setcred.o \
exterrctl.o \
inotify_add_watch_at.o \
- inotify_rm_watch.o
+ inotify_rm_watch.o \
+ jail_attach_jd.o \
+ jail_remove_jd.o
diff --git a/sys/sys/sysproto.h b/sys/sys/sysproto.h
--- a/sys/sys/sysproto.h
+++ b/sys/sys/sysproto.h
@@ -1901,6 +1901,12 @@
char fd_l_[PADL_(int)]; int fd; char fd_r_[PADR_(int)];
char wd_l_[PADL_(int)]; int wd; char wd_r_[PADR_(int)];
};
+struct jail_attach_jd_args {
+ char fd_l_[PADL_(int)]; int fd; char fd_r_[PADR_(int)];
+};
+struct jail_remove_jd_args {
+ char fd_l_[PADL_(int)]; int fd; char fd_r_[PADR_(int)];
+};
int sys_exit(struct thread *, struct exit_args *);
int sys_fork(struct thread *, struct fork_args *);
int sys_read(struct thread *, struct read_args *);
@@ -2305,6 +2311,8 @@
int sys_exterrctl(struct thread *, struct exterrctl_args *);
int sys_inotify_add_watch_at(struct thread *, struct inotify_add_watch_at_args *);
int sys_inotify_rm_watch(struct thread *, struct inotify_rm_watch_args *);
+int sys_jail_attach_jd(struct thread *, struct jail_attach_jd_args *);
+int sys_jail_remove_jd(struct thread *, struct jail_remove_jd_args *);
#ifdef COMPAT_43
@@ -3289,6 +3297,8 @@
#define SYS_AUE_exterrctl AUE_NULL
#define SYS_AUE_inotify_add_watch_at AUE_INOTIFY
#define SYS_AUE_inotify_rm_watch AUE_INOTIFY
+#define SYS_AUE_jail_attach_jd AUE_JAIL_ATTACH
+#define SYS_AUE_jail_remove_jd AUE_JAIL_REMOVE
#undef PAD_
#undef PADL_
diff --git a/sys/sys/user.h b/sys/sys/user.h
--- a/sys/sys/user.h
+++ b/sys/sys/user.h
@@ -266,6 +266,7 @@
#define KF_TYPE_EVENTFD 13
#define KF_TYPE_TIMERFD 14
#define KF_TYPE_INOTIFY 15
+#define KF_TYPE_JAILDESC 16
#define KF_TYPE_UNKNOWN 255
#define KF_VTYPE_VNON 0
@@ -452,6 +453,9 @@
uint32_t kf_timerfd_flags;
uint64_t kf_timerfd_addr;
} kf_timerfd;
+ struct {
+ int32_t kf_jid;
+ } kf_jail;
struct {
uint64_t kf_kqueue_addr;
int32_t kf_kqueue_count;

File Metadata

Mime Type
text/plain
Expires
Sat, Oct 11, 11:30 PM (3 h, 25 m)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
23597586
Default Alt Text
D43696.id159468.diff (50 KB)

Event Timeline