diff --git a/lib/libsys/Symbol.sys.map b/lib/libsys/Symbol.sys.map
--- a/lib/libsys/Symbol.sys.map
+++ b/lib/libsys/Symbol.sys.map
@@ -380,6 +380,7 @@
 FBSD_1.8 {
 	getrlimitusage;
 	kcmp;
+	setcred;
 };
 
 FBSDprivate_1.0 {
diff --git a/lib/libsys/_libsys.h b/lib/libsys/_libsys.h
--- a/lib/libsys/_libsys.h
+++ b/lib/libsys/_libsys.h
@@ -463,6 +463,7 @@
 typedef int (__sys_timerfd_settime_t)(int, int, const struct itimerspec *, struct itimerspec *);
 typedef int (__sys_kcmp_t)(pid_t, pid_t, int, uintptr_t, uintptr_t);
 typedef int (__sys_getrlimitusage_t)(u_int, int, rlim_t *);
+typedef int (__sys_setcred_t)(u_int, const void *, size_t);
 
 void __sys_exit(int rval);
 int __sys_fork(void);
@@ -863,6 +864,7 @@
 int __sys_timerfd_settime(int fd, int flags, const struct itimerspec * new_value, struct itimerspec * old_value);
 int __sys_kcmp(pid_t pid1, pid_t pid2, int type, uintptr_t idx1, uintptr_t idx2);
 int __sys_getrlimitusage(u_int which, int flags, rlim_t * res);
+int __sys_setcred(u_int flags, const void * wcred, size_t size);
 __END_DECLS
 
 #endif /* __LIBSYS_H_ */
diff --git a/lib/libsys/syscalls.map b/lib/libsys/syscalls.map
--- a/lib/libsys/syscalls.map
+++ b/lib/libsys/syscalls.map
@@ -805,4 +805,6 @@
 	__sys_kcmp;
 	_getrlimitusage;
 	__sys_getrlimitusage;
+	_setcred;
+	__sys_setcred;
 };
diff --git a/sys/bsm/audit_kevents.h b/sys/bsm/audit_kevents.h
--- a/sys/bsm/audit_kevents.h
+++ b/sys/bsm/audit_kevents.h
@@ -662,6 +662,7 @@
 #define	AUE_AIO_READV		43268	/* FreeBSD-specific. */
 #define	AUE_FSPACECTL		43269	/* FreeBSD-specific. */
 #define	AUE_TIMERFD		43270	/* FreeBSD/Linux. */
+#define	AUE_SETCRED		43271	/* FreeBSD-specific. */
 
 /*
  * Darwin BSM uses a number of AUE_O_* definitions, which are aliased to the
diff --git a/sys/compat/freebsd32/freebsd32_syscall.h b/sys/compat/freebsd32/freebsd32_syscall.h
--- a/sys/compat/freebsd32/freebsd32_syscall.h
+++ b/sys/compat/freebsd32/freebsd32_syscall.h
@@ -508,4 +508,5 @@
 #define	FREEBSD32_SYS_freebsd32_timerfd_settime	587
 #define	FREEBSD32_SYS_kcmp	588
 #define	FREEBSD32_SYS_getrlimitusage	589
-#define	FREEBSD32_SYS_MAXSYSCALL	590
+#define	FREEBSD32_SYS_setcred	590
+#define	FREEBSD32_SYS_MAXSYSCALL	591
diff --git a/sys/compat/freebsd32/freebsd32_syscalls.c b/sys/compat/freebsd32/freebsd32_syscalls.c
--- a/sys/compat/freebsd32/freebsd32_syscalls.c
+++ b/sys/compat/freebsd32/freebsd32_syscalls.c
@@ -595,4 +595,5 @@
 	"freebsd32_timerfd_settime",			/* 587 = freebsd32_timerfd_settime */
 	"kcmp",			/* 588 = kcmp */
 	"getrlimitusage",			/* 589 = getrlimitusage */
+	"setcred",			/* 590 = setcred */
 };
diff --git a/sys/compat/freebsd32/freebsd32_sysent.c b/sys/compat/freebsd32/freebsd32_sysent.c
--- a/sys/compat/freebsd32/freebsd32_sysent.c
+++ b/sys/compat/freebsd32/freebsd32_sysent.c
@@ -657,4 +657,5 @@
 	{ .sy_narg = AS(freebsd32_timerfd_settime_args), .sy_call = (sy_call_t *)freebsd32_timerfd_settime, .sy_auevent = AUE_TIMERFD, .sy_flags = SYF_CAPENABLED, .sy_thrcnt = SY_THR_STATIC },	/* 587 = freebsd32_timerfd_settime */
 	{ .sy_narg = AS(kcmp_args), .sy_call = (sy_call_t *)sys_kcmp, .sy_auevent = AUE_NULL, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC },	/* 588 = kcmp */
 	{ .sy_narg = AS(getrlimitusage_args), .sy_call = (sy_call_t *)sys_getrlimitusage, .sy_auevent = AUE_NULL, .sy_flags = SYF_CAPENABLED, .sy_thrcnt = SY_THR_STATIC },	/* 589 = getrlimitusage */
+	{ .sy_narg = AS(setcred_args), .sy_call = (sy_call_t *)sys_setcred, .sy_auevent = AUE_SETCRED, .sy_flags = SYF_CAPENABLED, .sy_thrcnt = SY_THR_STATIC },	/* 590 = setcred */
 };
diff --git a/sys/compat/freebsd32/freebsd32_systrace_args.c b/sys/compat/freebsd32/freebsd32_systrace_args.c
--- a/sys/compat/freebsd32/freebsd32_systrace_args.c
+++ b/sys/compat/freebsd32/freebsd32_systrace_args.c
@@ -3378,6 +3378,15 @@
 		*n_args = 3;
 		break;
 	}
+	/* setcred */
+	case 590: {
+		struct setcred_args *p = params;
+		uarg[a++] = p->flags; /* u_int */
+		uarg[a++] = (intptr_t)p->wcred; /* const void * */
+		uarg[a++] = p->size; /* size_t */
+		*n_args = 3;
+		break;
+	}
 	default:
 		*n_args = 0;
 		break;
@@ -9126,6 +9135,22 @@
 			break;
 		};
 		break;
+	/* setcred */
+	case 590:
+		switch (ndx) {
+		case 0:
+			p = "u_int";
+			break;
+		case 1:
+			p = "userland const void *";
+			break;
+		case 2:
+			p = "size_t";
+			break;
+		default:
+			break;
+		};
+		break;
 	default:
 		break;
 	};
@@ -11014,6 +11039,11 @@
 		if (ndx == 0 || ndx == 1)
 			p = "int";
 		break;
+	/* setcred */
+	case 590:
+		if (ndx == 0 || ndx == 1)
+			p = "int";
+		break;
 	default:
 		break;
 	};
diff --git a/sys/kern/init_sysent.c b/sys/kern/init_sysent.c
--- a/sys/kern/init_sysent.c
+++ b/sys/kern/init_sysent.c
@@ -656,4 +656,5 @@
 	{ .sy_narg = AS(timerfd_settime_args), .sy_call = (sy_call_t *)sys_timerfd_settime, .sy_auevent = AUE_TIMERFD, .sy_flags = SYF_CAPENABLED, .sy_thrcnt = SY_THR_STATIC },	/* 587 = timerfd_settime */
 	{ .sy_narg = AS(kcmp_args), .sy_call = (sy_call_t *)sys_kcmp, .sy_auevent = AUE_NULL, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC },	/* 588 = kcmp */
 	{ .sy_narg = AS(getrlimitusage_args), .sy_call = (sy_call_t *)sys_getrlimitusage, .sy_auevent = AUE_NULL, .sy_flags = SYF_CAPENABLED, .sy_thrcnt = SY_THR_STATIC },	/* 589 = getrlimitusage */
+	{ .sy_narg = AS(setcred_args), .sy_call = (sy_call_t *)sys_setcred, .sy_auevent = AUE_SETCRED, .sy_flags = SYF_CAPENABLED, .sy_thrcnt = SY_THR_STATIC },	/* 590 = setcred */
 };
diff --git a/sys/kern/kern_jail.c b/sys/kern/kern_jail.c
--- a/sys/kern/kern_jail.c
+++ b/sys/kern/kern_jail.c
@@ -3956,6 +3956,7 @@
 		 * Allow jailed processes to manipulate process UNIX
 		 * credentials in any way they see fit.
 		 */
+	case PRIV_CRED_SETCRED:
 	case PRIV_CRED_SETUID:
 	case PRIV_CRED_SETEUID:
 	case PRIV_CRED_SETGID:
diff --git a/sys/kern/kern_prot.c b/sys/kern/kern_prot.c
--- a/sys/kern/kern_prot.c
+++ b/sys/kern/kern_prot.c
@@ -73,6 +73,10 @@
 #include <sys/syscallsubr.h>
 #include <sys/sysctl.h>
 
+#ifdef MAC
+#include <security/mac/mac_syscalls.h>
+#endif
+
 #include <vm/uma.h>
 
 #ifdef REGRESSION
@@ -484,6 +488,303 @@
 	return (error);
 }
 
+static int
+gidp_cmp(const void *p1, const void *p2)
+{
+	const gid_t g1 = *(const gid_t *)p1;
+	const gid_t g2 = *(const gid_t *)p2;
+	/* Three-way comparison of the pointed-to GIDs: yields -1, 0 or 1. */
+	return ((g1 > g2) - (g1 < g2));
+}
+
+#ifndef _SYS_SYSPROTO_H_
+struct setcred_args {
+	u_int		 flags;		/* Flags, including version. */
+	const void	*wcred;		/* Some setcred_vX structure. */
+	size_t		 size;		/* Length of setcred_vX structure. */
+};
+#endif
+int
+sys_setcred(struct thread *td, struct setcred_args *uap)
+{
+	const u_int flags = uap->flags;
+	const void *const uwcred = uap->wcred;
+	const size_t size = uap->size;
+	struct setcred_v0 wcred;
+#ifdef MAC
+	struct mac mac;
+	bool label_copied = false;	/* 'mac' holds a copied-in label. */
+#endif
+	gid_t smallgroups[CRED_SMALLGROUPS_NB];
+	gid_t *groups = NULL;
+	int error;
+
+	/*
+	 * Syscall entry point: copies in the version 0 structure, its groups
+	 * and MAC label from userland as requested by 'flags', deferring all
+	 * further sanity checks to kern_setcred_v0().  Returns 0 or an errno.
+	 */
+
+	/* There is only one version for now. */
+	if (SETCREDF_TO_VERSION(flags) != 0 || size != sizeof(wcred))
+		return (EINVAL);
+
+	error = copyin(uwcred, &wcred, sizeof(wcred));
+	if (error != 0)
+		return (error);
+
+	if (flags & SETCREDF_SUPP_GROUPS) {
+		/*
+		 * Check for the limit of number of groups right now in order to
+		 * limit the amount of bytes to copy.
+		 */
+		if (wcred.sc_supp_groups_nb > ngroups_max)
+			return (EINVAL);
+
+		/*
+		 * Also make room for the effective GID at index 0 right now,
+		 * so that kern_setcred_v0() does not have to allocate and copy
+		 * the supplementary groups a second time.
+		 */
+		groups = wcred.sc_supp_groups_nb < CRED_SMALLGROUPS_NB ?
+		    smallgroups : malloc((wcred.sc_supp_groups_nb + 1) *
+		    sizeof(*groups), M_TEMP, M_WAITOK);
+
+		error = copyin(wcred.sc_supp_groups, groups + 1,
+		    wcred.sc_supp_groups_nb * sizeof(*groups));
+		if (error != 0)
+			goto finish;
+		wcred.sc_supp_groups = groups + 1;
+	} else {
+		wcred.sc_supp_groups_nb = 0;
+		wcred.sc_supp_groups = NULL;
+	}
+
+#ifdef MAC
+	if (flags & SETCREDF_MAC_LABEL) {
+		error = mac_label_copyin(wcred.sc_label, &mac, NULL);
+		if (error != 0)
+			goto finish;
+		wcred.sc_label = &mac;
+		label_copied = true;
+	} else
+		wcred.sc_label = NULL;
+#endif
+
+	error = kern_setcred_v0(td, flags, &wcred, groups);
+
+finish:
+	if (groups != NULL && groups != smallgroups)
+		free(groups, M_TEMP);
+#ifdef MAC
+	/* 'wcred.sc_label' may still be the raw userland pointer here. */
+	if (label_copied)
+		free_copied_label(&mac);
+#endif
+	return (error);
+}
+
+/*
+ * Applies to the process of 'td' the credentials changes selected by 'flags';
+ * returns 0 or an errno value.  CAUTION: Normalizes the groups and updates
+ * 'wcred->sc_supp_groups_nb' accordingly.
+ *
+ * A non-NULL 'preallocated_groups' must hold 'wcred->sc_supp_groups_nb + 1'
+ * entries, supplementary groups starting at index 1 ('wcred->sc_supp_groups').
+ */
+int
+kern_setcred_v0(struct thread *const td, const u_int flags,
+    struct setcred_v0 *const wcred, gid_t *preallocated_groups)
+{
+	struct proc *const p = td->td_proc;
+	struct ucred *new_cred, *old_cred, *to_free_cred;
+	struct uidinfo *uip = NULL, *ruip = NULL;
+#ifdef MAC
+	void *mac_set_proc_data = NULL;
+	bool proc_label_set = false;
+#endif
+	gid_t *groups = NULL;
+	gid_t smallgroups[CRED_SMALLGROUPS_NB];
+	int error;
+	bool cred_set;
+
+	MPASS(SETCREDF_TO_VERSION(flags) == 0);
+	/* Bail out on unrecognized flags. */
+	if (flags & ~SETCREDF_MASK)
+		return (EINVAL);
+
+	/*
+	 * Part 1: We allocate and perform preparatory operations with no locks.
+	 */
+
+	if (flags & SETCREDF_SUPP_GROUPS) {
+		if (wcred->sc_supp_groups_nb > ngroups_max)
+			return (EINVAL);
+		if (preallocated_groups != NULL) {
+			groups = preallocated_groups;
+			MPASS(preallocated_groups + 1 == wcred->sc_supp_groups);
+		} else {
+			groups = wcred->sc_supp_groups_nb < CRED_SMALLGROUPS_NB ?
+			    smallgroups :
+			    malloc((wcred->sc_supp_groups_nb + 1) *
+			    sizeof(*groups), M_TEMP, M_WAITOK);
+			memcpy(groups + 1, wcred->sc_supp_groups,
+			    wcred->sc_supp_groups_nb * sizeof(*groups));
+		}
+	}
+
+	if (flags & SETCREDF_MAC_LABEL) {
+#ifdef MAC
+		error = mac_set_proc_prepare(td, wcred->sc_label,
+		    &mac_set_proc_data);
+		if (error != 0)
+			goto free_groups;
+#else
+		error = ENOTSUP;
+		goto free_groups;
+#endif
+	}
+
+	if (flags & SETCREDF_UID) {
+		AUDIT_ARG_EUID(wcred->sc_uid);
+		uip = uifind(wcred->sc_uid);
+	}
+	if (flags & SETCREDF_RUID) {
+		AUDIT_ARG_RUID(wcred->sc_ruid);
+		ruip = uifind(wcred->sc_ruid);
+	}
+	if (flags & SETCREDF_SVUID)
+		AUDIT_ARG_SUID(wcred->sc_svuid);
+
+	if (flags & SETCREDF_GID)
+		AUDIT_ARG_EGID(wcred->sc_gid);
+	if (flags & SETCREDF_RGID)
+		AUDIT_ARG_RGID(wcred->sc_rgid);
+	if (flags & SETCREDF_SVGID)
+		AUDIT_ARG_SGID(wcred->sc_svgid);
+	if (flags & SETCREDF_SUPP_GROUPS) {
+		int ngrp = wcred->sc_supp_groups_nb;
+
+		/*
+		 * Output the raw supplementary groups array for better
+		 * traceability.
+		 */
+		AUDIT_ARG_GROUPSET(groups + 1, ngrp);
+		++ngrp;	/* Account for slot 0 (effective GID, filled later). */
+		groups_normalize(&ngrp, groups);
+		wcred->sc_supp_groups_nb = ngrp - 1;
+	}
+
+	/*
+	 * We first completely build the new credentials and only then pass them
+	 * to MAC along with the old ones so that modules can check whether the
+	 * requested transition is allowed.
+	 */
+	new_cred = crget();
+	to_free_cred = new_cred;	/* Discarded unless installed below. */
+	if (flags & SETCREDF_SUPP_GROUPS)
+		crextend(new_cred, wcred->sc_supp_groups_nb + 1);
+
+#ifdef MAC
+	mac_cred_setcred_enter();
+#endif
+
+	/*
+	 * Part 2: We grab the process lock as to have a stable view of its
+	 * current credentials, and prepare a copy of them with the requested
+	 * changes applied under that lock.
+	 */
+
+	PROC_LOCK(p);
+	old_cred = crcopysafe(p, new_cred);
+
+	/*
+	 * Change user IDs.
+	 */
+	if (flags & SETCREDF_UID)
+		change_euid(new_cred, uip);
+	if (flags & SETCREDF_RUID)
+		change_ruid(new_cred, ruip);
+	if (flags & SETCREDF_SVUID)
+		change_svuid(new_cred, wcred->sc_svuid);
+
+	/*
+	 * Change groups.
+	 *
+	 * crsetgroups_internal() changes both the effective and supplementary
+	 * ones.
+	 */
+	if (flags & SETCREDF_SUPP_GROUPS) {
+		groups[0] = flags & SETCREDF_GID ? wcred->sc_gid :
+		    new_cred->cr_gid;
+		crsetgroups_internal(new_cred, wcred->sc_supp_groups_nb + 1,
+		    groups);
+	} else if (flags & SETCREDF_GID)
+		change_egid(new_cred, wcred->sc_gid);
+	if (flags & SETCREDF_RGID)
+		change_rgid(new_cred, wcred->sc_rgid);
+	if (flags & SETCREDF_SVGID)
+		change_svgid(new_cred, wcred->sc_svgid);
+
+#ifdef MAC
+	/*
+	 * Change the MAC label.
+	 */
+	if (flags & SETCREDF_MAC_LABEL) {
+		error = mac_set_proc_core(td, new_cred, mac_set_proc_data);
+		if (error != 0)
+			goto unlock_finish;
+		proc_label_set = true;
+	}
+
+	/*
+	 * MAC security modules checks.
+	 */
+	error = mac_cred_check_setcred(flags, old_cred, new_cred);
+	if (error != 0)
+		goto unlock_finish;
+#endif
+	/*
+	 * Privilege check.
+	 */
+	error = priv_check_cred(old_cred, PRIV_CRED_SETCRED);
+	if (error != 0)
+		goto unlock_finish;
+
+	/*
+	 * Set the new credentials, noting that they have changed.
+	 */
+	cred_set = proc_set_cred_enforce_proc_lim(p, new_cred);
+	if (cred_set) {
+		setsugid(p);
+		to_free_cred = old_cred;	/* Replaced: release it instead. */
+		MPASS(error == 0);
+	} else
+		error = EAGAIN;
+
+unlock_finish:
+	PROC_UNLOCK(p);
+	/*
+	 * Part 3: After releasing the process lock, we perform cleanups and
+	 * finishing operations.
+	 */
+
+#ifdef MAC
+	if (mac_set_proc_data != NULL)
+		mac_set_proc_finish(td, proc_label_set, mac_set_proc_data);
+	mac_cred_setcred_exit();
+#endif
+	crfree(to_free_cred);
+	if (uip != NULL)
+		uifree(uip);
+	if (ruip != NULL)
+		uifree(ruip);
+free_groups:
+	if (groups != preallocated_groups && groups != smallgroups)
+		free(groups, M_TEMP); /* Deals with 'groups' being NULL. */
+	return (error);
+}
+
 /*
  * Use the clause in B.4.2.2 that allows setuid/setgid to be 4.2/4.3BSD
  * compatible.  It says that setting the uid/gid to euid/egid is a special
@@ -859,15 +1160,6 @@
 	return (error);
 }
 
-static int
-gidp_cmp(const void *p1, const void *p2)
-{
-	const gid_t g1 = *(const gid_t *)p1;
-	const gid_t g2 = *(const gid_t *)p2;
-
-	return ((g1 > g2) - (g1 < g2));
-}
-
 /*
  * CAUTION: This function normalizes 'groups', possibly also changing the value
  * of '*ngrpp' as a consequence.
diff --git a/sys/kern/syscalls.c b/sys/kern/syscalls.c
--- a/sys/kern/syscalls.c
+++ b/sys/kern/syscalls.c
@@ -595,4 +595,5 @@
 	"timerfd_settime",			/* 587 = timerfd_settime */
 	"kcmp",			/* 588 = kcmp */
 	"getrlimitusage",			/* 589 = getrlimitusage */
+	"setcred",			/* 590 = setcred */
 };
diff --git a/sys/kern/syscalls.master b/sys/kern/syscalls.master
--- a/sys/kern/syscalls.master
+++ b/sys/kern/syscalls.master
@@ -3341,5 +3341,12 @@
 		    _Out_ rlim_t *res
 		);
 	}
+590	AUE_SETCRED	STD|CAPENABLED {
+		int setcred(
+		    u_int flags,
+		    _In_reads_bytes_(size) const void *wcred,
+		    size_t size
+		);
+	}
 
 ; vim: syntax=off
diff --git a/sys/kern/systrace_args.c b/sys/kern/systrace_args.c
--- a/sys/kern/systrace_args.c
+++ b/sys/kern/systrace_args.c
@@ -3465,6 +3465,15 @@
 		*n_args = 3;
 		break;
 	}
+	/* setcred */
+	case 590: {
+		struct setcred_args *p = params;
+		uarg[a++] = p->flags; /* u_int */
+		uarg[a++] = (intptr_t)p->wcred; /* const void * */
+		uarg[a++] = p->size; /* size_t */
+		*n_args = 3;
+		break;
+	}
 	default:
 		*n_args = 0;
 		break;
@@ -9271,6 +9280,22 @@
 			break;
 		};
 		break;
+	/* setcred */
+	case 590:
+		switch (ndx) {
+		case 0:
+			p = "u_int";
+			break;
+		case 1:
+			p = "userland const void *";
+			break;
+		case 2:
+			p = "size_t";
+			break;
+		default:
+			break;
+		};
+		break;
 	default:
 		break;
 	};
@@ -11249,6 +11274,11 @@
 		if (ndx == 0 || ndx == 1)
 			p = "int";
 		break;
+	/* setcred */
+	case 590:
+		if (ndx == 0 || ndx == 1)
+			p = "int";
+		break;
 	default:
 		break;
 	};
diff --git a/sys/security/mac/mac_cred.c b/sys/security/mac/mac_cred.c
--- a/sys/security/mac/mac_cred.c
+++ b/sys/security/mac/mac_cred.c
@@ -209,6 +209,55 @@
 	return (error);
 }
 
+/*
+ * Entry hook for setcred().
+ *
+ * Called with no lock held by setcred() so that MAC modules may allocate memory
+ * in preparation for checking privileges.  A call to this hook is always
+ * followed by a matching call to mac_cred_setcred_exit().  Between these two,
+ * setcred() may or may not call mac_cred_check_setcred().
+ */
+void
+mac_cred_setcred_enter(void)
+{
+	/* NOTE(review): NOSLEEP dispatch -- confirm hooks may use M_WAITOK. */
+	MAC_POLICY_PERFORM_NOSLEEP(cred_setcred_enter);
+}
+
+MAC_CHECK_PROBE_DEFINE3(cred_check_setcred, "unsigned int", "struct ucred *",
+    "struct ucred *");
+
+/*
+ * Check hook for setcred(): passes 'flags' and the old and new credentials to
+ * the policies and returns the resulting error.
+ *
+ * When called, the current process' lock is held, so memory allocations must be
+ * done in advance in mac_cred_setcred_enter().  Never tamper with that lock.
+ */
+int
+mac_cred_check_setcred(u_int flags, const struct ucred *old_cred,
+    struct ucred *new_cred)
+{
+	int error;	/* Presumably set by MAC_POLICY_CHECK_NOSLEEP(). */
+
+	MAC_POLICY_CHECK_NOSLEEP(cred_check_setcred, flags, old_cred, new_cred);
+	MAC_CHECK_PROBE3(cred_check_setcred, error, flags, old_cred, new_cred);
+
+	return (error);
+}
+
+/*
+ * Exit hook for setcred().
+ *
+ * Called with no lock held, exactly once per call to mac_cred_setcred_enter().
+ */
+void
+mac_cred_setcred_exit(void)
+{
+
+	MAC_POLICY_PERFORM_NOSLEEP(cred_setcred_exit);
+}
+
 MAC_CHECK_PROBE_DEFINE2(cred_check_setuid, "struct ucred *", "uid_t");
 
 int
diff --git a/sys/security/mac/mac_framework.h b/sys/security/mac/mac_framework.h
--- a/sys/security/mac/mac_framework.h
+++ b/sys/security/mac/mac_framework.h
@@ -72,6 +72,7 @@
 struct mount;
 struct msg;
 struct msqid_kernel;
+struct pipepair;
 struct proc;
 struct semid_kernel;
 struct shmfd;
@@ -80,7 +81,6 @@
 struct socket;
 struct sysctl_oid;
 struct sysctl_req;
-struct pipepair;
 struct thread;
 struct timespec;
 struct ucred;
@@ -115,6 +115,10 @@
 int	mac_cred_check_setaudit_addr(struct ucred *cred,
 	    struct auditinfo_addr *aia);
 int	mac_cred_check_setauid(struct ucred *cred, uid_t auid);
+void	mac_cred_setcred_enter(void);
+int	mac_cred_check_setcred(u_int flags, const struct ucred *old_cred,
+	    struct ucred *new_cred);
+void	mac_cred_setcred_exit(void);
 int	mac_cred_check_setegid(struct ucred *cred, gid_t egid);
 int	mac_cred_check_seteuid(struct ucred *cred, uid_t euid);
 int	mac_cred_check_setgid(struct ucred *cred, gid_t gid);
diff --git a/sys/security/mac/mac_policy.h b/sys/security/mac/mac_policy.h
--- a/sys/security/mac/mac_policy.h
+++ b/sys/security/mac/mac_policy.h
@@ -144,6 +144,10 @@
 typedef int	(*mpo_cred_check_setaudit_addr_t)(struct ucred *cred,
 		    struct auditinfo_addr *aia);
 typedef int	(*mpo_cred_check_setauid_t)(struct ucred *cred, uid_t auid);
+typedef void	(*mpo_cred_setcred_enter_t)(void);
+typedef int	(*mpo_cred_check_setcred_t)(u_int flags,
+		    const struct ucred *old_cred, struct ucred *new_cred);
+typedef void	(*mpo_cred_setcred_exit_t)(void);
 typedef int	(*mpo_cred_check_setegid_t)(struct ucred *cred, gid_t egid);
 typedef int	(*mpo_cred_check_seteuid_t)(struct ucred *cred, uid_t euid);
 typedef int	(*mpo_cred_check_setgid_t)(struct ucred *cred, gid_t gid);
@@ -720,6 +724,9 @@
 	mpo_cred_check_setaudit_t		mpo_cred_check_setaudit;
 	mpo_cred_check_setaudit_addr_t		mpo_cred_check_setaudit_addr;
 	mpo_cred_check_setauid_t		mpo_cred_check_setauid;
+	mpo_cred_setcred_enter_t		mpo_cred_setcred_enter;
+	mpo_cred_check_setcred_t		mpo_cred_check_setcred;
+	mpo_cred_setcred_exit_t			mpo_cred_setcred_exit;
 	mpo_cred_check_setuid_t			mpo_cred_check_setuid;
 	mpo_cred_check_seteuid_t		mpo_cred_check_seteuid;
 	mpo_cred_check_setgid_t			mpo_cred_check_setgid;
@@ -1033,8 +1040,9 @@
  *   3                       7.x
  *   4                       8.x
  *   5                       14.x
+ *   6                       15.x
  */
-#define	MAC_VERSION	5
+#define	MAC_VERSION	6
 
 #define	MAC_POLICY_SET(mpops, mpname, mpfullname, mpflags, privdata_wanted) \
 	static struct mac_policy_conf mpname##_mac_policy_conf = {	\
diff --git a/sys/security/mac_do/mac_do.c b/sys/security/mac_do/mac_do.c
--- a/sys/security/mac_do/mac_do.c
+++ b/sys/security/mac_do/mac_do.c
@@ -5,15 +5,19 @@
  */
 
 #include <sys/param.h>
+#include <sys/ctype.h>
 #include <sys/malloc.h>
 #include <sys/jail.h>
 #include <sys/kernel.h>
+#include <sys/libkern.h>
+#include <sys/limits.h>
 #include <sys/lock.h>
 #include <sys/module.h>
 #include <sys/mount.h>
 #include <sys/mutex.h>
 #include <sys/priv.h>
 #include <sys/proc.h>
+#include <sys/refcount.h>
 #include <sys/socket.h>
 #include <sys/sx.h>
 #include <sys/sysctl.h>
@@ -21,9 +25,9 @@
 #include <sys/ucred.h>
 #include <sys/vnode.h>
 
-#include <security/mac/mac_policy.h>
+#include <machine/stdarg.h>
 
-SYSCTL_DECL(_security_mac);
+#include <security/mac/mac_policy.h>
 
 static SYSCTL_NODE(_security_mac, OID_AUTO, do,
     CTLFLAG_RW|CTLFLAG_MPSAFE, 0, "mac_do policy controls");
@@ -32,514 +36,2058 @@
 SYSCTL_INT(_security_mac_do, OID_AUTO, enabled, CTLFLAG_RWTUN,
     &do_enabled, 0, "Enforce do policy");
 
+static int	print_parse_error = 1;
+SYSCTL_INT(_security_mac_do, OID_AUTO, print_parse_error, CTLFLAG_RWTUN,
+    &print_parse_error, 0, "Print parse errors on setting rules "
+    "(via sysctl(8)).");
+
 static MALLOC_DEFINE(M_DO, "do_rule", "Rules for mac_do");
 
 #define MAC_RULE_STRING_LEN	1024
 
-static unsigned		mac_do_osd_jail_slot;
+static unsigned		osd_jail_slot;
+static unsigned		osd_thread_slot;
 
-#define RULE_UID	1
-#define RULE_GID	2
-#define RULE_ANY	3
+#define IT_INVALID	0 /* Must stay 0. */
+#define IT_UID		1
+#define IT_GID		2
+#define IT_ANY		3
+#define IT_LAST		IT_ANY
+
+static const char *id_type_to_str[] = {
+	[IT_INVALID]	= "invalid",
+	[IT_UID]	= "uid",
+	[IT_GID]	= "gid",
+	/* See also parse_id_type(). */
+	[IT_ANY]	= "*",
+};
+
+#define PARSE_ERROR_SIZE	256
+
+struct parse_error {
+	size_t	pos;
+	char	msg[PARSE_ERROR_SIZE];
+};
+
+/*
+ * We assume that 'uid_t' and 'gid_t' are aliases to 'int' in conversions
+ * required for parsing rules specification strings.
+ */
+_Static_assert(sizeof(uid_t) == sizeof(int) && (int)(uid_t)-1 == -1 &&
+    sizeof(gid_t) == sizeof(int) && (int)(gid_t)-1 == -1,
+    "mac_do(4) currently assumes that 'uid_t' and 'gid_t' are aliases to 'int'");
+
+/*
+ * Internal flags.
+ *
+ * They either apply as per-type (t) or per-ID (i) but are conflated because all
+ * per-ID flags are also valid as per-type ones to qualify the "current" (".")
+ * per-type flag.  Also, some of them are in fact exclusive, but we use one-hot
+ * encoding for simplicity.
+ *
+ * There is currently room for "only" 16 bits.  As these flags are purely
+ * internal, they can be renumbered and/or their type changed as needed.
+ *
+ * See also the check_*() functions below.
+ */
+typedef uint16_t	flags_t;
+
+/* (i,gid) Specification concerns primary groups. */
+#define MDF_PRIMARY	1
+/* (i,gid) Specification concerns supplementary groups. */
+#define	MDF_SUPP_ALLOW	2
+/* (i,gid) Group must appear as a supplementary group. */
+#define	MDF_SUPP_MUST	4
+/* (i,gid) Group must not appear as a supplementary group. */
+#define MDF_SUPP_DONT	8
+#define MDF_SUPP_MASK	(MDF_SUPP_ALLOW | MDF_SUPP_MUST | MDF_SUPP_DONT)
+#define MDF_ID_MASK	(MDF_PRIMARY | MDF_SUPP_MASK)
+
+/*
+ * (t) All IDs allowed.
+ *
+ * For GIDs, MDF_ANY only concerns primary groups.  The MDF_PRIMARY and
+ * MDF_SUPP_* flags never apply to MDF_ANY, but can be present if MDF_CURRENT is
+ * present also, as usual.
+ */
+#define	MDF_ANY			(1u << 8)
+/* (t) Current IDs allowed. */
+#define	MDF_CURRENT		(1u << 9)
+#define MDF_TYPE_COMMON_MASK	(MDF_ANY | MDF_CURRENT)
+/* (t,gid) All IDs allowed as supplementary groups. */
+#define	MDF_ANY_SUPP		(1u << 10)
+/* (t,gid) Some ID has MDF_SUPP_MUST or MDF_SUPP_DONT. */
+#define	MDF_MAY_REJ_SUPP	(1u << 11)
+/* (t,gid) Whether some target clause concerns primary groups.  Used during
+ * parsing only. */
+#define	MDF_HAS_PRIMARY_CLAUSE	(1u << 12)
+/* (t,gid) Whether some target clause concerns supplementary groups.  Used
+ * during parsing only. */
+#define	MDF_HAS_SUPP_CLAUSE	(1u << 13)
+#define	MDF_TYPE_GID_MASK	(MDF_ANY_SUPP | MDF_MAY_REJ_SUPP | \
+    MDF_HAS_PRIMARY_CLAUSE | MDF_HAS_SUPP_CLAUSE)
+#define	MDF_TYPE_MASK		(MDF_TYPE_COMMON_MASK | MDF_TYPE_GID_MASK)
+
+/*
+ * Persistent structures.
+ */
+
+struct id_spec {
+	int		 id;
+	flags_t		 flags; /* See MDF_* above. */
+};
+
+/*
+ * This limits the number of target clauses per type to 65535.  With the current
+ * value of MAC_RULE_STRING_LEN (1024), this is way more than enough anyway.
+ */
+typedef uint16_t	 id_nb_t;
+/* We only have a few IT_* types. */
+typedef uint16_t	 id_type_t;
 
 struct rule {
-	int	from_type;
-	union {
-		uid_t f_uid;
-		gid_t f_gid;
-	};
-	int	to_type;
-	uid_t t_uid;
-	TAILQ_ENTRY(rule) r_entries;
+	STAILQ_ENTRY(rule) r_entries;
+	id_type_t	 from_type;
+	int		 from_id;
+	flags_t		 uid_flags; /* See MDF_* above. */
+	id_nb_t		 uids_nb;
+	flags_t		 gid_flags; /* See MDF_* above. */
+	id_nb_t		 gids_nb;
+	struct id_spec	*uids;
+	struct id_spec	*gids;
 };
 
-struct mac_do_rule {
-	char string[MAC_RULE_STRING_LEN];
-	TAILQ_HEAD(rulehead, rule) head;
+STAILQ_HEAD(rulehead, rule);
+
+struct rules {
+	char		string[MAC_RULE_STRING_LEN];
+	struct rulehead	head;
+	volatile u_int	use_count __aligned(CACHE_LINE_SIZE);
 };
 
-static struct mac_do_rule rules0;
+/*
+ * Temporary structures used to build a 'struct rule' above.
+ */
+
+struct id_elem {
+	STAILQ_ENTRY(id_elem) ie_entries;
+	struct id_spec spec;
+};
+
+STAILQ_HEAD(id_list, id_elem);
+
+#ifdef INVARIANTS
+static void
+check_type(const id_type_t type)
+{
+	if (type > IT_LAST)
+		panic("Invalid type number %u", type);
+}
 
 static void
-toast_rules(struct rulehead *head)
+panic_for_unexpected_flags(const id_type_t type, const flags_t flags,
+    const char *const str)
 {
-	struct rule *r;
-
-	while ((r = TAILQ_FIRST(head)) != NULL) {
-		TAILQ_REMOVE(head, r, r_entries);
-		free(r, M_DO);
-	}
+	panic("ID type %s: Unexpected flags %u (%s), ", id_type_to_str[type],
+	    flags, str);
 }
 
-static int
-parse_rule_element(char *element, struct rule **rule)
+static void
+check_type_and_id_flags(const id_type_t type, const flags_t flags)
 {
-	int error = 0;
-	char *type, *id, *p;
-	struct rule *new;
+	const char *str;
 
-	new = malloc(sizeof(*new), M_DO, M_ZERO|M_WAITOK);
-
-	type = strsep(&element, "=");
-	if (type == NULL) {
-		error = EINVAL;
-		goto out;
-	}
-	if (strcmp(type, "uid") == 0) {
-		new->from_type = RULE_UID;
-	} else if (strcmp(type, "gid") == 0) {
-		new->from_type = RULE_GID;
-	} else {
-		error = EINVAL;
-		goto out;
-	}
-	id = strsep(&element, ":");
-	if (id == NULL) {
-		error = EINVAL;
-		goto out;
-	}
-	if (new->from_type == RULE_UID)
-		new->f_uid = strtol(id, &p, 10);
-	if (new->from_type == RULE_GID)
-		new->f_gid = strtol(id, &p, 10);
-	if (*p != '\0') {
-		error = EINVAL;
-		goto out;
-	}
-	if (*element == '\0') {
-		error = EINVAL;
-		goto out;
-	}
-	if (strcmp(element, "any") == 0 || strcmp(element, "*") == 0) {
-		new->to_type = RULE_ANY;
-	} else {
-		new->to_type = RULE_UID;
-		new->t_uid = strtol(element, &p, 10);
-		if (*p != '\0') {
-			error = EINVAL;
-			goto out;
+	check_type(type);
+	switch (type) {
+	case IT_UID:
+		if (flags != 0) {
+			str = "only 0 allowed";
+			goto unexpected_flags;
 		}
-	}
-out:
-	if (error != 0) {
-		free(new, M_DO);
-		*rule = NULL;
-	} else
-		*rule = new;
-	return (error);
-}
-
-static int
-parse_rules(char *string, struct rulehead *head)
-{
-	struct rule *new;
-	char *element;
-	int error = 0;
-
-	while ((element = strsep(&string, ",")) != NULL) {
-		if (strlen(element) == 0)
-			continue;
-		error = parse_rule_element(element, &new);
-		if (error)
-			goto out;
-		TAILQ_INSERT_TAIL(head, new, r_entries);
-	}
-out:
-	if (error != 0)
-		toast_rules(head);
-	return (error);
-}
-
-static struct mac_do_rule *
-mac_do_rule_find(struct prison *spr, struct prison **prp)
-{
-	struct prison *pr;
-	struct mac_do_rule *rules;
-
-	for (pr = spr;; pr = pr->pr_parent) {
-		mtx_lock(&pr->pr_mtx);
-		if (pr == &prison0) {
-			rules = &rules0;
-			break;
+		break;
+	case IT_GID:
+		if (flags & ~MDF_ID_MASK) {
+			str = "only bits in MDF_ID_MASK allowed";
+			goto unexpected_flags;
 		}
-		rules = osd_jail_get(pr, mac_do_osd_jail_slot);
-		if (rules != NULL)
-			break;
-		mtx_unlock(&pr->pr_mtx);
+		if (!powerof2(flags & MDF_SUPP_MASK)) {
+			str = "only a single flag in MDF_SUPP_MASK allowed";
+			goto unexpected_flags;
+		}
+		break;
+	default:
+	    __assert_unreachable();
 	}
-	*prp = pr;
+	return;
 
+unexpected_flags:
+	panic_for_unexpected_flags(type, flags, str);
+}
+
+static void
+check_type_and_id_spec(const id_type_t type, const struct id_spec *const is)
+{
+	check_type_and_id_flags(type, is->flags);
+	if (is->id < 0)
+		panic("ID type %s: Negative id %d", id_type_to_str[type],
+		    is->id);
+}
+
+/* Panics if the per-type 'flags' are inconsistent for ID type 'type'. */
+static void
+check_type_and_type_flags(const id_type_t type, const flags_t flags)
+{
+	const char *str;
+
+	check_type_and_id_flags(type, flags & MDF_ID_MASK);
+	if (flags & ~MDF_ID_MASK & ~MDF_TYPE_MASK) {
+		str = "only MDF_ID_MASK | MDF_TYPE_MASK bits allowed";
+		goto unexpected_flags;
+	}
+	if ((flags & MDF_ANY) && (flags & MDF_CURRENT) &&
+	    (type == IT_UID || (flags & MDF_PRIMARY))) {
+		str = "MDF_ANY and MDF_CURRENT are exclusive for UIDs "
+		    "or primary group GIDs";
+		goto unexpected_flags;
+	}
+	if ((flags & MDF_ANY_SUPP) && (flags & MDF_CURRENT) &&
+	    (flags & MDF_SUPP_MASK)) {
+		str = "MDF_ANY_SUPP and MDF_CURRENT with supplementary "
+		    "groups specification are exclusive";
+		goto unexpected_flags;
+	}
+	if (((flags & MDF_PRIMARY) || (flags & MDF_ANY)) &&
+	    !(flags & MDF_HAS_PRIMARY_CLAUSE)) {
+		str = "Presence of folded primary clause not reflected "
+		    "by presence of MDF_HAS_PRIMARY_CLAUSE";
+		goto unexpected_flags;
+	}
+	if (((flags & MDF_SUPP_MASK) || (flags & MDF_ANY_SUPP)) &&
+	    !(flags & MDF_HAS_SUPP_CLAUSE)) {
+		str = "Presence of folded supplementary clause not reflected "
+		    "by presence of MDF_HAS_SUPP_CLAUSE";
+		goto unexpected_flags;
+	}
+	return;
+unexpected_flags:
+	panic_for_unexpected_flags(type, flags, str);
+}
+#else /* !INVARIANTS */
+#define check_type_and_id_flags(...)
+#define check_type_and_id_spec(...)
+#define check_type_and_type_flags(...)
+#endif /* INVARIANTS */
+
+/*
+ * Merges ID flags 'src' into '*dest'.
+ *
+ * Returns EALREADY if both share some flag, or EINVAL if MDF_SUPP_DONT would
+ * be combined with another MDF_SUPP_* flag ('*dest' untouched), else 0.
+ */
+static int
+coalesce_id_flags(const flags_t src, flags_t *const dest)
+{
+	flags_t merged, supp;
+
+	if ((src & *dest) != 0)
+		return (EALREADY);
+
+	merged = src | *dest;
+	supp = merged & MDF_SUPP_MASK;
+
+	/* MDF_SUPP_DONT cannot be combined with any other MDF_SUPP_* flag. */
+	if ((supp & MDF_SUPP_DONT) != 0 && (supp & ~MDF_SUPP_DONT) != 0)
+		return (EINVAL);
+
+	/* MDF_SUPP_ALLOW is redundant once MDF_SUPP_MUST is requested. */
+	if ((supp & MDF_SUPP_ALLOW) != 0 && (supp & MDF_SUPP_MUST) != 0)
+		merged &= ~MDF_SUPP_ALLOW;
+
+	*dest = merged;
+	return (0);
+}
+
+static void
+toast_rules(struct rules *const rules)
+{
+	struct rule *cur, *next;
+
+	for (cur = STAILQ_FIRST(&rules->head); cur != NULL; cur = next) {
+		next = STAILQ_NEXT(cur, r_entries);	/* Before freeing. */
+		free(cur->uids, M_DO);
+		free(cur->gids, M_DO);
+		free(cur, M_DO);
+	}
+	free(rules, M_DO);	/* Finally, the container itself. */
+}
+
+static struct rules *
+alloc_rules(void)
+{
+	struct rules *const rules = malloc(sizeof(*rules), M_DO, M_WAITOK);
+
+	_Static_assert(MAC_RULE_STRING_LEN > 0, "MAC_RULE_STRING_LEN <= 0!");
+	rules->string[0] = 0;
+	STAILQ_INIT(&rules->head);
+	rules->use_count = 0;
 	return (rules);
 }
 
-static int
-sysctl_rules(SYSCTL_HANDLER_ARGS)
+static bool
+is_null_or_empty(const char *s)
 {
-	char *copy_string, *new_string;
-	struct rulehead head, saved_head;
-	struct prison *pr;
-	struct mac_do_rule *rules;
+	return (s == NULL || s[0] == 0);
+}
+
+/*
+ * String to non-negative int.
+ *
+ * Returns minus an error (EINVAL or EOVERFLOW) in case of failure.  Rejects an
+ * empty string, an explicit sign at start, as well as whitespaces.
+ */
+static int
+strtonni(const char *const restrict s, const char **const restrict endptr,
+    int base)
+{
+	long l;
+	char *ep;
+
+	if (!isdigit((unsigned char)s[0])) {
+		if (endptr != NULL)
+			*endptr = s;
+		return (-EINVAL);
+	}
+
+	l = strtol(s, &ep, base);
+	if (endptr != NULL)
+		*endptr = ep;
+	if (l < 0)
+		return (-EINVAL);
+	else if (l == LONG_MAX || l > INT_MAX)
+		return (-EOVERFLOW);
+	return ((int)l);
+}
+
+/*
+ * strsep() variant skipping spaces and tabs.
+ *
+ * Skips spaces and tabs at beginning and end of the token before one of the
+ * 'delim' characters, i.e., at start of string and just before one of the
+ * delimiter characters (so it doesn't prevent tokens containing spaces and tabs
+ * in the middle).
+ */
+static char *
+strsep_noblanks(char **const stringp, const char *delim)
+{
+	char *p = *stringp;
+	char *ret, *wsp;
+	size_t idx;
+
+	if (p == NULL)
+		return (NULL);
+
+	idx = strspn(p, " \t");
+	p += idx;
+
+	ret = strsep(&p, delim);
+
+	/* Rewind spaces/tabs at the end. */
+	if (p == NULL)
+		wsp = ret + strlen(ret);
+	else
+		wsp = p - 1;
+	for (; wsp != ret; --wsp) {
+		switch (wsp[-1]) {
+		case ' ':
+		case '\t':
+			continue;
+		}
+		break;
+	}
+	*wsp = '\0';
+
+	*stringp = p;
+	return (ret);
+}
+
+
+static void
+alloc_parse_error(struct parse_error **const parse_error, const size_t pos,
+    const char *const fmt, ...)
+{
+	struct parse_error *const err = malloc(sizeof(*err), M_DO, M_WAITOK);
+	va_list ap;
+
+	err->pos = pos;
+	va_start(ap, fmt);
+	vsnprintf(err->msg, PARSE_ERROR_SIZE, fmt, ap);
+	va_end(ap);
+
+	MPASS(*parse_error == NULL);
+	*parse_error = err;
+}
+
+static void
+free_parse_error(struct parse_error *const parse_error)
+{
+	free(parse_error, M_DO);
+}
+
+static int
+parse_id_type(const char *const string, id_type_t *const type,
+    struct parse_error **const parse_error)
+{
+	/*
+	 * Special case for "any", as the canonical form for IT_ANY in
+	 * id_type_to_str[] is "*".
+	 */
+	if (strcmp(string, "any") == 0) {
+		*type = IT_ANY;
+		return (0);
+	}
+
+	/* Start at 1 to avoid parsing "invalid". */
+	for (size_t i = 1; i <= IT_LAST; ++i) {
+		if (strcmp(string, id_type_to_str[i]) == 0) {
+			*type = i;
+			return (0);
+		}
+	}
+
+	*type = IT_INVALID;
+	alloc_parse_error(parse_error, 0, "No valid type found.");
+	return (EINVAL);
+}
+
+static bool
+parse_any(const char *const string)
+{
+	return (strcmp(string, "*") == 0 || strcmp(string, "any") == 0);
+}
+
+static bool
+has_clauses(const id_nb_t nb, const flags_t type_flags)
+{
+	return ((type_flags & MDF_TYPE_MASK) || nb != 0);
+}
+
+static int
+parse_target_clause(char *to, struct rule *const rule,
+    struct id_list *const uid_list, struct id_list *const gid_list,
+    struct parse_error **const parse_error)
+{
+	const char *const start = to;
+	char *to_type, *to_id;
+	const char *p;
+	struct id_list *list;
+	id_nb_t *nb;
+	flags_t *tflags;
+	struct id_elem *ie;
+	struct id_spec is = {};
+	u_int gid_flags = 0;
+	id_type_t type;
 	int error;
 
-	rules = mac_do_rule_find(req->td->td_ucred->cr_prison, &pr);
-	mtx_unlock(&pr->pr_mtx);
-	if (req->newptr == NULL)
-		return (sysctl_handle_string(oidp, rules->string, MAC_RULE_STRING_LEN, req));
+	MPASS(*parse_error == NULL);
+	MPASS(to != NULL);
+	to_type = strsep_noblanks(&to, "=");
+	MPASS(to_type != NULL);
+	error = parse_id_type(to_type, &type, parse_error);
+	if (error != 0)
+		goto einval;
 
-	new_string = malloc(MAC_RULE_STRING_LEN, M_DO,
-	    M_WAITOK|M_ZERO);
-	mtx_lock(&pr->pr_mtx);
-	strlcpy(new_string, rules->string, MAC_RULE_STRING_LEN);
-	mtx_unlock(&pr->pr_mtx);
+	to_id = strsep_noblanks(&to, "");
+	switch (type) {
+	case IT_GID:
+		if (to_id == NULL) {
+			alloc_parse_error(parse_error, to_type - start,
+			    "No '=' and ID specification after type '%s'.",
+			    to_type);
+			goto einval;
+		}
 
-	error = sysctl_handle_string(oidp, new_string, MAC_RULE_STRING_LEN, req);
-	if (error)
-		goto out;
+		/*
+		 * This is easily wrapped into a loop for when/if we allow
+		 * multiple flags to be specified at once.
+		 */
+		switch (*to_id) {
+		case '+':
+			is.flags |= MDF_SUPP_ALLOW;
+			goto has_supp_clause;
+		case '!':
+			is.flags |= MDF_SUPP_MUST;
+			gid_flags |= MDF_MAY_REJ_SUPP;
+			goto has_supp_clause;
+		case '-':
+			is.flags |= MDF_SUPP_DONT;
+			gid_flags |= MDF_MAY_REJ_SUPP;
+			goto has_supp_clause;
+		has_supp_clause:
+			gid_flags |= MDF_HAS_SUPP_CLAUSE;
+			++to_id;
+			break;
+		default:
+			is.flags |= MDF_PRIMARY;
+			gid_flags |= MDF_HAS_PRIMARY_CLAUSE;
+			break;
+		}
 
-	copy_string = strdup(new_string, M_DO);
-	TAILQ_INIT(&head);
-	error = parse_rules(copy_string, &head);
-	free(copy_string, M_DO);
-	if (error)
-		goto out;
-	TAILQ_INIT(&saved_head);
-	mtx_lock(&pr->pr_mtx);
-	TAILQ_CONCAT(&saved_head, &rules->head, r_entries);
-	TAILQ_CONCAT(&rules->head, &head, r_entries);
-	strlcpy(rules->string, new_string, MAC_RULE_STRING_LEN);
-	mtx_unlock(&pr->pr_mtx);
-	toast_rules(&saved_head);
+		list = gid_list;
+		nb = &rule->gids_nb;
+		tflags = &rule->gid_flags;
 
+		/* "*" or "any"? */
+		if (parse_any(to_id)) {
+			/*
+			 * We check that we have not seen any other clause of
+			 * the same category (i.e., concerning primary or
+			 * supplementary groups).
+			 */
+			if (is.flags & MDF_PRIMARY) {
+				if (*tflags & MDF_HAS_PRIMARY_CLAUSE) {
+					alloc_parse_error(parse_error,
+					    to_id - start,
+					    "'any' specified after another "
+					    "(primary) GID.");
+					goto einval;
+				}
+				*tflags |= gid_flags | MDF_ANY;
+			} else {
+				/*
+				 * If a supplementary group flag was present, it
+				 * must be MDF_SUPP_ALLOW ("+").
+				 */
+				if ((is.flags & MDF_SUPP_MASK) != MDF_SUPP_ALLOW) {
+					alloc_parse_error(parse_error,
+					    to_id - start,
+					    "'any' specified with another "
+					    "flag than '+'.");
+					goto einval;
+				}
+				if (*tflags & MDF_HAS_SUPP_CLAUSE) {
+					alloc_parse_error(parse_error,
+					    to_id - start,
+					    "'+any' specified after another "
+					    "(supplementary) GID.");
+					goto einval;
+				}
+				*tflags |= gid_flags | MDF_ANY_SUPP;
+			}
+			goto check_type_and_finish;
+		} else {
+			/*
+			 * Check that we haven't already seen "any" for the same
+			 * category.
+			 */
+			if ((is.flags & MDF_PRIMARY)) {
+				if (*tflags & MDF_ANY) {
+					alloc_parse_error(parse_error,
+					    to_id - start,
+					    "Some (primary) GID specified after "
+					    "'any'.");
+					goto einval;
+				}
+			} else if (*tflags & MDF_ANY_SUPP &&
+			    is.flags & MDF_SUPP_ALLOW) {
+				alloc_parse_error(parse_error,
+				    to_id - start,
+				    "Some (supplementary) GID specified after "
+				    "'+any'.");
+				goto einval;
+			}
+			*tflags |= gid_flags;
+		}
+		break;
+
+	case IT_UID:
+		if (to_id == NULL) {
+			alloc_parse_error(parse_error, to_type - start,
+			    "No '=' and ID specification after type '%s'.",
+			    to_type);
+			goto einval;
+		}
+
+		list = uid_list;
+		nb = &rule->uids_nb;
+		tflags = &rule->uid_flags;
+
+		/* "*" or "any"? */
+		if (parse_any(to_id)) {
+			/* There must not be any other clause. */
+			if (has_clauses(*nb, *tflags)) {
+				alloc_parse_error(parse_error, to_id - start,
+				    "'any' specified after another UID.");
+				goto einval;
+			}
+			*tflags |= MDF_ANY;
+			goto check_type_and_finish;
+		} else {
+			/*
+			 * Check that we haven't already seen "any" for the same
+			 * category.
+			 */
+			if (*tflags & MDF_ANY) {
+				alloc_parse_error(parse_error, to_id - start,
+				    "Some UID specified after 'any'.");
+				goto einval;
+			}
+		}
+		break;
+
+	case IT_ANY:
+		/* No ID allowed. */
+		if (to_id != NULL) {
+			alloc_parse_error(parse_error, to_type - start,
+			    "No '=' and ID allowed after type '%s'.", to_type);
+			goto einval;
+		}
+		/*
+		 * We can't have IT_ANY after any other IT_*, it must be the
+		 * only one.
+		 */
+		if (has_clauses(rule->uids_nb, rule->uid_flags) ||
+		    has_clauses(rule->gids_nb, rule->gid_flags)) {
+			alloc_parse_error(parse_error, to_type - start,
+			    "Target clause of type '%s' coming after another "
+			    "clause (must be alone).", to_type);
+			goto einval;
+		}
+		rule->uid_flags |= MDF_ANY;
+		rule->gid_flags |= MDF_ANY | MDF_ANY_SUPP |
+		    MDF_HAS_PRIMARY_CLAUSE | MDF_HAS_SUPP_CLAUSE;
+		goto finish;
+
+	default:
+		/* parse_id_type() returns no other types currently. */
+		__assert_unreachable();
+	}
+
+	/* These cases have been treated above. */
+	MPASS((type == IT_UID || type == IT_GID) && !parse_any(to_id));
+
+	/* "."? */
+	if (strcmp(to_id, ".") == 0) {
+		if (*tflags & MDF_CURRENT) {
+			/* Duplicate "." <id>.  Try to coalesce. */
+			error = coalesce_id_flags(is.flags, tflags);
+			if (error != 0) {
+				alloc_parse_error(parse_error, to_id - start,
+				    "Incompatible flags with prior clause "
+				    "with same target.");
+				goto einval;
+			}
+		} else
+			*tflags |= MDF_CURRENT | is.flags;
+		goto check_type_and_finish;
+	}
+
+	/* Parse an ID. */
+	is.id = strtonni(to_id, &p, 10);
+	if (is.id < 0 || *p != '\0') {
+		alloc_parse_error(parse_error, to_id - start,
+		    "Cannot parse a numerical ID (base 10, no sign).");
+		goto einval;
+	}
+
+	/*
+	 * We check for duplicate IDs and coalesce their 'struct id_spec' only
+	 * at end of parse_single_rule() because it is much more performant then
+	 * (using sorted arrays).
+	 */
+	++*nb;
+	if (*nb == 0) {
+		alloc_parse_error(parse_error, 0,
+		    "Too many target clauses of type '%s'.", to_type);
+		return (EOVERFLOW);
+	}
+	ie = malloc(sizeof(*ie), M_DO, M_WAITOK);
+	ie->spec = is;
+	STAILQ_INSERT_TAIL(list, ie, ie_entries);
+	check_type_and_id_spec(type, &is);
+check_type_and_finish:
+	check_type_and_type_flags(type, *tflags);
+finish:
+	return (0);
+einval:
+	/* We must have built a parse error on error. */
+	MPASS(*parse_error != NULL);
+	return (EINVAL);
+}
+
+static int
+int_cmp(const int i1, const int i2)
+{
+	return ((i1 > i2) - (i1 < i2));
+}
+
+static int
+id_spec_cmp(const void *const p1, const void *const p2)
+{
+	const struct id_spec *const is1 = p1;
+	const struct id_spec *const is2 = p2;
+
+	return (int_cmp(is1->id, is2->id));
+}
+
+/*
+ * Transfer content of 'list' into 'array', freeing and emptying list.
+ *
+ * 'nb' must be 'list''s length and not be greater than 'array''s size.  The
+ * destination array is sorted by ID.  Structures 'struct id_spec' with same IDs
+ * are coalesced if that makes sense (not including duplicate clauses), else
+ * EINVAL is returned.  On success, 'nb' is updated (lowered) to account for
+ * coalesced specifications.  The parameter 'type' is only for testing purposes
+ * (INVARIANTS).
+ */
+static int
+pour_list_into_rule(const id_type_t type, struct id_list *const list,
+    struct id_spec *const array, id_nb_t *const nb,
+    struct parse_error **const parse_error)
+{
+	struct id_elem *ie, *ie_next;
+	size_t idx = 0;
+
+	/* Fill the array. */
+	STAILQ_FOREACH_SAFE(ie, list, ie_entries, ie_next) {
+		MPASS(idx < *nb);
+		array[idx] = ie->spec;
+		free(ie, M_DO);
+		++idx;
+	}
+	MPASS(idx == *nb);
+	STAILQ_INIT(list);
+
+	/* Sort it (by ID). */
+	qsort(array, *nb, sizeof(*array), id_spec_cmp);
+
+	/* Coalesce same IDs. */
+	if (*nb != 0) {
+		size_t ref_idx = 0;
+
+		for (idx = 1; idx < *nb; ++idx) {
+			const int id = array[idx].id;
+
+			if (id == array[ref_idx].id) {
+				const int error =
+				    coalesce_id_flags(array[idx].flags,
+				    &array[ref_idx].flags);
+
+				if (error != 0) {
+					alloc_parse_error(parse_error, 0,
+					    "Incompatible or duplicate flags "
+					    "for ID %d.", id);
+					return (EINVAL);
+				}
+				check_type_and_id_flags(type,
+				    array[ref_idx].flags);
+			}
+			else {
+				++ref_idx;
+				if (ref_idx != idx)
+					array[ref_idx] = array[idx];
+			}
+		}
+		*nb = ref_idx + 1;
+	}
+
+	return (0);
+}
+
+/*
+ * See also first comments for parse_rule() below.
+ *
+ * The second part of a rule, called <target> (or <to>), is a comma-separated
+ * (',') list of '<type>=<flags><id>' clauses similar to that of the <from>
+ * part, with the extensions that <id> may also be "*" or "any" or ".", and that
+ * <flags> may contain at most one of the '+', '-' and '!' characters when
+ * <type> is "gid" (no flags are allowed for "uid").  No two clauses in a single
+ * <to> list may list the same <id>.  "*" and "any" both designate any ID for
+ * the <type>, and are aliases to each other.  In front of "any" (or "*"; only
+ * for GIDs), only the '+' flag is allowed.  "." designates the process'
+ * current IDs for the <type>.  The precise meaning of flags and "." is
+ * explained in functions checking privileges below.
+ */
+static int
+parse_single_rule(char *rule, struct rules *const rules,
+    struct parse_error **const parse_error)
+{
+	const char *const start = rule;
+	const char *from_type, *from_id, *p;
+	char *to_list;
+	struct id_list uid_list, gid_list;
+	struct id_elem *ie, *ie_next;
+	struct rule *new;
+	int error;
+
+	MPASS(*parse_error == NULL);
+	STAILQ_INIT(&uid_list);
+	STAILQ_INIT(&gid_list);
+
+	/* Freed when the 'struct rules' container is freed. */
+	new = malloc(sizeof(*new), M_DO, M_WAITOK | M_ZERO);
+
+	from_type = strsep_noblanks(&rule, "=");
+	MPASS(from_type != NULL); /* Because 'rule' was not NULL. */
+	error = parse_id_type(from_type, &new->from_type, parse_error);
+	if (error != 0)
+		goto einval;
+	switch (new->from_type) {
+	case IT_UID:
+	case IT_GID:
+		break;
+	default:
+		alloc_parse_error(parse_error, 0, "Type '%s' not allowed in "
+		    "the \"from\" part of rules.", from_type);
+		goto einval;
+	}
+
+	from_id = strsep_noblanks(&rule, ":");
+	if (is_null_or_empty(from_id)) {
+		alloc_parse_error(parse_error, 0, "No ID specified.");
+		goto einval;
+	}
+
+	new->from_id = strtonni(from_id, &p, 10);
+	if (new->from_id < 0 || *p != '\0') {
+		alloc_parse_error(parse_error, from_id - start,
+		    "Cannot parse a numerical ID (base 10, no sign).");
+		goto einval;
+	}
+
+	/*
+	 * We will now parse the "to" list.
+	 *
+	 * In order to ease parsing, we will begin by building lists of target
+	 * UIDs and GIDs in local variables 'uid_list' and 'gid_list'.  The
+	 * number of each type of IDs will be filled directly in 'new'.  At end
+	 * of parse, we will allocate both arrays of IDs to be placed into the
+	 * 'uids' and 'gids' members, sort them, and discard the tail queues
+	 * used to build them.  This conversion to sorted arrays at end of parse
+	 * allows to minimize memory allocations and enables searching IDs in
+	 * O(log(n)) instead of linearly.
+	 */
+	to_list = strsep_noblanks(&rule, ",");
+	if (to_list == NULL) {
+		alloc_parse_error(parse_error, 0, "No target list.");
+		goto einval;
+	}
+	do {
+		error = parse_target_clause(to_list, new, &uid_list, &gid_list,
+		    parse_error);
+		if (error != 0) {
+			(*parse_error)->pos += to_list - start;
+			goto einval;
+		}
+
+		to_list = strsep_noblanks(&rule, ",");
+	} while (to_list != NULL);
+
+	if (new->uids_nb != 0) {
+		new->uids = malloc(sizeof(*new->uids) * new->uids_nb, M_DO,
+		    M_WAITOK);
+		error = pour_list_into_rule(IT_UID, &uid_list, new->uids,
+		    &new->uids_nb, parse_error);
+		if (error != 0)
+			goto einval;
+	}
+	MPASS(STAILQ_EMPTY(&uid_list));
+	if (!has_clauses(new->uids_nb, new->uid_flags)) {
+		/* No UID specified, default is "uid=.". */
+		MPASS(new->uid_flags == 0);
+		new->uid_flags = MDF_CURRENT;
+		check_type_and_type_flags(IT_UID, new->uid_flags);
+	}
+
+	if (new->gids_nb != 0) {
+		new->gids = malloc(sizeof(*new->gids) * new->gids_nb, M_DO,
+		    M_WAITOK);
+		error = pour_list_into_rule(IT_GID, &gid_list, new->gids,
+		    &new->gids_nb, parse_error);
+		if (error != 0)
+			goto einval;
+	}
+	MPASS(STAILQ_EMPTY(&gid_list));
+	if (!has_clauses(new->gids_nb, new->gid_flags)) {
+		/* No GID specified, default is "gid=.,gid=!.". */
+		MPASS(new->gid_flags == 0);
+		new->gid_flags |= MDF_CURRENT | MDF_PRIMARY | MDF_SUPP_MUST |
+		    MDF_HAS_PRIMARY_CLAUSE | MDF_HAS_SUPP_CLAUSE;
+		check_type_and_type_flags(IT_GID, new->gid_flags);
+	}
+
+	STAILQ_INSERT_TAIL(&rules->head, new, r_entries);
+	return (0);
+
+einval:
+	free(new->gids, M_DO);
+	free(new->uids, M_DO);
+	free(new, M_DO);
+	STAILQ_FOREACH_SAFE(ie, &gid_list, ie_entries, ie_next)
+	    free(ie, M_DO);
+	STAILQ_FOREACH_SAFE(ie, &uid_list, ie_entries, ie_next)
+	    free(ie, M_DO);
+	MPASS(*parse_error != NULL);
+	return (EINVAL);
+}
+
+/*
+ * Parse rules specification and produce rule structures out of it.
+ *
+ * Returns 0 on success, with '*rulesp' made to point to a 'struct rules'
+ * representing the rules.  On error, the returned value is non-zero and
+ * '*rulesp' is unchanged.  If 'string' has length greater or equal to
+ * MAC_RULE_STRING_LEN, ENAMETOOLONG is returned.  If it is not in the expected
+ * format, EINVAL is returned.  If an error is returned, '*parse_error' is set
+ * to point to a 'struct parse_error' giving an error message for the problem,
+ * else '*parse_error' is set to NULL.
+ *
+ * Expected format: A semi-colon-separated list of rules of the form
+ * "<from>:<target>".  The <from> part is of the form "<type>=<id>" where <type>
+ * is "uid" or "gid", <id> an UID or GID (depending on <type>) and <target> is
+ * "*", "any" or a comma-separated list of '<type>=<flags><id>' clauses (see the
+ * comment for parse_single_rule() for more details).  For convenience, empty
+ * rules are allowed (and do nothing), and spaces and tabs are allowed (and
+ * removed) around each token (tokens are natural ones, except that
+ * '<flags><id>' as a whole is considered a single token, so no blanks are
+ * allowed between '<flags>' and '<id>').
+ *
+ * Examples:
+ * - "uid=1001:uid=1010,gid=1010;uid=1002:any"
+ * - "gid=1010:gid=1011,gid=1012,gid=1013"
+ */
+static int
+parse_rules(const char *const string, struct rules **const rulesp,
+    struct parse_error **const parse_error)
+{
+	const size_t len = strlen(string);
+	char *copy, *p, *rule;
+	struct rules *rules;
+	int error = 0;
+
+	*parse_error = NULL;
+
+	if (len >= MAC_RULE_STRING_LEN) {
+		alloc_parse_error(parse_error, 0,
+		    "Rule specification string is too long (%zu, max %zu)",
+		    len, (size_t)(MAC_RULE_STRING_LEN - 1));
+		return (ENAMETOOLONG);
+	}
+
+	rules = alloc_rules();
+	bcopy(string, rules->string, len + 1);
+	MPASS(rules->string[len] == '\0'); /* Catch some races. */
+
+	copy = malloc(len + 1, M_DO, M_WAITOK);
+	bcopy(string, copy, len + 1);
+	MPASS(copy[len] == '\0'); /* Catch some races. */
+
+	p = copy;
+	while ((rule = strsep_noblanks(&p, ";")) != NULL) {
+		if (rule[0] == '\0')
+			continue;
+		error = parse_single_rule(rule, rules, parse_error);
+		if (error != 0) {
+			(*parse_error)->pos += rule - copy;
+			toast_rules(rules);
+			goto out;
+		}
+	}
+
+	*rulesp = rules;
 out:
-	free(new_string, M_DO);
+	free(copy, M_DO);
+	return (error);
+}
+
+/*
+ * Find rules applicable to the passed prison.
+ *
+ * Returns the applicable rules (and never NULL).  'pr' must be unlocked.
+ * 'aprp' is set to the (ancestor) prison holding these, and it must be unlocked
+ * once the caller is done accessing the rules.  '*aprp' is equal to 'pr' if and
+ * only if the current jail has its own set of rules.
+ */
+static struct rules *
+find_rules(struct prison *const pr, struct prison **const aprp)
+{
+	struct prison *cpr, *ppr;
+	struct rules *rules;
+
+	cpr = pr;
+	for (;;) {
+		prison_lock(cpr);
+		rules = osd_jail_get(cpr, osd_jail_slot);
+		if (rules != NULL)
+			break;
+		prison_unlock(cpr);
+
+		ppr = cpr->pr_parent;
+		MPASS(ppr != NULL); /* prison0 always has rules. */
+		cpr = ppr;
+	}
+
+	*aprp = cpr;
+	return (rules);
+}
+
+static void
+hold_rules(struct rules *const rules)
+{
+	refcount_acquire(&rules->use_count);
+}
+
+static void
+drop_rules(struct rules *const rules)
+{
+	if (refcount_release(&rules->use_count))
+		toast_rules(rules);
+}
+
+#ifdef INVARIANTS
+static void
+check_rules_use_count(const struct rules *const rules, u_int expected)
+{
+	const u_int use_count = refcount_load(&rules->use_count);
+
+	if (use_count != expected)
+		panic("MAC/do: Rules at %p: Use count is %u, expected %u",
+		    rules, use_count, expected);
+}
+#else
+#define check_rules_use_count(...)
+#endif /* INVARIANTS */
+
+/*
+ * OSD destructor for slot 'osd_jail_slot'.
+ *
+ * Called with 'value' not NULL.  We have arranged that it is only ever called
+ * when the corresponding jail goes down or at module unload.
+ */
+static void
+dealloc_jail_osd(void *const value)
+{
+	struct rules *const rules = value;
+
+	/*
+	 * If called because the "holding" jail goes down, no one should be
+	 * using the rules but us at this point because no threads of that jail
+	 * (or its sub-jails) should currently be executing (in particular,
+	 * currently executing setcred()).  The case of module unload is more
+	 * complex.  Although the MAC framework takes care that no hook is
+	 * called while a module is unloading, the unload could happen between
+	 * two calls to MAC hooks in the course of, e.g., executing setcred(),
+	 * where the rules' reference count has been bumped to keep them alive
+	 * even if the rules on the "holding" jail have been concurrently
+	 * changed.  These other references are held in our thread OSD slot, so
+	 * we ensure that all threads' slots are freed first in mac_do_destroy()
+	 * to be able to check that only one reference remains.
+	 */
+	check_rules_use_count(rules, 1);
+	toast_rules(rules);
+}
+
+/*
+ * Remove the rules specifically associated to a prison.
+ *
+ * In practice, this means that the rules become inherited (from the closest
+ * ascendant that has some).
+ *
+ * Destroys the 'osd_jail_slot' slot of the passed jail.
+ */
+static void
+remove_rules(struct prison *const pr)
+{
+	struct rules *old_rules;
+	int error __unused;
+
+	prison_lock(pr);
+	/*
+	 * We go to the burden of extracting rules first instead of just letting
+	 * osd_jail_del() calling dealloc_jail_osd() as we want to decrement
+	 * their use count, and possibly free them, outside of the prison lock.
+	 */
+	old_rules = osd_jail_get(pr, osd_jail_slot);
+	error = osd_jail_set(pr, osd_jail_slot, NULL);
+	/* osd_set() never fails nor allocate memory when 'value' is NULL. */
+	MPASS(error == 0);
+	/*
+	 * This completely frees the OSD slot, but doesn't call the destructor
+	 * since we've just put NULL in the slot.
+	 */
+	osd_jail_del(pr, osd_jail_slot);
+	prison_unlock(pr);
+
+	if (old_rules != NULL)
+		drop_rules(old_rules);
+}
+
+/*
+ * Assign already built rules to a jail.
+ */
+static void
+set_rules(struct prison *const pr, struct rules *const rules)
+{
+	struct rules *old_rules;
+	void **rsv;
+
+	check_rules_use_count(rules, 0);
+	hold_rules(rules);
+	rsv = osd_reserve(osd_jail_slot);
+
+	prison_lock(pr);
+	old_rules = osd_jail_get(pr, osd_jail_slot);
+	osd_jail_set_reserved(pr, osd_jail_slot, rsv, rules);
+	prison_unlock(pr);
+	if (old_rules != NULL)
+		drop_rules(old_rules);
+}
+
+/*
+ * Assigns empty rules to a jail.
+ */
+static void
+set_empty_rules(struct prison *const pr)
+{
+	struct rules *const rules = alloc_rules();
+
+	set_rules(pr, rules);
+}
+
+/*
+ * Parse a rules specification and assign them to a jail.
+ *
+ * Returns the same error code as parse_rules() (which see).
+ */
+static int
+parse_and_set_rules(struct prison *const pr, const char *rules_string,
+    struct parse_error **const parse_error)
+{
+	struct rules *rules;
+	int error;
+
+	error = parse_rules(rules_string, &rules, parse_error);
+	if (error != 0)
+		return (error);
+	set_rules(pr, rules);
+	return (0);
+}
+
+static int
+mac_do_sysctl_rules(SYSCTL_HANDLER_ARGS)
+{
+	char *const buf = malloc(MAC_RULE_STRING_LEN, M_DO, M_WAITOK);
+	struct prison *const td_pr = req->td->td_ucred->cr_prison;
+	struct prison *pr;
+	struct rules *rules;
+	struct parse_error *parse_error;
+	int error;
+
+	rules = find_rules(td_pr, &pr);
+	strlcpy(buf, rules->string, MAC_RULE_STRING_LEN);
+	prison_unlock(pr);
+
+	error = sysctl_handle_string(oidp, buf, MAC_RULE_STRING_LEN, req);
+	if (error != 0 || req->newptr == NULL)
+		goto out;
+
+	/* Set our prison's rules, not that of the jail we inherited from. */
+	error = parse_and_set_rules(td_pr, buf, &parse_error);
+	if (error != 0) {
+		if (print_parse_error)
+			printf("MAC/do: Parse error at index %zu: %s\n",
+			    parse_error->pos, parse_error->msg);
+		free_parse_error(parse_error);
+	}
+out:
+	free(buf, M_DO);
 	return (error);
 }
 
 SYSCTL_PROC(_security_mac_do, OID_AUTO, rules,
-    CTLTYPE_STRING|CTLFLAG_RW|CTLFLAG_MPSAFE,
-    0, 0, sysctl_rules, "A",
+    CTLTYPE_STRING|CTLFLAG_RW|CTLFLAG_PRISON|CTLFLAG_MPSAFE,
+    0, 0, mac_do_sysctl_rules, "A",
     "Rules");
 
-static void
-destroy(struct mac_policy_conf *mpc)
-{
-	osd_jail_deregister(mac_do_osd_jail_slot);
-	toast_rules(&rules0.head);
-}
 
-static void
-mac_do_alloc_prison(struct prison *pr, struct mac_do_rule **lrp)
-{
-	struct prison *ppr;
-	struct mac_do_rule *rules, *new_rules;
-	void **rsv;
-
-	rules = mac_do_rule_find(pr, &ppr);
-	if (ppr == pr)
-		goto done;
-
-	mtx_unlock(&ppr->pr_mtx);
-	new_rules = malloc(sizeof(*new_rules), M_PRISON, M_WAITOK|M_ZERO);
-	rsv = osd_reserve(mac_do_osd_jail_slot);
-	rules = mac_do_rule_find(pr, &ppr);
-	if (ppr == pr) {
-		free(new_rules, M_PRISON);
-		osd_free_reserved(rsv);
-		goto done;
-	}
-	mtx_lock(&pr->pr_mtx);
-	osd_jail_set_reserved(pr, mac_do_osd_jail_slot, rsv, new_rules);
-	TAILQ_INIT(&new_rules->head);
-done:
-	if (lrp != NULL)
-		*lrp = rules;
-	mtx_unlock(&pr->pr_mtx);
-	mtx_unlock(&ppr->pr_mtx);
-}
-
-static void
-mac_do_dealloc_prison(void *data)
-{
-	struct mac_do_rule *r = data;
-
-	toast_rules(&r->head);
-}
-
-static int
-mac_do_prison_set(void *obj, void *data)
-{
-	struct prison *pr = obj;
-	struct vfsoptlist *opts = data;
-	struct rulehead head, saved_head;
-	struct mac_do_rule *rules;
-	char *rules_string, *copy_string;
-	int error, jsys, len;
-
-	error = vfs_copyopt(opts, "mdo", &jsys, sizeof(jsys));
-	if (error == ENOENT)
-		jsys = -1;
-	error = vfs_getopt(opts, "mdo.rules", (void **)&rules_string, &len);
-	if (error == ENOENT)
-		rules = NULL;
-	else
-		jsys = JAIL_SYS_NEW;
-	switch (jsys) {
-	case JAIL_SYS_INHERIT:
-		mtx_lock(&pr->pr_mtx);
-		osd_jail_del(pr, mac_do_osd_jail_slot);
-		mtx_unlock(&pr->pr_mtx);
-		break;
-	case JAIL_SYS_NEW:
-		mac_do_alloc_prison(pr, &rules);
-		if (rules_string == NULL)
-			break;
-		copy_string = strdup(rules_string, M_DO);
-		TAILQ_INIT(&head);
-		error = parse_rules(copy_string, &head);
-		free(copy_string, M_DO);
-		if (error)
-			return (1);
-		TAILQ_INIT(&saved_head);
-		mtx_lock(&pr->pr_mtx);
-		TAILQ_CONCAT(&saved_head, &rules->head, r_entries);
-		TAILQ_CONCAT(&rules->head, &head, r_entries);
-		strlcpy(rules->string, rules_string, MAC_RULE_STRING_LEN);
-		mtx_unlock(&pr->pr_mtx);
-		toast_rules(&saved_head);
-		break;
-	}
-	return (0);
-}
-
-SYSCTL_JAIL_PARAM_SYS_NODE(mdo, CTLFLAG_RW, "Jail MAC/do parameters");
-SYSCTL_JAIL_PARAM_STRING(_mdo, rules, CTLFLAG_RW, MAC_RULE_STRING_LEN,
+SYSCTL_JAIL_PARAM_SYS_SUBNODE(mac, do, CTLFLAG_RW, "Jail MAC/do parameters");
+SYSCTL_JAIL_PARAM_STRING(_mac_do, rules, CTLFLAG_RW, MAC_RULE_STRING_LEN,
     "Jail MAC/do rules");
 
+
 static int
-mac_do_prison_get(void *obj, void *data)
+mac_do_jail_create(void *obj, void *data __unused)
 {
-	struct prison *ppr, *pr = obj;
-	struct vfsoptlist *opts = data;
-	struct mac_do_rule *rules;
+	struct prison *const pr = obj;
+
+	set_empty_rules(pr);
+	return (0);
+}
+
+static int
+mac_do_jail_get(void *obj, void *data)
+{
+	struct prison *ppr, *const pr = obj;
+	struct vfsoptlist *const opts = data;
+	struct rules *rules;
 	int jsys, error;
 
-	rules = mac_do_rule_find(pr, &ppr);
-	error = vfs_setopt(opts, "mdo", &jsys, sizeof(jsys));
+	rules = find_rules(pr, &ppr);
+
+	jsys = pr == ppr ?
+	    (STAILQ_EMPTY(&rules->head) ? JAIL_SYS_DISABLE : JAIL_SYS_NEW) :
+	    JAIL_SYS_INHERIT;
+	error = vfs_setopt(opts, "mac.do", &jsys, sizeof(jsys));
 	if (error != 0 && error != ENOENT)
 		goto done;
-	error = vfs_setopts(opts, "mdo.rules", rules->string);
+
+	error = vfs_setopts(opts, "mac.do.rules", rules->string);
 	if (error != 0 && error != ENOENT)
 		goto done;
-	mtx_unlock(&ppr->pr_mtx);
+
 	error = 0;
 done:
-	return (0);
-}
-
-static int
-mac_do_prison_create(void *obj, void *data __unused)
-{
-	struct prison *pr = obj;
-
-	mac_do_alloc_prison(pr, NULL);
-	return (0);
-}
-
-static int
-mac_do_prison_remove(void *obj, void *data __unused)
-{
-	struct prison *pr = obj;
-	struct mac_do_rule *r;
-
-	mtx_lock(&pr->pr_mtx);
-	r = osd_jail_get(pr, mac_do_osd_jail_slot);
-	mtx_unlock(&pr->pr_mtx);
-	toast_rules(&r->head);
-	return (0);
-}
-
-static int
-mac_do_prison_check(void *obj, void *data)
-{
-	struct vfsoptlist *opts = data;
-	char *rules_string;
-	int error, jsys, len;
-
-	error = vfs_copyopt(opts, "mdo", &jsys, sizeof(jsys));
-	if (error != ENOENT) {
-		if (error != 0)
-			return (error);
-		if (jsys != JAIL_SYS_NEW && jsys != JAIL_SYS_INHERIT)
-			return (EINVAL);
-	}
-	error = vfs_getopt(opts, "mdo.rules", (void **)&rules_string, &len);
-	if (error != ENOENT) {
-		if (error != 0)
-			return (error);
-		if (len > MAC_RULE_STRING_LEN) {
-			vfs_opterror(opts, "mdo.rules too long");
-			return (ENAMETOOLONG);
-		}
-	}
-	if (error == ENOENT)
-		error = 0;
+	prison_unlock(ppr);
 	return (error);
 }
 
-static void
-init(struct mac_policy_conf *mpc)
+/*
+ * We perform only cheap checks here, i.e., we do not really parse the rules
+ * specification string, if any.
+ */
+static int
+mac_do_jail_check(void *obj, void *data)
 {
-	static osd_method_t methods[PR_MAXMETHOD] = {
-		[PR_METHOD_CREATE] = mac_do_prison_create,
-		[PR_METHOD_GET] = mac_do_prison_get,
-		[PR_METHOD_SET] = mac_do_prison_set,
-		[PR_METHOD_CHECK] = mac_do_prison_check,
-		[PR_METHOD_REMOVE] = mac_do_prison_remove,
-	};
-	struct prison *pr;
+	struct vfsoptlist *opts = data;
+	char *rules_string;
+	int error, jsys, size;
 
-	mac_do_osd_jail_slot = osd_jail_register(mac_do_dealloc_prison, methods);
-	TAILQ_INIT(&rules0.head);
-	sx_slock(&allprison_lock);
-	TAILQ_FOREACH(pr, &allprison, pr_list)
-		mac_do_alloc_prison(pr, NULL);
-	sx_sunlock(&allprison_lock);
+	error = vfs_copyopt(opts, "mac.do", &jsys, sizeof(jsys));
+	if (error == ENOENT)
+		jsys = -1;
+	else {
+		if (error != 0)
+			return (error);
+		if (jsys != JAIL_SYS_DISABLE && jsys != JAIL_SYS_NEW &&
+		    jsys != JAIL_SYS_INHERIT)
+			return (EINVAL);
+	}
+
+	/*
+	 * We use vfs_getopt() here instead of vfs_getopts() to get the length.
+	 * We perform the additional checks done by the latter here, even if
+	 * jail_set() calls vfs_getopts() itself later (they becoming
+	 * inconsistent wouldn't cause any security problem).
+	 */
+	error = vfs_getopt(opts, "mac.do.rules", (void**)&rules_string, &size);
+	if (error == ENOENT) {
+		/*
+		 * Default (in absence of "mac.do.rules") is to disable (and, in
+		 * particular, not inherit).
+		 */
+		if (jsys == -1)
+			jsys = JAIL_SYS_DISABLE;
+
+		if (jsys == JAIL_SYS_NEW) {
+			vfs_opterror(opts, "'mac.do.rules' must be specified "
+			    "given 'mac.do''s value");
+			return (EINVAL);
+		}
+
+		/* Absence of "mac.do.rules" at this point is OK. */
+		error = 0;
+	} else {
+		if (error != 0)
+			return (error);
+
+		/* Not a proper string. */
+		if (size == 0 || rules_string[size - 1] != '\0') {
+			vfs_opterror(opts, "'mac.do.rules' not a proper string");
+			return (EINVAL);
+		}
+
+		if (size > MAC_RULE_STRING_LEN) {
+			vfs_opterror(opts, "'mac.do.rules' too long");
+			return (ENAMETOOLONG);
+		}
+
+		if (jsys == -1)
+			/* Default (if "mac.do.rules" is present). */
+			jsys = rules_string[0] == '\0' ? JAIL_SYS_DISABLE :
+			    JAIL_SYS_NEW;
+
+		/*
+		 * Be liberal and accept JAIL_SYS_DISABLE and JAIL_SYS_INHERIT
+		 * with an explicit empty rules specification.
+		 */
+		switch (jsys) {
+		case JAIL_SYS_DISABLE:
+		case JAIL_SYS_INHERIT:
+			if (rules_string[0] != '\0') {
+				vfs_opterror(opts, "'mac.do.rules' specified "
+				    "but should not given 'mac.do''s value");
+				return (EINVAL);
+			}
+			break;
+		}
+	}
+
+	return (error);
+}
+
+static int
+mac_do_jail_set(void *obj, void *data)
+{
+	struct prison *pr = obj;
+	struct vfsoptlist *opts = data;
+	char *rules_string;
+	struct parse_error *parse_error;
+	int error, jsys;
+
+	/*
+	 * The invariant checks used below correspond to what has already been
+	 * checked in mac_do_jail_check() above.
+	 */
+
+	error = vfs_copyopt(opts, "mac.do", &jsys, sizeof(jsys));
+	MPASS(error == 0 || error == ENOENT);
+	if (error != 0)
+		jsys = -1; /* Mark unfilled. */
+
+	rules_string = vfs_getopts(opts, "mac.do.rules", &error);
+	MPASS(error == 0 || error == ENOENT);
+	if (error == 0) {
+		MPASS(strlen(rules_string) < MAC_RULE_STRING_LEN);
+		if (jsys == -1)
+			/* Default (if "mac.do.rules" is present). */
+			jsys = rules_string[0] == '\0' ? JAIL_SYS_DISABLE :
+			    JAIL_SYS_NEW;
+		else
+			MPASS(jsys == JAIL_SYS_NEW ||
+			    ((jsys == JAIL_SYS_DISABLE ||
+			    jsys == JAIL_SYS_INHERIT) &&
+			    rules_string[0] == '\0'));
+	} else {
+		MPASS(jsys != JAIL_SYS_NEW);
+		if (jsys == -1)
+			/*
+			 * Default (in absence of "mac.do.rules") is to disable
+			 * (and, in particular, not inherit).
+			 */
+			jsys = JAIL_SYS_DISABLE;
+		/* If disabled, we'll store an empty rule specification. */
+		if (jsys == JAIL_SYS_DISABLE)
+			rules_string = "";
+	}
+
+	switch (jsys) {
+	case JAIL_SYS_INHERIT:
+		remove_rules(pr);
+		error = 0;
+		break;
+	case JAIL_SYS_DISABLE:
+	case JAIL_SYS_NEW:
+		error = parse_and_set_rules(pr, rules_string, &parse_error);
+		if (error != 0) {
+			vfs_opterror(opts,
+			    "MAC/do: Parse error at index %zu: %s\n",
+			    parse_error->pos, parse_error->msg);
+			free_parse_error(parse_error);
+		}
+		break;
+	default:
+		__assert_unreachable();
+	}
+	return (error);
+}
+
+/*
+ * OSD jail methods.
+ *
+ * There is no PR_METHOD_REMOVE, as OSD storage is destroyed by the common jail
+ * code (see prison_cleanup()), which triggers a run of our dealloc_jail_osd()
+ * destructor.
+ */
+static const osd_method_t osd_methods[PR_MAXMETHOD] = {
+	[PR_METHOD_CREATE] = mac_do_jail_create,
+	[PR_METHOD_GET] = mac_do_jail_get,
+	[PR_METHOD_CHECK] = mac_do_jail_check,
+	[PR_METHOD_SET] = mac_do_jail_set,
+};
+
+
+/*
+ * Common header structure.
+ *
+ * Each structure that is used to pass information between some MAC check
+ * function and priv_grant() must start with this header.
+ */
+struct mac_do_data_header {
+	/* Size of the allocated buffer holding the containing structure. */
+	size_t		 allocated_size;
+	/* Full size of the containing structure. */
+	size_t		 size;
+	/*
+	 * For convenience, we use privilege numbers as an identifier for the
+	 * containing structure's type, since there is one distinct privilege
+	 * for each privilege changing function we are supporting.  0 in 'priv'
+	 * indicates this header is uninitialized.
+	 */
+	int		 priv;
+	/* Rules to apply. */
+	struct rules	*rules;
+};
+
+/*
+ * The case of unusable or absent per-thread data can actually happen as nothing
+ * prevents, e.g., priv_check*() with privilege 'priv' from being called
+ * standalone, as is currently done by, e.g., the Linux emulator for
+ * PRIV_CRED_SETUID.  We interpret such calls to priv_check*() as full,
+ * unrestricted requests for 'priv', contrary to what we're doing here for
+ * selected operations, and consequently will not grant the requested privilege.
+ *
+ * Also, we protect ourselves from a concurrent change of 'do_enabled' while
+ * a call to setcred() is in progress by storing the rules per-thread, which
+ * are then consulted by each successive hook so that they all have a coherent
+ * view of the specifications, and we empty the slot (actually, mark it as
+ * empty) when MAC/do is disabled.
+ */
+static int
+check_data_usable(const void *const data, const size_t size, const int priv)
+{
+	const struct mac_do_data_header *const hdr = data;
+
+	if (hdr == NULL || hdr->priv == 0)
+		return (ENOENT);
+	/*
+	 * Impacting changes in the protocols we are based on...  Don't crash in
+	 * production.
+	 */
+	if (hdr->priv != priv) {
+		MPASS(hdr->priv == priv);
+		return (EBUSY);
+	}
+	MPASS(hdr->size == size);
+	MPASS(hdr->size <= hdr->allocated_size);
+	return (0);
+}
+
+static void
+clear_data(void *const data)
+{
+	struct mac_do_data_header *const hdr = data;
+
+	if (hdr != NULL) {
+		drop_rules(hdr->rules);
+		/* We don't deallocate so as to save time on next access. */
+		hdr->priv = 0;
+	}
+}
+
+static void *
+fetch_data(void)
+{
+	return (osd_thread_get_unlocked(curthread, osd_thread_slot));
 }
 
 static bool
-rule_is_valid(struct ucred *cred, struct rule *r)
+is_data_reusable(const void *const data, const size_t size)
 {
-	if (r->from_type == RULE_UID && r->f_uid == cred->cr_uid)
+	const struct mac_do_data_header *const hdr = data;
+
+	return (hdr != NULL && size <= hdr->allocated_size);
+}
+
+static void
+set_data_header(void *const data, const size_t size, const int priv,
+    struct rules *const rules)
+{
+	struct mac_do_data_header *const hdr = data;
+
+	MPASS(hdr->priv == 0);
+	MPASS(priv != 0);
+	MPASS(size <= hdr->allocated_size);
+	hdr->size = size;
+	hdr->priv = priv;
+	hdr->rules = rules;
+}
+
+/* The proc lock (and any other non-sleepable lock) must not be held. */
+static void *
+alloc_data(void *const data, const size_t size)
+{
+	struct mac_do_data_header *const hdr = realloc(data, size, M_DO,
+	    M_WAITOK);
+
+	MPASS(size >= sizeof(struct mac_do_data_header));
+	hdr->allocated_size = size;
+	hdr->priv = 0;
+	if (hdr != data) {
+		/*
+		 * This call either reuses the existing memory allocated for the
+		 * slot or tries to allocate some without blocking.
+		 */
+		int error = osd_thread_set(curthread, osd_thread_slot, hdr);
+
+		if (error != 0) {
+			/* Going to make a M_WAITOK allocation. */
+			void **const rsv = osd_reserve(osd_thread_slot);
+
+			error = osd_thread_set_reserved(curthread,
+			    osd_thread_slot, rsv, hdr);
+			MPASS(error == 0);
+		}
+	}
+	return (hdr);
+}
+
+/* Destructor for 'osd_thread_slot'. */
+static void
+dealloc_thread_osd(void *const value)
+{
+	free(value, M_DO);
+}
+
+/*
+ * Whether to grant access to some primary group according to flags.
+ *
+ * The passed 'flags' must be those of a rule's matching GID, or the IT_GID type
+ * flags when MDF_CURRENT has been matched.
+ *
+ * Return values:
+ * - 0:			Access granted.
+ * - EJUSTRETURN:	Flags are agnostic.
+ */
+static int
+grant_primary_group_from_flags(const flags_t flags)
+{
+	return (flags & MDF_PRIMARY ? 0 : EJUSTRETURN);
+}
+
+/*
+ * Same as grant_primary_group_from_flags(), but for supplementary groups.
+ *
+ * Return values:
+ * - 0:			Access granted.
+ * - EJUSTRETURN:	Flags are agnostic.
+ * - EPERM:		Access denied.
+ */
+static int
+grant_supplementary_group_from_flags(const flags_t flags)
+{
+	if (flags & MDF_SUPP_MASK)
+		return (flags & MDF_SUPP_DONT ? EPERM : 0);
+
+	return (EJUSTRETURN);
+}
+
+/*
+ * Grant (0) or deny (EPERM) the supplementary groups of 'new_cred' per 'rule'.
+ */
+static int
+rule_grant_supplementary_groups(const struct rule *const rule,
+    const struct ucred *const old_cred, const struct ucred *const new_cred)
+{
+	const int nb_groups = new_cred->cr_ngroups - 1;
+	const gid_t *const groups = new_cred->cr_groups + 1;
+	int error;
+	id_nb_t r_idx = 0;
+
+	if (rule->gid_flags & MDF_ANY_SUPP)
+		/*
+		 * All supplementary groups will be accepted, no need to loop
+		 * over them.
+		 */
+		return (0);
+
+	for (int i = 0; i < nb_groups; ++i) {
+		const gid_t gid = groups[i];
+
+		/* Was MDF_CURRENT specified, and is 'gid' a current GID? */
+		if ((rule->gid_flags & MDF_CURRENT) &&
+		    group_is_supplementary(gid, old_cred)) {
+			error = grant_supplementary_group_from_flags
+			    (rule->gid_flags);
+			if (error == 0)
+				continue;
+			/*
+			 * MDF_SUPP_DONT must have been handled in the relevant
+			 * check_*() function (or these functions weren't
+			 * executed at all (e.g., Linux emulation), and we
+			 * shouldn't get this deep).
+			 */
+			MPASS(error == EJUSTRETURN);
+		}
+
+		/*
+		 * Search by GID for a corresponding 'struct id_spec'.
+		 *
+		 * Both the new credentials' groups and the rule's GIDs are
+		 * sorted, so one pass over the latter suffices.  Compare as
+		 * 'gid_t' (unsigned), as 'int' misorders GIDs above INT_MAX.
+		 */
+		for (; r_idx < rule->gids_nb; ++r_idx) {
+			const gid_t r_gid = rule->gids[r_idx].id;
+
+			if (gid == r_gid) {
+				error = grant_supplementary_group_from_flags
+				    (rule->gids[r_idx].flags);
+				if (error == 0)
+					goto next_group;
+				/* Same as above. */
+				MPASS(error == EJUSTRETURN);
+				break;
+			} else if (gid < r_gid)
+				break;
+		}
+
+		/* 'gid' wasn't accepted. */
+		return (EPERM);
+next_group:
+		;
+	}
+
+	return (0);
+}
+
+static int
+rule_grant_primary_group(const struct rule *const rule,
+    const struct ucred *const old_cred, const gid_t gid)
+{
+	struct id_spec gid_is = {};
+	const struct id_spec *found_is;
+	int error;
+
+	if (rule->gid_flags & MDF_ANY)
+		return (0);
+
+	/* Was MDF_CURRENT specified, and is 'gid' a current GID? */
+	if ((rule->gid_flags & MDF_CURRENT) &&
+	    group_is_primary(gid, old_cred)) {
+		error = grant_primary_group_from_flags(rule->gid_flags);
+		if (error == 0)
+			return (0);
+	}
+
+	/* Search by GID for a corresponding 'struct id_spec'. */
+	gid_is.id = gid;
+	found_is = bsearch(&gid_is, rule->gids, rule->gids_nb,
+	    sizeof(*rule->gids), id_spec_cmp);
+
+	if (found_is != NULL) {
+		error = grant_primary_group_from_flags(found_is->flags);
+		if (error == 0)
+			return (0);
+	}
+
+	return (EPERM);
+}
+
+static int
+rule_grant_primary_groups(const struct rule *const rule,
+    const struct ucred *const old_cred, const struct ucred *const new_cred)
+{
+	int error;
+
+	/* Shortcut. */
+	if (rule->gid_flags & MDF_ANY)
+		return (0);
+
+	error = rule_grant_primary_group(rule, old_cred, new_cred->cr_gid);
+	if (error != 0)
+		return (error);
+	error = rule_grant_primary_group(rule, old_cred, new_cred->cr_rgid);
+	if (error != 0)
+		return (error);
+	error = rule_grant_primary_group(rule, old_cred, new_cred->cr_svgid);
+	if (error != 0)
+		return (error);
+	return (0);
+}
+
+static bool
+user_is_current(const uid_t uid, const struct ucred *const old_cred)
+{
+	return (uid == old_cred->cr_uid || uid == old_cred->cr_ruid ||
+	    uid == old_cred->cr_svuid);
+}
+
+static int
+rule_grant_user(const struct rule *const rule,
+    const struct ucred *const old_cred, const uid_t uid)
+{
+	struct id_spec uid_is = {};
+	const struct id_spec *found_is;
+
+	if (rule->uid_flags & MDF_ANY)
+		return (0);
+
+	/* Was MDF_CURRENT specified, and is 'uid' a current UID? */
+	if ((rule->uid_flags & MDF_CURRENT) &&
+	    user_is_current(uid, old_cred))
+		return (0);
+
+	/* Search by UID for a corresponding 'struct id_spec'. */
+	uid_is.id = uid;
+	found_is = bsearch(&uid_is, rule->uids, rule->uids_nb,
+	    sizeof(*rule->uids), id_spec_cmp);
+
+	if (found_is != NULL)
+		return (0);
+
+	return (EPERM);
+}
+
+static int
+rule_grant_users(const struct rule *const rule,
+    const struct ucred *const old_cred, const struct ucred *const new_cred)
+{
+	int error;
+
+	/* Shortcut. */
+	if (rule->uid_flags & MDF_ANY)
+		return (0);
+
+	error = rule_grant_user(rule, old_cred, new_cred->cr_uid);
+	if (error != 0)
+		return (error);
+	error = rule_grant_user(rule, old_cred, new_cred->cr_ruid);
+	if (error != 0)
+		return (error);
+	error = rule_grant_user(rule, old_cred, new_cred->cr_svuid);
+	if (error != 0)
+		return (error);
+
+	return (0);
+}
+
+static int
+rule_grant_setcred(const struct rule *const rule,
+    const struct ucred *const old_cred, const struct ucred *const new_cred)
+{
+	int error;
+
+	error = rule_grant_users(rule, old_cred, new_cred);
+	if (error != 0)
+		return (error);
+	error = rule_grant_primary_groups(rule, old_cred, new_cred);
+	if (error != 0)
+		return (error);
+	error = rule_grant_supplementary_groups(rule, old_cred, new_cred);
+	if (error != 0)
+		return (error);
+
+	return (0);
+}
+
+static bool
+rule_applies(const struct rule *const rule, const struct ucred *const cred)
+{
+	if (rule->from_type == IT_UID && rule->from_id == cred->cr_uid)
 		return (true);
-	if (r->from_type == RULE_GID && groupmember(r->f_gid, cred))
+	if (rule->from_type == IT_GID && groupmember(rule->from_id, cred))
 		return (true);
 	return (false);
 }
 
-static int
-priv_grant(struct ucred *cred, int priv)
-{
-	struct rule *r;
-	struct prison *pr;
-	struct mac_do_rule *rule;
-
-	if (do_enabled == 0)
-		return (EPERM);
-
-	rule = mac_do_rule_find(cred->cr_prison, &pr);
-	TAILQ_FOREACH(r, &rule->head, r_entries) {
-		if (rule_is_valid(cred, r)) {
-			switch (priv) {
-			case PRIV_CRED_SETGROUPS:
-			case PRIV_CRED_SETUID:
-				mtx_unlock(&pr->pr_mtx);
-				return (0);
-			default:
-				break;
-			}
-		}
-	}
-	mtx_unlock(&pr->pr_mtx);
-	return (EPERM);
-}
+/*
+ * To pass data between check_setcred() and priv_grant() (on PRIV_CRED_SETCRED).
+ */
+struct mac_do_setcred_data {
+	struct mac_do_data_header hdr;
+	const struct ucred *new_cred;
+	u_int setcred_flags;
+};
 
 static int
-check_setgroups(struct ucred *cred, int ngrp, gid_t *groups)
+mac_do_priv_grant(struct ucred *cred, int priv)
 {
-	struct rule *r;
-	char *fullpath = NULL;
-	char *freebuf = NULL;
-	struct prison *pr;
-	struct mac_do_rule *rule;
-
-	if (do_enabled == 0)
-		return (0);
-	if (cred->cr_uid == 0)
-		return (0);
-
-	if (vn_fullpath(curproc->p_textvp, &fullpath, &freebuf) != 0)
-		return (EPERM);
-	if (strcmp(fullpath, "/usr/bin/mdo") != 0) {
-		free(freebuf, M_TEMP);
-		return (EPERM);
-	}
-	free(freebuf, M_TEMP);
-
-	rule = mac_do_rule_find(cred->cr_prison, &pr);
-	TAILQ_FOREACH(r, &rule->head, r_entries) {
-		if (rule_is_valid(cred, r)) {
-			mtx_unlock(&pr->pr_mtx);
-			return (0);
-		}
-	}
-	mtx_unlock(&pr->pr_mtx);
-
-	return (EPERM);
-}
-
-static int
-check_setuid(struct ucred *cred, uid_t uid)
-{
-	struct rule *r;
+	struct mac_do_setcred_data *const data = fetch_data();
+	const struct rules *rules;
+	const struct ucred *new_cred;
+	const struct rule *rule;
+	u_int setcred_flags;
 	int error;
-	char *fullpath = NULL;
-	char *freebuf = NULL;
-	struct prison *pr;
-	struct mac_do_rule *rule;
 
-	if (do_enabled == 0)
-		return (0);
-	if (cred->cr_uid == uid || cred->cr_uid == 0 || cred->cr_ruid == 0)
-		return (0);
-
-	if (vn_fullpath(curproc->p_textvp, &fullpath, &freebuf) != 0)
+	/* Bail out fast if we aren't concerned. */
+	if (priv != PRIV_CRED_SETCRED)
 		return (EPERM);
-	if (strcmp(fullpath, "/usr/bin/mdo") != 0) {
-		free(freebuf, M_TEMP);
-		return (EPERM);
-	}
-	free(freebuf, M_TEMP);
 
+	/*
+	 * Do we have to do something?
+	 */
+	if (check_data_usable(data, sizeof(*data), priv) != 0)
+		/* No. */
+		return (EPERM);
+
+	rules = data->hdr.rules;
+	new_cred = data->new_cred;
+	KASSERT(new_cred != NULL,
+	    ("priv_check*() called before mac_cred_check_setcred()"));
+	setcred_flags = data->setcred_flags;
+
+	/*
+	 * Rule out new versions of setcred() (in case, e.g., they introduce new
+	 * flags), as well as MAC label changes as we do not (currently) support
+	 * validating such changes.  This may be improved by actually checking
+	 * whether the requested label and the current one would differ.  For
+	 * now, we just check for SETCREDF_MAC_LABEL's presence.
+	 */
+	if (SETCREDF_TO_VERSION(setcred_flags) != 0 ||
+	    (setcred_flags & SETCREDF_MAC_LABEL) != 0)
+		return (EPERM);
+
+	/*
+	 * Browse rules, and for those that match the requestor, call specific
+	 * privilege granting functions interpreting the "to"/"target" part.
+	 */
 	error = EPERM;
-	rule = mac_do_rule_find(cred->cr_prison, &pr);
-	TAILQ_FOREACH(r, &rule->head, r_entries) {
-		if (r->from_type == RULE_UID) {
-			if (cred->cr_uid != r->f_uid)
-				continue;
-			if (r->to_type == RULE_ANY) {
-				error = 0;
-				break;
-			}
-			if (r->to_type == RULE_UID && uid == r->t_uid) {
-				error = 0;
-				break;
-			}
-		}
-		if (r->from_type == RULE_GID) {
-			if (!groupmember(r->f_gid, cred))
-				continue;
-			if (r->to_type == RULE_ANY) {
-				error = 0;
-				break;
-			}
-			if (r->to_type == RULE_UID && uid == r->t_uid) {
-				error = 0;
-				break;
-			}
-		}
-	}
-	mtx_unlock(&pr->pr_mtx);
+	STAILQ_FOREACH(rule, &rules->head, r_entries)
+	    if (rule_applies(rule, cred)) {
+		    error = rule_grant_setcred(rule, cred, new_cred);
+		    if (error != EPERM)
+			    break;
+	    }
+
 	return (error);
 }
 
+static int
+rule_check_set_supplementary_groups(const struct rule *const rule,
+    const struct ucred *const old_cred, const struct ucred *const new_cred)
+{
+	const flags_t gid_flags = rule->gid_flags;
+
+	if (gid_flags & MDF_MAY_REJ_SUPP) {
+		const bool has_current_rej = (gid_flags & MDF_CURRENT) &&
+		    ((gid_flags & MDF_SUPP_MUST) || (gid_flags & MDF_SUPP_DONT));
+		/* cr_groups[0] is the effective GID; rule GIDs start at 0. */
+		int o_idx = 1, n_idx, r_idx = 0;
+
+		for (n_idx = 1; n_idx < new_cred->cr_ngroups; ++n_idx) {
+			const gid_t gid = new_cred->cr_groups[n_idx];
+
+			if (has_current_rej) {
+				/*
+				 * Linear search, as both supplementary groups
+				 * arrays are sorted.
+				 */
+				for (; o_idx < old_cred->cr_ngroups; ++o_idx) {
+					const gid_t o_gid =
+					    old_cred->cr_groups[o_idx];
+
+					if (o_gid < gid) {
+						if (gid_flags & MDF_SUPP_MUST)
+							return (EPERM);
+					} else if (o_gid == gid) {
+						if (gid_flags & MDF_SUPP_DONT)
+							return (EPERM);
+					}
+					else
+						break;
+				}
+			}
+
+			/*
+			 * Linear search, as both the supplementary groups
+			 * array and the GIDs in the rule are sorted.
+			 */
+			for (; r_idx < rule->gids_nb; ++r_idx) {
+				const struct id_spec *const is =
+				    &rule->gids[r_idx];
+
+				if (is->id < gid) {
+					if (is->flags & MDF_SUPP_MUST)
+						/* Not present but mandatory. */
+						return (EPERM);
+				} else if (is->id == gid) {
+					if (is->flags & MDF_SUPP_DONT)
+						/* Present but forbidden. */
+						return (EPERM);
+				} else
+					break;
+			}
+		}
+
+		/*
+		 * If we must have all current groups and we didn't browse all
+		 * of them at this point (because the remaining ones have GIDs
+		 * greater than the last requested group), we are simply missing
+		 * them.
+		 */
+		if ((gid_flags & MDF_CURRENT) && (gid_flags & MDF_SUPP_MUST) &&
+		    o_idx < old_cred->cr_ngroups)
+			return (EPERM);
+		/*
+		 * Similarly, we have to finish browsing all GIDs from the rule
+		 * in case some are marked mandatory.
+		 */
+		for (; r_idx < rule->gids_nb; ++r_idx) {
+			const struct id_spec *const is = &rule->gids[r_idx];
+
+			if (is->flags & MDF_SUPP_MUST)
+				return (EPERM);
+		}
+	}
+
+	return (0);
+}
+
+static int
+check_proc(void)
+{
+	char *path, *to_free;
+	int error;
+
+	/*
+	 * Only grant privileges if requested by the right executable.
+	 *
+	 * XXXOC: We may want to base this check on a tunable path and/or
+	 * a specific MAC label.  Going even further, e.g., envisioning to
+	 * completely replace the path check with the latter, we would need to
+	 * install FreeBSD on a FS with multilabel enabled by default, which in
+	 * practice entails adding an option to ZFS to set MNT_MULTILABEL
+	 * automatically on mounts, ensuring that root (and more if using
+	 * different partitions) ZFS or UFS filesystems are created with
+	 * multilabel turned on, and having the installation procedure support
+	 * setting a MAC label per file (perhaps via additions to mtree(1)).  So
+	 * this probably isn't going to happen overnight, if ever.
+	 */
+	if (vn_fullpath(curproc->p_textvp, &path, &to_free) != 0)
+		return (EPERM);
+	error = strcmp(path, "/usr/bin/mdo") == 0 ? 0 : EPERM;
+	free(to_free, M_TEMP);
+	return (error);
+}
+
+static void
+mac_do_setcred_enter(void)
+{
+	struct rules *rules;
+	struct prison *pr;
+	struct mac_do_setcred_data * data;
+	int error;
+
+	/*
+	 * If not enabled, don't prepare data.  Other hooks will check for that
+	 * to know if they have to do something.
+	 */
+	if (do_enabled == 0)
+		return;
+
+	/*
+	 * MAC/do only applies to a process launched from a given executable.
+	 * For other processes, we just won't intervene (we don't deny requests,
+	 * nor do we grant privileges to them).
+	 */
+	error = check_proc();
+	if (error != 0)
+		return;
+
+	/*
+	 * Find the currently applicable rules.
+	 */
+	rules = find_rules(curproc->p_ucred->cr_prison, &pr);
+	hold_rules(rules);
+	prison_unlock(pr);
+
+	/*
+	 * Setup thread data to be used by other hooks.
+	 */
+	data = fetch_data();
+	if (!is_data_reusable(data, sizeof(*data)))
+		data = alloc_data(data, sizeof(*data));
+	set_data_header(data, sizeof(*data), PRIV_CRED_SETCRED, rules);
+	/* Not really necessary, but helps to catch programming errors. */
+	data->new_cred = NULL;
+	data->setcred_flags = 0;
+}
+
+static int
+mac_do_check_setcred(u_int flags, const struct ucred *const old_cred,
+    struct ucred *const new_cred)
+{
+	struct mac_do_setcred_data *const data = fetch_data();
+	const struct rule *rule;
+
+	/*
+	 * Do we have to do something?
+	 */
+	if (check_data_usable(data, sizeof(*data), PRIV_CRED_SETCRED) != 0)
+		/* No. */
+		return (0);
+
+	/*
+	 * Check for (supplementary) mandatory and forbidden groups.
+	 */
+	STAILQ_FOREACH(rule, &data->hdr.rules->head, r_entries) {
+		if (rule_applies(rule, old_cred)) {
+			int error = rule_check_set_supplementary_groups(rule,
+			    old_cred, new_cred);
+
+			if (error != 0)
+				return (error);
+		}
+	}
+
+	/*
+	 * Keep track of the setcred() flags and the new credentials for
+	 * priv_check*().
+	 */
+	data->new_cred = new_cred;
+	data->setcred_flags = flags;
+
+	return (0);
+}
+
+static void
+mac_do_setcred_exit(void)
+{
+	struct mac_do_setcred_data *const data = fetch_data();
+
+	if (check_data_usable(data, sizeof(*data), PRIV_CRED_SETCRED) == 0)
+		/*
+		 * This doesn't deallocate the small per-thread data storage,
+		 * which can be reused on subsequent calls.  (That data is of
+		 * course deallocated as the current thread dies or this module
+		 * is unloaded.)
+		 */
+		clear_data(data);
+}
+
+static void
+mac_do_init(struct mac_policy_conf *mpc)
+{
+	struct prison *pr;
+
+	osd_jail_slot = osd_jail_register(dealloc_jail_osd, osd_methods);
+	set_empty_rules(&prison0);
+	sx_slock(&allprison_lock);
+	TAILQ_FOREACH(pr, &allprison, pr_list)
+	    set_empty_rules(pr);
+	sx_sunlock(&allprison_lock);
+
+	osd_thread_slot = osd_thread_register(dealloc_thread_osd);
+}
+
+static void
+mac_do_destroy(struct mac_policy_conf *mpc)
+{
+	/*
+	 * osd_thread_deregister() must be called before osd_jail_deregister(),
+	 * for the reason explained in dealloc_jail_osd().
+	 */
+	osd_thread_deregister(osd_thread_slot);
+	osd_jail_deregister(osd_jail_slot);
+}
+
 static struct mac_policy_ops do_ops = {
-	.mpo_destroy = destroy,
-	.mpo_init = init,
-	.mpo_cred_check_setuid = check_setuid,
-	.mpo_cred_check_setgroups = check_setgroups,
-	.mpo_priv_grant = priv_grant,
+	.mpo_init = mac_do_init,
+	.mpo_destroy = mac_do_destroy,
+	.mpo_cred_setcred_enter = mac_do_setcred_enter,
+	.mpo_cred_check_setcred = mac_do_check_setcred,
+	.mpo_cred_setcred_exit = mac_do_setcred_exit,
+	.mpo_priv_grant = mac_do_priv_grant,
 };
 
-MAC_POLICY_SET(&do_ops, mac_do, "MAC/do",
-   MPC_LOADTIME_FLAG_UNLOADOK, NULL);
+MAC_POLICY_SET(&do_ops, mac_do, "MAC/do", MPC_LOADTIME_FLAG_UNLOADOK, NULL);
 MODULE_VERSION(mac_do, 1);
diff --git a/sys/security/mac_stub/mac_stub.c b/sys/security/mac_stub/mac_stub.c
--- a/sys/security/mac_stub/mac_stub.c
+++ b/sys/security/mac_stub/mac_stub.c
@@ -222,6 +222,26 @@
 	return (0);
 }
 
+static void
+stub_cred_setcred_enter(void)
+{
+
+}
+
+static int
+stub_cred_check_setcred(u_int flags, const struct ucred *old_cred,
+    struct ucred *new_cred)
+{
+
+	return (0);
+}
+
+static void
+stub_cred_setcred_exit(void)
+{
+
+}
+
 static int
 stub_cred_check_setegid(struct ucred *cred, gid_t egid)
 {
@@ -1688,6 +1708,9 @@
 	.mpo_cred_check_setaudit = stub_cred_check_setaudit,
 	.mpo_cred_check_setaudit_addr = stub_cred_check_setaudit_addr,
 	.mpo_cred_check_setauid = stub_cred_check_setauid,
+	.mpo_cred_setcred_enter = stub_cred_setcred_enter,
+	.mpo_cred_check_setcred = stub_cred_check_setcred,
+	.mpo_cred_setcred_exit = stub_cred_setcred_exit,
 	.mpo_cred_check_setegid = stub_cred_check_setegid,
 	.mpo_cred_check_seteuid = stub_cred_check_seteuid,
 	.mpo_cred_check_setgid = stub_cred_check_setgid,
diff --git a/sys/security/mac_test/mac_test.c b/sys/security/mac_test/mac_test.c
--- a/sys/security/mac_test/mac_test.c
+++ b/sys/security/mac_test/mac_test.c
@@ -257,6 +257,35 @@
 	return (0);
 }
 
+COUNTER_DECL(cred_setcred_enter);
+static void
+test_cred_setcred_enter(void)
+{
+
+	COUNTER_INC(cred_setcred_enter);
+}
+
+COUNTER_DECL(cred_check_setcred);
+static int
+test_cred_check_setcred(u_int flags, const struct ucred *old_cred,
+    struct ucred *new_cred)
+{
+
+	LABEL_CHECK(old_cred->cr_label, MAGIC_CRED);
+	LABEL_CHECK(new_cred->cr_label, MAGIC_CRED);
+	COUNTER_INC(cred_check_setcred);
+
+	return (0);
+}
+
+COUNTER_DECL(cred_setcred_exit);
+static void
+test_cred_setcred_exit(void)
+{
+
+	COUNTER_INC(cred_setcred_exit);
+}
+
 COUNTER_DECL(cred_check_setegid);
 static int
 test_cred_check_setegid(struct ucred *cred, gid_t egid)
@@ -3033,6 +3062,9 @@
 	.mpo_cred_check_setaudit = test_cred_check_setaudit,
 	.mpo_cred_check_setaudit_addr = test_cred_check_setaudit_addr,
 	.mpo_cred_check_setauid = test_cred_check_setauid,
+	.mpo_cred_setcred_enter = test_cred_setcred_enter,
+	.mpo_cred_check_setcred = test_cred_check_setcred,
+	.mpo_cred_setcred_exit = test_cred_setcred_exit,
 	.mpo_cred_check_seteuid = test_cred_check_seteuid,
 	.mpo_cred_check_setegid = test_cred_check_setegid,
 	.mpo_cred_check_setgid = test_cred_check_setgid,
diff --git a/sys/sys/priv.h b/sys/sys/priv.h
--- a/sys/sys/priv.h
+++ b/sys/sys/priv.h
@@ -105,7 +105,8 @@
 #define	PRIV_CRED_SETRESGID	58	/* setresgid. */
 #define	PRIV_SEEOTHERGIDS	59	/* Exempt bsd.seeothergids. */
 #define	PRIV_SEEOTHERUIDS	60	/* Exempt bsd.seeotheruids. */
-#define	PRIV_SEEJAILPROC        61      /* Exempt from bsd.see_jail_proc. */
+#define	PRIV_SEEJAILPROC	61	/* Exempt from bsd.see_jail_proc. */
+#define	PRIV_CRED_SETCRED	62	/* setcred. */
 
 /*
  * Debugging privileges.
diff --git a/sys/sys/syscall.h b/sys/sys/syscall.h
--- a/sys/sys/syscall.h
+++ b/sys/sys/syscall.h
@@ -526,4 +526,5 @@
 #define	SYS_timerfd_settime	587
 #define	SYS_kcmp	588
 #define	SYS_getrlimitusage	589
-#define	SYS_MAXSYSCALL	590
+#define	SYS_setcred	590
+#define	SYS_MAXSYSCALL	591
diff --git a/sys/sys/syscall.mk b/sys/sys/syscall.mk
--- a/sys/sys/syscall.mk
+++ b/sys/sys/syscall.mk
@@ -432,4 +432,5 @@
 	timerfd_gettime.o \
 	timerfd_settime.o \
 	kcmp.o \
-	getrlimitusage.o
+	getrlimitusage.o \
+	setcred.o
diff --git a/sys/sys/syscallsubr.h b/sys/sys/syscallsubr.h
--- a/sys/sys/syscallsubr.h
+++ b/sys/sys/syscallsubr.h
@@ -320,6 +320,8 @@
 	    fd_set *fd_ex, struct timeval *tvp, int abi_nfdbits);
 int	kern_sendit(struct thread *td, int s, struct msghdr *mp, int flags,
 	    struct mbuf *control, enum uio_seg segflg);
+int	kern_setcred_v0(struct thread *const td, const u_int flags,
+	    struct setcred_v0 *const wcred, gid_t *preallocated_groups);
 int	kern_setgroups(struct thread *td, int *ngrpp, gid_t *groups);
 int	kern_setitimer(struct thread *, u_int, struct itimerval *,
 	    struct itimerval *);
diff --git a/sys/sys/sysproto.h b/sys/sys/sysproto.h
--- a/sys/sys/sysproto.h
+++ b/sys/sys/sysproto.h
@@ -1882,6 +1882,11 @@
 	char flags_l_[PADL_(int)]; int flags; char flags_r_[PADR_(int)];
 	char res_l_[PADL_(rlim_t *)]; rlim_t * res; char res_r_[PADR_(rlim_t *)];
 };
+struct setcred_args {
+	char flags_l_[PADL_(u_int)]; u_int flags; char flags_r_[PADR_(u_int)];
+	char wcred_l_[PADL_(const void *)]; const void * wcred; char wcred_r_[PADR_(const void *)];
+	char size_l_[PADL_(size_t)]; size_t size; char size_r_[PADR_(size_t)];
+};
 int	sys_exit(struct thread *, struct exit_args *);
 int	sys_fork(struct thread *, struct fork_args *);
 int	sys_read(struct thread *, struct read_args *);
@@ -2282,6 +2287,7 @@
 int	sys_timerfd_settime(struct thread *, struct timerfd_settime_args *);
 int	sys_kcmp(struct thread *, struct kcmp_args *);
 int	sys_getrlimitusage(struct thread *, struct getrlimitusage_args *);
+int	sys_setcred(struct thread *, struct setcred_args *);
 
 #ifdef COMPAT_43
 
@@ -3262,6 +3268,7 @@
 #define	SYS_AUE_timerfd_settime	AUE_TIMERFD
 #define	SYS_AUE_kcmp	AUE_NULL
 #define	SYS_AUE_getrlimitusage	AUE_NULL
+#define	SYS_AUE_setcred	AUE_SETCRED
 
 #undef PAD_
 #undef PADL_
diff --git a/sys/sys/ucred.h b/sys/sys/ucred.h
--- a/sys/sys/ucred.h
+++ b/sys/sys/ucred.h
@@ -32,6 +32,7 @@
 #ifndef _SYS_UCRED_H_
 #define	_SYS_UCRED_H_
 
+#include <sys/types.h>
 #if defined(_KERNEL) || defined(_WANT_UCRED)
 #include <sys/_lock.h>
 #include <sys/_mutex.h>
@@ -39,8 +40,6 @@
 #include <bsm/audit.h>
 
 #if defined(_KERNEL) || defined(_WANT_UCRED)
-struct loginclass;
-
 /*
  * Flags for cr_flags.
  */
@@ -53,6 +52,11 @@
  */
 #define	CRED_SMALLGROUPS_NB	16
 
+struct label;
+struct loginclass;
+struct prison;
+struct uidinfo;
+
 /*
  * Credentials.
  *
@@ -119,7 +123,50 @@
 /* This can be used for both ucred and xucred structures. */
 #define	cr_gid cr_groups[0]
 
+struct mac;
+/*
+ * Structure to pass as an argument to the setcred() system call.
+ */
+struct setcred_v0 {
+	uid_t	 sc_uid;		/* effective user id */
+	uid_t	 sc_ruid;		/* real user id */
+	uid_t	 sc_svuid;		/* saved user id */
+	gid_t	 sc_gid;		/* effective group id */
+	gid_t	 sc_rgid;		/* real group id */
+	gid_t	 sc_svgid;		/* saved group id */
+	int	 sc_supp_groups_nb;	/* number of supplementary groups */
+	gid_t	*sc_supp_groups;	/* supplementary groups */
+	struct mac *sc_label;		/* MAC label */
+};
+
+/*
+ * Flags to setcred().
+ *
+ * Descending order to leave room for more version bits (if ever needed).
+ */
+#define	SETCREDF_UID		(1u << 31)
+#define	SETCREDF_RUID		(1u << 30)
+#define SETCREDF_SVUID		(1u << 29)
+#define SETCREDF_GID		(1u << 28)
+#define SETCREDF_RGID		(1u << 27)
+#define SETCREDF_SVGID		(1u << 26)
+#define SETCREDF_SUPP_GROUPS	(1u << 25)
+#define SETCREDF_MAC_LABEL	(1u << 24)
+
+#define SETCREDF_FROM_VERSION(version)	(((u_int)(version)) & 0xFF)
+#define SETCREDF_TO_VERSION(flags)	((flags) & 0xFF)
+
 #ifdef _KERNEL
+/*
+ * Masks of the currently valid flags to setcred() (v0).  As new versions are
+ * added, they may or may not use the same flags.
+ */
+#define SETCREDF_VERSION_BITS	(0xFF)
+#define SETCREDF_SET_MASK	(SETCREDF_UID | SETCREDF_RUID | SETCREDF_SVUID | \
+    SETCREDF_GID | SETCREDF_RGID | SETCREDF_SVGID | SETCREDF_SUPP_GROUPS | \
+    SETCREDF_MAC_LABEL)
+#define SETCREDF_MASK		(SETCREDF_SET_MASK | SETCREDF_VERSION_BITS)
+
 struct proc;
 struct thread;
 
@@ -183,6 +230,13 @@
 bool	group_is_supplementary(const gid_t gid, const struct ucred *const cred);
 bool	groupmember(gid_t gid, const struct ucred *cred);
 bool	realgroupmember(gid_t gid, const struct ucred *cred);
+
+#else /* !_KERNEL */
+
+__BEGIN_DECLS
+int	setcred(u_int flags, const void *wcred, size_t size);
+__END_DECLS
+
 #endif /* _KERNEL */
 
 #endif /* !_SYS_UCRED_H_ */
diff --git a/usr.bin/mdo/mdo.c b/usr.bin/mdo/mdo.c
--- a/usr.bin/mdo/mdo.c
+++ b/usr.bin/mdo/mdo.c
@@ -5,6 +5,7 @@
  */
 
 #include <sys/limits.h>
+#include <sys/ucred.h>
 
 #include <err.h>
 #include <paths.h>
@@ -27,6 +28,8 @@
 {
 	struct passwd *pw;
 	const char *username = "root";
+	struct setcred_v0 wcred = {};
+	u_int setcred_flags = SETCREDF_FROM_VERSION(0);
 	bool uidonly = false;
 	int ch;
 
@@ -50,20 +53,45 @@
 			const char *errp = NULL;
 			uid_t uid = strtonum(username, 0, UID_MAX, &errp);
 			if (errp != NULL)
-				err(EXIT_FAILURE, "%s", errp);
+				err(EXIT_FAILURE, "invalid user ID '%s'",
+				    username);
 			pw = getpwuid(uid);
 		}
 		if (pw == NULL)
 			err(EXIT_FAILURE, "invalid username '%s'", username);
 	}
+
+	wcred.sc_uid = wcred.sc_ruid = wcred.sc_svuid = pw->pw_uid;
+	setcred_flags |= SETCREDF_UID | SETCREDF_RUID | SETCREDF_SVUID;
+
 	if (!uidonly) {
-		if (initgroups(pw->pw_name, pw->pw_gid) == -1)
-			err(EXIT_FAILURE, "failed to call initgroups");
-		if (setgid(pw->pw_gid) == -1)
-			err(EXIT_FAILURE, "failed to call setgid");
+		/*
+		 * If there are too many groups specified for some UID, setting
+		 * the groups will fail.  We preserve this condition by
+		 * allocating one more group slot than allowed, as
+		 * getgrouplist() itself is just some getter function and thus
+		 * doesn't (and shouldn't) check the limit, and to allow
+		 * setcred() to actually check for overflow.
+		 */
+		const long ngroups_alloc = sysconf(_SC_NGROUPS_MAX) + 2;
+		gid_t *const groups = malloc(sizeof(*groups) * ngroups_alloc);
+		int ngroups = ngroups_alloc;
+
+		if (groups == NULL)
+			err(EXIT_FAILURE, "cannot allocate memory for groups");
+
+		getgrouplist(pw->pw_name, pw->pw_gid, groups, &ngroups);
+
+		wcred.sc_gid = wcred.sc_rgid = wcred.sc_svgid = pw->pw_gid;
+		wcred.sc_supp_groups = groups + 1;
+		wcred.sc_supp_groups_nb = ngroups - 1;
+		setcred_flags |= SETCREDF_GID | SETCREDF_RGID | SETCREDF_SVGID |
+		    SETCREDF_SUPP_GROUPS;
 	}
-	if (setuid(pw->pw_uid) == -1)
-		err(EXIT_FAILURE, "failed to call setuid");
+
+	if (setcred(setcred_flags, &wcred, sizeof(wcred)) != 0)
+		err(EXIT_FAILURE, "calling setcred() failed");
+
 	if (*argv == NULL) {
 		const char *sh = getenv("SHELL");
 		if (sh == NULL)