diff --git a/sys/kern/kern_jail.c b/sys/kern/kern_jail.c index 39bdcaf5ef0e..57e6024a9939 100644 --- a/sys/kern/kern_jail.c +++ b/sys/kern/kern_jail.c @@ -1,5009 +1,5010 @@ /*- * SPDX-License-Identifier: BSD-2-Clause * * Copyright (c) 1999 Poul-Henning Kamp. * Copyright (c) 2008 Bjoern A. Zeeb. * Copyright (c) 2009 James Gritton. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include #include "opt_ddb.h" #include "opt_inet.h" #include "opt_inet6.h" #include "opt_nfs.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef DDB #include #endif /* DDB */ #include #define PRISON0_HOSTUUID_MODULE "hostuuid" MALLOC_DEFINE(M_PRISON, "prison", "Prison structures"); static MALLOC_DEFINE(M_PRISON_RACCT, "prison_racct", "Prison racct structures"); /* Keep struct prison prison0 and some code in kern_jail_set() readable. */ #ifdef INET #ifdef INET6 #define _PR_IP_SADDRSEL PR_IP4_SADDRSEL|PR_IP6_SADDRSEL #else #define _PR_IP_SADDRSEL PR_IP4_SADDRSEL #endif #else /* !INET */ #ifdef INET6 #define _PR_IP_SADDRSEL PR_IP6_SADDRSEL #else #define _PR_IP_SADDRSEL 0 #endif #endif /* prison0 describes what is "real" about the system. */ struct prison prison0 = { .pr_id = 0, .pr_name = "0", .pr_ref = 1, .pr_uref = 1, .pr_path = "/", .pr_securelevel = -1, .pr_devfs_rsnum = 0, .pr_state = PRISON_STATE_ALIVE, .pr_childmax = JAIL_MAX, .pr_hostuuid = DEFAULT_HOSTUUID, .pr_children = LIST_HEAD_INITIALIZER(prison0.pr_children), #ifdef VIMAGE .pr_flags = PR_HOST|PR_VNET|_PR_IP_SADDRSEL, #else .pr_flags = PR_HOST|_PR_IP_SADDRSEL, #endif .pr_allow = PR_ALLOW_ALL_STATIC, }; MTX_SYSINIT(prison0, &prison0.pr_mtx, "jail mutex", MTX_DEF); struct bool_flags { const char *name; const char *noname; volatile u_int flag; }; struct jailsys_flags { const char *name; unsigned disable; unsigned new; }; /* allprison, allprison_racct and lastprid are protected by allprison_lock. 
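 *
 * (An illustrative sketch, not taken verbatim from this file: a typical
 *  reader takes the lock shared around a lookup and then relies on the
 *  returned prison's own mutex, e.g.
 *
 *	sx_slock(&allprison_lock);
 *	pr = prison_find(jid);		-- returns with pr->pr_mtx held
 *	sx_sunlock(&allprison_lock);
 *	... use pr ...
 *	mtx_unlock(&pr->pr_mtx);
 *
 *  while code that inserts or removes list entries, such as
 *  kern_jail_set() and sys_jail_remove(), takes it exclusive.)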
 */
struct sx allprison_lock;
SX_SYSINIT(allprison_lock, &allprison_lock, "allprison");
struct prisonlist allprison = TAILQ_HEAD_INITIALIZER(allprison);
LIST_HEAD(, prison_racct) allprison_racct;
int lastprid = 0;

static int get_next_prid(struct prison **insprp);
static int do_jail_attach(struct thread *td, struct prison *pr, int drflags);
static void prison_complete(void *context, int pending);
static void prison_deref(struct prison *pr, int flags);
static void prison_deref_kill(struct prison *pr, struct prisonlist *freeprison);
static int prison_lock_xlock(struct prison *pr, int flags);
static void prison_cleanup(struct prison *pr);
static void prison_free_not_last(struct prison *pr);
static void prison_proc_free_not_last(struct prison *pr);
static void prison_proc_relink(struct prison *opr, struct prison *npr,
    struct proc *p);
static void prison_set_allow_locked(struct prison *pr, unsigned flag,
    int enable);
static char *prison_path(struct prison *pr1, struct prison *pr2);
#ifdef RACCT
static void prison_racct_attach(struct prison *pr);
static void prison_racct_modify(struct prison *pr);
static void prison_racct_detach(struct prison *pr);
#endif

/* Flags for prison_deref */
#define PD_DEREF		0x01	/* Decrement pr_ref */
#define PD_DEUREF		0x02	/* Decrement pr_uref */
#define PD_KILL			0x04	/* Remove jail, kill processes, etc */
#define PD_LOCKED		0x10	/* pr_mtx is held */
#define PD_LIST_SLOCKED		0x20	/* allprison_lock is held shared */
#define PD_LIST_XLOCKED		0x40	/* allprison_lock is held exclusive */
#define PD_OP_FLAGS		0x07	/* Operation flags */
#define PD_LOCK_FLAGS		0x70	/* Lock status flags */

/*
 * Parameter names corresponding to PR_* flag values.  Size values are for kvm
 * as we cannot figure out the size of a sparse array, or an array without a
 * terminating entry.
 */
static struct bool_flags pr_flag_bool[] = {
	{"persist", "nopersist", PR_PERSIST},
#ifdef INET
	{"ip4.saddrsel", "ip4.nosaddrsel", PR_IP4_SADDRSEL},
#endif
#ifdef INET6
	{"ip6.saddrsel", "ip6.nosaddrsel", PR_IP6_SADDRSEL},
#endif
};
const size_t pr_flag_bool_size = sizeof(pr_flag_bool);

static struct jailsys_flags pr_flag_jailsys[] = {
	{"host", 0, PR_HOST},
#ifdef VIMAGE
	{"vnet", 0, PR_VNET},
#endif
#ifdef INET
	{"ip4", PR_IP4_USER, PR_IP4_USER},
#endif
#ifdef INET6
	{"ip6", PR_IP6_USER, PR_IP6_USER},
#endif
};
const size_t pr_flag_jailsys_size = sizeof(pr_flag_jailsys);

/*
 * Make this array full-size so dynamic parameters can be added.
 * It is protected by prison0.mtx, but lockless reading is allowed
 * with an atomic check of the flag values.
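 *
 * (Sketch of the reader pattern used later in this file, shown here for
 *  illustration only: readers walk the array and stop at the first zero
 *  flag, loading it atomically,
 *
 *	for (bf = pr_flag_allow;
 *	    bf < pr_flag_allow + nitems(pr_flag_allow) &&
 *	    atomic_load_int(&bf->flag) != 0;
 *	    bf++)
 *		...
 *
 *  the idea being that a writer publishes a dynamic entry's flag only
 *  once the entry is otherwise complete, so a non-zero flag marks a
 *  usable entry.)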
*/ static struct bool_flags pr_flag_allow[NBBY * NBPW] = { {"allow.set_hostname", "allow.noset_hostname", PR_ALLOW_SET_HOSTNAME}, {"allow.sysvipc", "allow.nosysvipc", PR_ALLOW_SYSVIPC}, {"allow.raw_sockets", "allow.noraw_sockets", PR_ALLOW_RAW_SOCKETS}, {"allow.chflags", "allow.nochflags", PR_ALLOW_CHFLAGS}, {"allow.mount", "allow.nomount", PR_ALLOW_MOUNT}, {"allow.quotas", "allow.noquotas", PR_ALLOW_QUOTAS}, {"allow.socket_af", "allow.nosocket_af", PR_ALLOW_SOCKET_AF}, {"allow.mlock", "allow.nomlock", PR_ALLOW_MLOCK}, {"allow.reserved_ports", "allow.noreserved_ports", PR_ALLOW_RESERVED_PORTS}, {"allow.read_msgbuf", "allow.noread_msgbuf", PR_ALLOW_READ_MSGBUF}, {"allow.unprivileged_proc_debug", "allow.nounprivileged_proc_debug", PR_ALLOW_UNPRIV_DEBUG}, {"allow.suser", "allow.nosuser", PR_ALLOW_SUSER}, #ifdef VIMAGE {"allow.nfsd", "allow.nonfsd", PR_ALLOW_NFSD}, #endif }; static unsigned pr_allow_all = PR_ALLOW_ALL_STATIC; const size_t pr_flag_allow_size = sizeof(pr_flag_allow); #define JAIL_DEFAULT_ALLOW (PR_ALLOW_SET_HOSTNAME | \ PR_ALLOW_RESERVED_PORTS | \ PR_ALLOW_UNPRIV_DEBUG | \ PR_ALLOW_SUSER) #define JAIL_DEFAULT_ENFORCE_STATFS 2 #define JAIL_DEFAULT_DEVFS_RSNUM 0 static unsigned jail_default_allow = JAIL_DEFAULT_ALLOW; static int jail_default_enforce_statfs = JAIL_DEFAULT_ENFORCE_STATFS; static int jail_default_devfs_rsnum = JAIL_DEFAULT_DEVFS_RSNUM; #if defined(INET) || defined(INET6) static unsigned jail_max_af_ips = 255; #endif /* * Initialize the parts of prison0 that can't be static-initialized with * constants. This is called from proc0_init() after creating thread0 cpuset. */ void prison0_init(void) { uint8_t *file, *data; size_t size; char buf[sizeof(prison0.pr_hostuuid)]; bool valid; prison0.pr_cpuset = cpuset_ref(thread0.td_cpuset); prison0.pr_osreldate = osreldate; strlcpy(prison0.pr_osrelease, osrelease, sizeof(prison0.pr_osrelease)); /* If we have a preloaded hostuuid, use it. */ file = preload_search_by_type(PRISON0_HOSTUUID_MODULE); if (file != NULL) { data = preload_fetch_addr(file); size = preload_fetch_size(file); if (data != NULL) { /* * The preloaded data may include trailing whitespace, almost * certainly a newline; skip over any whitespace or * non-printable characters to be safe. */ while (size > 0 && data[size - 1] <= 0x20) { size--; } valid = false; /* * Not NUL-terminated when passed from loader, but * validate_uuid requires that due to using sscanf (as * does the subsequent strlcpy, since it still reads * past the given size to return the true length); * bounce to a temporary buffer to fix. */ if (size >= sizeof(buf)) goto done; memcpy(buf, data, size); buf[size] = '\0'; if (validate_uuid(buf, size, NULL, 0) != 0) goto done; valid = true; (void)strlcpy(prison0.pr_hostuuid, buf, sizeof(prison0.pr_hostuuid)); done: if (bootverbose && !valid) { printf("hostuuid: preload data malformed: '%.*s'\n", (int)size, data); } } } if (bootverbose) printf("hostuuid: using %s\n", prison0.pr_hostuuid); } /* * struct jail_args { * struct jail *jail; * }; */ int sys_jail(struct thread *td, struct jail_args *uap) { uint32_t version; int error; struct jail j; error = copyin(uap->jail, &version, sizeof(uint32_t)); if (error) return (error); switch (version) { case 0: { struct jail_v0 j0; /* FreeBSD single IPv4 jails. 
*/ bzero(&j, sizeof(struct jail)); error = copyin(uap->jail, &j0, sizeof(struct jail_v0)); if (error) return (error); j.version = j0.version; j.path = j0.path; j.hostname = j0.hostname; j.ip4s = htonl(j0.ip_number); /* jail_v0 is host order */ break; } case 1: /* * Version 1 was used by multi-IPv4 jail implementations * that never made it into the official kernel. */ return (EINVAL); case 2: /* JAIL_API_VERSION */ /* FreeBSD multi-IPv4/IPv6,noIP jails. */ error = copyin(uap->jail, &j, sizeof(struct jail)); if (error) return (error); break; default: /* Sci-Fi jails are not supported, sorry. */ return (EINVAL); } return (kern_jail(td, &j)); } int kern_jail(struct thread *td, struct jail *j) { struct iovec optiov[2 * (4 + nitems(pr_flag_allow) #ifdef INET + 1 #endif #ifdef INET6 + 1 #endif )]; struct uio opt; char *u_path, *u_hostname, *u_name; struct bool_flags *bf; #ifdef INET uint32_t ip4s; struct in_addr *u_ip4; #endif #ifdef INET6 struct in6_addr *u_ip6; #endif size_t tmplen; int error, enforce_statfs; bzero(&optiov, sizeof(optiov)); opt.uio_iov = optiov; opt.uio_iovcnt = 0; opt.uio_offset = -1; opt.uio_resid = -1; opt.uio_segflg = UIO_SYSSPACE; opt.uio_rw = UIO_READ; opt.uio_td = td; /* Set permissions for top-level jails from sysctls. */ if (!jailed(td->td_ucred)) { for (bf = pr_flag_allow; bf < pr_flag_allow + nitems(pr_flag_allow) && atomic_load_int(&bf->flag) != 0; bf++) { optiov[opt.uio_iovcnt].iov_base = __DECONST(char *, (jail_default_allow & bf->flag) ? bf->name : bf->noname); optiov[opt.uio_iovcnt].iov_len = strlen(optiov[opt.uio_iovcnt].iov_base) + 1; opt.uio_iovcnt += 2; } optiov[opt.uio_iovcnt].iov_base = "enforce_statfs"; optiov[opt.uio_iovcnt].iov_len = sizeof("enforce_statfs"); opt.uio_iovcnt++; enforce_statfs = jail_default_enforce_statfs; optiov[opt.uio_iovcnt].iov_base = &enforce_statfs; optiov[opt.uio_iovcnt].iov_len = sizeof(enforce_statfs); opt.uio_iovcnt++; } tmplen = MAXPATHLEN + MAXHOSTNAMELEN + MAXHOSTNAMELEN; #ifdef INET ip4s = (j->version == 0) ? 
1 : j->ip4s; if (ip4s > jail_max_af_ips) return (EINVAL); tmplen += ip4s * sizeof(struct in_addr); #else if (j->ip4s > 0) return (EINVAL); #endif #ifdef INET6 if (j->ip6s > jail_max_af_ips) return (EINVAL); tmplen += j->ip6s * sizeof(struct in6_addr); #else if (j->ip6s > 0) return (EINVAL); #endif u_path = malloc(tmplen, M_TEMP, M_WAITOK); u_hostname = u_path + MAXPATHLEN; u_name = u_hostname + MAXHOSTNAMELEN; #ifdef INET u_ip4 = (struct in_addr *)(u_name + MAXHOSTNAMELEN); #endif #ifdef INET6 #ifdef INET u_ip6 = (struct in6_addr *)(u_ip4 + ip4s); #else u_ip6 = (struct in6_addr *)(u_name + MAXHOSTNAMELEN); #endif #endif optiov[opt.uio_iovcnt].iov_base = "path"; optiov[opt.uio_iovcnt].iov_len = sizeof("path"); opt.uio_iovcnt++; optiov[opt.uio_iovcnt].iov_base = u_path; error = copyinstr(j->path, u_path, MAXPATHLEN, &optiov[opt.uio_iovcnt].iov_len); if (error) { free(u_path, M_TEMP); return (error); } opt.uio_iovcnt++; optiov[opt.uio_iovcnt].iov_base = "host.hostname"; optiov[opt.uio_iovcnt].iov_len = sizeof("host.hostname"); opt.uio_iovcnt++; optiov[opt.uio_iovcnt].iov_base = u_hostname; error = copyinstr(j->hostname, u_hostname, MAXHOSTNAMELEN, &optiov[opt.uio_iovcnt].iov_len); if (error) { free(u_path, M_TEMP); return (error); } opt.uio_iovcnt++; if (j->jailname != NULL) { optiov[opt.uio_iovcnt].iov_base = "name"; optiov[opt.uio_iovcnt].iov_len = sizeof("name"); opt.uio_iovcnt++; optiov[opt.uio_iovcnt].iov_base = u_name; error = copyinstr(j->jailname, u_name, MAXHOSTNAMELEN, &optiov[opt.uio_iovcnt].iov_len); if (error) { free(u_path, M_TEMP); return (error); } opt.uio_iovcnt++; } #ifdef INET optiov[opt.uio_iovcnt].iov_base = "ip4.addr"; optiov[opt.uio_iovcnt].iov_len = sizeof("ip4.addr"); opt.uio_iovcnt++; optiov[opt.uio_iovcnt].iov_base = u_ip4; optiov[opt.uio_iovcnt].iov_len = ip4s * sizeof(struct in_addr); if (j->version == 0) u_ip4->s_addr = j->ip4s; else { error = copyin(j->ip4, u_ip4, optiov[opt.uio_iovcnt].iov_len); if (error) { free(u_path, M_TEMP); return (error); } } opt.uio_iovcnt++; #endif #ifdef INET6 optiov[opt.uio_iovcnt].iov_base = "ip6.addr"; optiov[opt.uio_iovcnt].iov_len = sizeof("ip6.addr"); opt.uio_iovcnt++; optiov[opt.uio_iovcnt].iov_base = u_ip6; optiov[opt.uio_iovcnt].iov_len = j->ip6s * sizeof(struct in6_addr); error = copyin(j->ip6, u_ip6, optiov[opt.uio_iovcnt].iov_len); if (error) { free(u_path, M_TEMP); return (error); } opt.uio_iovcnt++; #endif KASSERT(opt.uio_iovcnt <= nitems(optiov), ("kern_jail: too many iovecs (%d)", opt.uio_iovcnt)); error = kern_jail_set(td, &opt, JAIL_CREATE | JAIL_ATTACH); free(u_path, M_TEMP); return (error); } /* * struct jail_set_args { * struct iovec *iovp; * unsigned int iovcnt; * int flags; * }; */ int sys_jail_set(struct thread *td, struct jail_set_args *uap) { struct uio *auio; int error; /* Check that we have an even number of iovecs. 
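 *
 * (Parameters arrive as name/value iovec pairs, hence the even count.
 *  A hedged userland sketch, using only the documented jail_set(2)
 *  interface; "myjail" is just an example name:
 *
 *	struct iovec iov[4];
 *	iov[0].iov_base = "name";	iov[0].iov_len = sizeof("name");
 *	iov[1].iov_base = "myjail";	iov[1].iov_len = sizeof("myjail");
 *	iov[2].iov_base = "persist";	iov[2].iov_len = sizeof("persist");
 *	iov[3].iov_base = NULL;		iov[3].iov_len = 0;
 *	jid = jail_set(iov, 4, JAIL_CREATE);
 *
 *  Boolean parameters such as "persist" take a NULL value of length 0.)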
*/ if (uap->iovcnt & 1) return (EINVAL); error = copyinuio(uap->iovp, uap->iovcnt, &auio); if (error) return (error); error = kern_jail_set(td, auio, uap->flags); free(auio, M_IOV); return (error); } #if defined(INET) || defined(INET6) typedef int prison_addr_cmp_t(const void *, const void *); typedef bool prison_addr_valid_t(const void *); static const struct pr_family { size_t size; prison_addr_cmp_t *cmp; prison_addr_valid_t *valid; int ip_flag; } pr_families[PR_FAMILY_MAX] = { #ifdef INET [PR_INET] = { .size = sizeof(struct in_addr), .cmp = prison_qcmp_v4, .valid = prison_valid_v4, .ip_flag = PR_IP4_USER, }, #endif #ifdef INET6 [PR_INET6] = { .size = sizeof(struct in6_addr), .cmp = prison_qcmp_v6, .valid = prison_valid_v6, .ip_flag = PR_IP6_USER, }, #endif }; /* * Network address lists (pr_addrs) allocation for jails. The addresses * are accessed locklessly by the network stack, thus need to be protected by * the network epoch. */ struct prison_ip { struct epoch_context ctx; uint32_t ips; #ifdef FUTURE_C /* * XXX Variable-length automatic arrays in union may be * supported in future C. */ union { char pr_ip[]; struct in_addr pr_ip4[]; struct in6_addr pr_ip6[]; }; #else /* No future C :( */ char pr_ip[]; #endif }; static char * PR_IP(struct prison_ip *pip, const pr_family_t af, int idx) { MPASS(pip); MPASS(af < PR_FAMILY_MAX); MPASS(idx >= 0 && idx < pip->ips); return (pip->pr_ip + pr_families[af].size * idx); } static struct prison_ip * prison_ip_alloc(const pr_family_t af, uint32_t cnt, int flags) { struct prison_ip *pip; pip = malloc(sizeof(struct prison_ip) + cnt * pr_families[af].size, M_PRISON, flags); if (pip != NULL) pip->ips = cnt; return (pip); } /* * Allocate and copyin user supplied address list, sorting and validating. * kern_jail_set() helper. */ static struct prison_ip * prison_ip_copyin(const pr_family_t af, void *op, uint32_t cnt) { prison_addr_cmp_t *const cmp = pr_families[af].cmp; const size_t size = pr_families[af].size; struct prison_ip *pip; pip = prison_ip_alloc(af, cnt, M_WAITOK); bcopy(op, pip->pr_ip, cnt * size); /* * IP addresses are all sorted but ip[0] to preserve * the primary IP address as given from userland. * This special IP is used for unbound outgoing * connections as well for "loopback" traffic in case * source address selection cannot find any more fitting * address to connect from. */ if (cnt > 1) qsort(PR_IP(pip, af, 1), cnt - 1, size, cmp); /* * Check for duplicate addresses and do some simple * zero and broadcast checks. If users give other bogus * addresses it is their problem. */ for (int i = 0; i < cnt; i++) { if (!pr_families[af].valid(PR_IP(pip, af, i))) { free(pip, M_PRISON); return (NULL); } if (i + 1 < cnt && (cmp(PR_IP(pip, af, 0), PR_IP(pip, af, i + 1)) == 0 || cmp(PR_IP(pip, af, i), PR_IP(pip, af, i + 1)) == 0)) { free(pip, M_PRISON); return (NULL); } } return (pip); } /* * Allocate and dup parent prison address list. * kern_jail_set() helper. */ static void prison_ip_dup(struct prison *ppr, struct prison *pr, const pr_family_t af) { const struct prison_ip *ppip = ppr->pr_addrs[af]; struct prison_ip *pip; if (ppip != NULL) { pip = prison_ip_alloc(af, ppip->ips, M_WAITOK); bcopy(ppip->pr_ip, pip->pr_ip, pip->ips * pr_families[af].size); pr->pr_addrs[af] = pip; } } /* * Make sure the new set of IP addresses is a subset of the parent's list. * Don't worry about the parent being unlocked, as any setting is done with * allprison_lock held. * kern_jail_set() helper. 
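 *
 * (Reminder of the list invariant relied on below: entry 0 is the
 *  caller-chosen primary address and is left unsorted, while entries
 *  1..n-1 are sorted, so the subset test can scan for the primary and
 *  then walk the two sorted tails in a single merge-style pass.)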
*/ static bool prison_ip_parent_match(struct prison_ip *ppip, struct prison_ip *pip, const pr_family_t af) { prison_addr_cmp_t *const cmp = pr_families[af].cmp; int i, j; if (ppip == NULL) return (false); for (i = 0; i < ppip->ips; i++) if (cmp(PR_IP(pip, af, 0), PR_IP(ppip, af, i)) == 0) break; if (i == ppip->ips) /* Main address not present in parent. */ return (false); if (pip->ips > 1) { for (i = j = 1; i < pip->ips; i++) { if (cmp(PR_IP(pip, af, i), PR_IP(ppip, af, 0)) == 0) /* Equals to parent primary address. */ continue; for (; j < ppip->ips; j++) if (cmp(PR_IP(pip, af, i), PR_IP(ppip, af, j)) == 0) break; if (j == ppip->ips) break; } if (j == ppip->ips) /* Address not present in parent. */ return (false); } return (true); } /* * Check for conflicting IP addresses. We permit them if there is no more * than one IP on each jail. If there is a duplicate on a jail with more * than one IP stop checking and return error. * kern_jail_set() helper. */ static bool prison_ip_conflict_check(const struct prison *ppr, const struct prison *pr, struct prison_ip *pip, pr_family_t af) { const struct prison *tppr, *tpr; int descend; #ifdef VIMAGE for (tppr = ppr; tppr != &prison0; tppr = tppr->pr_parent) if (tppr->pr_flags & PR_VNET) break; #else tppr = &prison0; #endif FOREACH_PRISON_DESCENDANT(tppr, tpr, descend) { if (tpr == pr || #ifdef VIMAGE (tpr != tppr && (tpr->pr_flags & PR_VNET)) || #endif !prison_isalive(tpr)) { descend = 0; continue; } if (!(tpr->pr_flags & pr_families[af].ip_flag)) continue; descend = 0; if (tpr->pr_addrs[af] == NULL || (pip->ips == 1 && tpr->pr_addrs[af]->ips == 1)) continue; for (int i = 0; i < pip->ips; i++) if (prison_ip_check(tpr, af, PR_IP(pip, af, i)) == 0) return (false); } return (true); } _Static_assert(offsetof(struct prison_ip, ctx) == 0, "prison must start with epoch context"); static void prison_ip_free_deferred(epoch_context_t ctx) { free(ctx, M_PRISON); } static void prison_ip_free(struct prison_ip *pip) { if (pip != NULL) NET_EPOCH_CALL(prison_ip_free_deferred, &pip->ctx); } static void prison_ip_set(struct prison *pr, const pr_family_t af, struct prison_ip *new) { struct prison_ip **mem, *old; mtx_assert(&pr->pr_mtx, MA_OWNED); mem = &pr->pr_addrs[af]; old = *mem; atomic_store_ptr(mem, new); prison_ip_free(old); } /* * Restrict a prison's IP address list with its parent's, possibly replacing * it. Return true if succeed, otherwise should redo. * kern_jail_set() helper. */ static bool prison_ip_restrict(struct prison *pr, const pr_family_t af, struct prison_ip **newp) { struct prison_ip *ppip = pr->pr_parent->pr_addrs[af]; struct prison_ip *pip = pr->pr_addrs[af]; int (*const cmp)(const void *, const void *) = pr_families[af].cmp; const size_t size = pr_families[af].size; struct prison_ip *new = newp != NULL ? *newp : NULL; uint32_t ips; mtx_assert(&pr->pr_mtx, MA_OWNED); /* * Due to epoch-synchronized access to the IP address lists we always * allocate a new list even if the old one has enough space. We could * atomically update an IPv4 address inside a list, but that would * screw up sorting, and in case of IPv6 we can't even atomically write * one. */ if (ppip == NULL) { if (pip != NULL) prison_ip_set(pr, af, NULL); return (true); } if (!(pr->pr_flags & pr_families[af].ip_flag)) { if (new == NULL) { new = prison_ip_alloc(af, ppip->ips, M_NOWAIT); if (new == NULL) return (false); /* Redo */ } /* This has no user settings, so just copy the parent's list. 
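 *
 * (If the caller passed in a preallocated list via *newp, as the redo
 *  loops in kern_jail_set() do after an earlier M_NOWAIT failure here,
 *  that buffer is used instead of allocating under the prison mutex.)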
*/ MPASS(new->ips == ppip->ips); bcopy(ppip->pr_ip, new->pr_ip, ppip->ips * size); prison_ip_set(pr, af, new); if (newp != NULL) *newp = NULL; /* Used */ } else if (pip != NULL) { /* Remove addresses that aren't in the parent. */ int i; i = 0; /* index in pip */ ips = 0; /* index in new */ if (new == NULL) { new = prison_ip_alloc(af, pip->ips, M_NOWAIT); if (new == NULL) return (false); /* Redo */ } for (int pi = 0; pi < ppip->ips; pi++) if (cmp(PR_IP(pip, af, 0), PR_IP(ppip, af, pi)) == 0) { /* Found our primary address in parent. */ bcopy(PR_IP(pip, af, i), PR_IP(new, af, ips), size); i++; ips++; break; } for (int pi = 1; i < pip->ips; ) { /* Check against primary, which is unsorted. */ if (cmp(PR_IP(pip, af, i), PR_IP(ppip, af, 0)) == 0) { /* Matches parent's primary address. */ bcopy(PR_IP(pip, af, i), PR_IP(new, af, ips), size); i++; ips++; continue; } /* The rest are sorted. */ switch (pi >= ppip->ips ? -1 : cmp(PR_IP(pip, af, i), PR_IP(ppip, af, pi))) { case -1: i++; break; case 0: bcopy(PR_IP(pip, af, i), PR_IP(new, af, ips), size); i++; pi++; ips++; break; case 1: pi++; break; } } if (ips == 0) { if (newp == NULL || *newp == NULL) prison_ip_free(new); new = NULL; } else { /* Shrink to real size */ KASSERT((new->ips >= ips), ("Out-of-bounds write to prison_ip %p", new)); new->ips = ips; } prison_ip_set(pr, af, new); if (newp != NULL) *newp = NULL; /* Used */ } return (true); } /* * Fast-path check if an address belongs to a prison. */ int prison_ip_check(const struct prison *pr, const pr_family_t af, const void *addr) { int (*const cmp)(const void *, const void *) = pr_families[af].cmp; struct prison_ip *pip; int i, a, z, d; MPASS(mtx_owned(&pr->pr_mtx) || in_epoch(net_epoch_preempt) || sx_xlocked(&allprison_lock)); pip = atomic_load_ptr(&pr->pr_addrs[af]); if (__predict_false(pip == NULL)) return (EAFNOSUPPORT); /* Check the primary IP. */ if (cmp(PR_IP(pip, af, 0), addr) == 0) return (0); /* * All the other IPs are sorted so we can do a binary search. */ a = 0; z = pip->ips - 2; while (a <= z) { i = (a + z) / 2; d = cmp(PR_IP(pip, af, i + 1), addr); if (d > 0) z = i - 1; else if (d < 0) a = i + 1; else return (0); } return (EADDRNOTAVAIL); } /* * Grab primary IP. Historically required mutex, but nothing prevents * us to support epoch-protected access. Is it used in fast path? 
* in{6}_jail.c helper */ const void * prison_ip_get0(const struct prison *pr, const pr_family_t af) { const struct prison_ip *pip = pr->pr_addrs[af]; mtx_assert(&pr->pr_mtx, MA_OWNED); MPASS(pip); return (pip->pr_ip); } u_int prison_ip_cnt(const struct prison *pr, const pr_family_t af) { return (pr->pr_addrs[af]->ips); } #endif /* defined(INET) || defined(INET6) */ int kern_jail_set(struct thread *td, struct uio *optuio, int flags) { struct nameidata nd; #ifdef INET struct prison_ip *ip4; #endif #ifdef INET6 struct prison_ip *ip6; #endif struct vfsopt *opt; struct vfsoptlist *opts; struct prison *pr, *deadpr, *inspr, *mypr, *ppr, *tpr; struct vnode *root; char *domain, *errmsg, *host, *name, *namelc, *p, *path, *uuid; char *g_path, *osrelstr; struct bool_flags *bf; struct jailsys_flags *jsf; #if defined(INET) || defined(INET6) void *op; #endif unsigned long hid; size_t namelen, onamelen, pnamelen; int born, created, cuflags, descend, drflags, enforce; int error, errmsg_len, errmsg_pos; int gotchildmax, gotenforce, gothid, gotrsnum, gotslevel; int jid, jsys, len, level; int childmax, osreldt, rsnum, slevel; #ifdef INET int ip4s; bool redo_ip4; #endif #ifdef INET6 int ip6s; bool redo_ip6; #endif uint64_t pr_allow, ch_allow, pr_flags, ch_flags; uint64_t pr_allow_diff; unsigned tallow; char numbuf[12]; error = priv_check(td, PRIV_JAIL_SET); if (!error && (flags & JAIL_ATTACH)) error = priv_check(td, PRIV_JAIL_ATTACH); if (error) return (error); mypr = td->td_ucred->cr_prison; if ((flags & JAIL_CREATE) && mypr->pr_childmax == 0) return (EPERM); if (flags & ~JAIL_SET_MASK) return (EINVAL); /* * Check all the parameters before committing to anything. Not all * errors can be caught early, but we may as well try. Also, this * takes care of some expensive stuff (path lookup) before getting * the allprison lock. * * XXX Jails are not filesystems, and jail parameters are not mount * options. But it makes more sense to re-use the vfsopt code * than duplicate it under a different name. 
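 *
 * (Illustrative note: after vfs_buildopts() below, each name/value pair
 *  from the iovec list is a struct vfsopt, so fixed-size parameters are
 *  fetched with vfs_copyopt(opts, "securelevel", &slevel, sizeof(slevel))
 *  and string parameters with vfs_getopt(), exactly as done throughout
 *  this function.)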
*/ error = vfs_buildopts(optuio, &opts); if (error) return (error); #ifdef INET ip4 = NULL; #endif #ifdef INET6 ip6 = NULL; #endif g_path = NULL; cuflags = flags & (JAIL_CREATE | JAIL_UPDATE); if (!cuflags) { error = EINVAL; vfs_opterror(opts, "no valid operation (create or update)"); goto done_errmsg; } error = vfs_copyopt(opts, "jid", &jid, sizeof(jid)); if (error == ENOENT) jid = 0; else if (error != 0) goto done_free; error = vfs_copyopt(opts, "securelevel", &slevel, sizeof(slevel)); if (error == ENOENT) gotslevel = 0; else if (error != 0) goto done_free; else gotslevel = 1; error = vfs_copyopt(opts, "children.max", &childmax, sizeof(childmax)); if (error == ENOENT) gotchildmax = 0; else if (error != 0) goto done_free; else gotchildmax = 1; error = vfs_copyopt(opts, "enforce_statfs", &enforce, sizeof(enforce)); if (error == ENOENT) gotenforce = 0; else if (error != 0) goto done_free; else if (enforce < 0 || enforce > 2) { error = EINVAL; goto done_free; } else gotenforce = 1; error = vfs_copyopt(opts, "devfs_ruleset", &rsnum, sizeof(rsnum)); if (error == ENOENT) gotrsnum = 0; else if (error != 0) goto done_free; else gotrsnum = 1; pr_flags = ch_flags = 0; for (bf = pr_flag_bool; bf < pr_flag_bool + nitems(pr_flag_bool); bf++) { vfs_flagopt(opts, bf->name, &pr_flags, bf->flag); vfs_flagopt(opts, bf->noname, &ch_flags, bf->flag); } ch_flags |= pr_flags; for (jsf = pr_flag_jailsys; jsf < pr_flag_jailsys + nitems(pr_flag_jailsys); jsf++) { error = vfs_copyopt(opts, jsf->name, &jsys, sizeof(jsys)); if (error == ENOENT) continue; if (error != 0) goto done_free; switch (jsys) { case JAIL_SYS_DISABLE: if (!jsf->disable) { error = EINVAL; goto done_free; } pr_flags |= jsf->disable; break; case JAIL_SYS_NEW: pr_flags |= jsf->new; break; case JAIL_SYS_INHERIT: break; default: error = EINVAL; goto done_free; } ch_flags |= jsf->new | jsf->disable; } if ((flags & (JAIL_CREATE | JAIL_ATTACH)) == JAIL_CREATE && !(pr_flags & PR_PERSIST)) { error = EINVAL; vfs_opterror(opts, "new jail must persist or attach"); goto done_errmsg; } #ifdef VIMAGE if ((flags & JAIL_UPDATE) && (ch_flags & PR_VNET)) { error = EINVAL; vfs_opterror(opts, "vnet cannot be changed after creation"); goto done_errmsg; } #endif #ifdef INET if ((flags & JAIL_UPDATE) && (ch_flags & PR_IP4_USER)) { error = EINVAL; vfs_opterror(opts, "ip4 cannot be changed after creation"); goto done_errmsg; } #endif #ifdef INET6 if ((flags & JAIL_UPDATE) && (ch_flags & PR_IP6_USER)) { error = EINVAL; vfs_opterror(opts, "ip6 cannot be changed after creation"); goto done_errmsg; } #endif pr_allow = ch_allow = 0; for (bf = pr_flag_allow; bf < pr_flag_allow + nitems(pr_flag_allow) && atomic_load_int(&bf->flag) != 0; bf++) { vfs_flagopt(opts, bf->name, &pr_allow, bf->flag); vfs_flagopt(opts, bf->noname, &ch_allow, bf->flag); } ch_allow |= pr_allow; error = vfs_getopt(opts, "name", (void **)&name, &len); if (error == ENOENT) name = NULL; else if (error != 0) goto done_free; else { if (len == 0 || name[len - 1] != '\0') { error = EINVAL; goto done_free; } if (len > MAXHOSTNAMELEN) { error = ENAMETOOLONG; goto done_free; } } error = vfs_getopt(opts, "host.hostname", (void **)&host, &len); if (error == ENOENT) host = NULL; else if (error != 0) goto done_free; else { ch_flags |= PR_HOST; pr_flags |= PR_HOST; if (len == 0 || host[len - 1] != '\0') { error = EINVAL; goto done_free; } if (len > MAXHOSTNAMELEN) { error = ENAMETOOLONG; goto done_free; } } error = vfs_getopt(opts, "host.domainname", (void **)&domain, &len); if (error == ENOENT) domain = NULL; else if 
(error != 0) goto done_free; else { ch_flags |= PR_HOST; pr_flags |= PR_HOST; if (len == 0 || domain[len - 1] != '\0') { error = EINVAL; goto done_free; } if (len > MAXHOSTNAMELEN) { error = ENAMETOOLONG; goto done_free; } } error = vfs_getopt(opts, "host.hostuuid", (void **)&uuid, &len); if (error == ENOENT) uuid = NULL; else if (error != 0) goto done_free; else { ch_flags |= PR_HOST; pr_flags |= PR_HOST; if (len == 0 || uuid[len - 1] != '\0') { error = EINVAL; goto done_free; } if (len > HOSTUUIDLEN) { error = ENAMETOOLONG; goto done_free; } } #ifdef COMPAT_FREEBSD32 if (SV_PROC_FLAG(td->td_proc, SV_ILP32)) { uint32_t hid32; error = vfs_copyopt(opts, "host.hostid", &hid32, sizeof(hid32)); hid = hid32; } else #endif error = vfs_copyopt(opts, "host.hostid", &hid, sizeof(hid)); if (error == ENOENT) gothid = 0; else if (error != 0) goto done_free; else { gothid = 1; ch_flags |= PR_HOST; pr_flags |= PR_HOST; } #ifdef INET error = vfs_getopt(opts, "ip4.addr", &op, &ip4s); if (error == ENOENT) ip4s = 0; else if (error != 0) goto done_free; else if (ip4s & (sizeof(struct in_addr) - 1)) { error = EINVAL; goto done_free; } else { ch_flags |= PR_IP4_USER; pr_flags |= PR_IP4_USER; if (ip4s > 0) { ip4s /= sizeof(struct in_addr); if (ip4s > jail_max_af_ips) { error = EINVAL; vfs_opterror(opts, "too many IPv4 addresses"); goto done_errmsg; } ip4 = prison_ip_copyin(PR_INET, op, ip4s); if (ip4 == NULL) { error = EINVAL; goto done_free; } } } #endif #ifdef INET6 error = vfs_getopt(opts, "ip6.addr", &op, &ip6s); if (error == ENOENT) ip6s = 0; else if (error != 0) goto done_free; else if (ip6s & (sizeof(struct in6_addr) - 1)) { error = EINVAL; goto done_free; } else { ch_flags |= PR_IP6_USER; pr_flags |= PR_IP6_USER; if (ip6s > 0) { ip6s /= sizeof(struct in6_addr); if (ip6s > jail_max_af_ips) { error = EINVAL; vfs_opterror(opts, "too many IPv6 addresses"); goto done_errmsg; } ip6 = prison_ip_copyin(PR_INET6, op, ip6s); if (ip6 == NULL) { error = EINVAL; goto done_free; } } } #endif #if defined(VIMAGE) && (defined(INET) || defined(INET6)) if ((ch_flags & PR_VNET) && (ch_flags & (PR_IP4_USER | PR_IP6_USER))) { error = EINVAL; vfs_opterror(opts, "vnet jails cannot have IP address restrictions"); goto done_errmsg; } #endif error = vfs_getopt(opts, "osrelease", (void **)&osrelstr, &len); if (error == ENOENT) osrelstr = NULL; else if (error != 0) goto done_free; else { if (flags & JAIL_UPDATE) { error = EINVAL; vfs_opterror(opts, "osrelease cannot be changed after creation"); goto done_errmsg; } if (len == 0 || osrelstr[len - 1] != '\0') { error = EINVAL; goto done_free; } if (len >= OSRELEASELEN) { error = ENAMETOOLONG; vfs_opterror(opts, "osrelease string must be 1-%d bytes long", OSRELEASELEN - 1); goto done_errmsg; } } error = vfs_copyopt(opts, "osreldate", &osreldt, sizeof(osreldt)); if (error == ENOENT) osreldt = 0; else if (error != 0) goto done_free; else { if (flags & JAIL_UPDATE) { error = EINVAL; vfs_opterror(opts, "osreldate cannot be changed after creation"); goto done_errmsg; } if (osreldt == 0) { error = EINVAL; vfs_opterror(opts, "osreldate cannot be 0"); goto done_errmsg; } } root = NULL; error = vfs_getopt(opts, "path", (void **)&path, &len); if (error == ENOENT) path = NULL; else if (error != 0) goto done_free; else { if (flags & JAIL_UPDATE) { error = EINVAL; vfs_opterror(opts, "path cannot be changed after creation"); goto done_errmsg; } if (len == 0 || path[len - 1] != '\0') { error = EINVAL; goto done_free; } NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_SYSSPACE, path); error = namei(&nd); if 
(error) goto done_free; root = nd.ni_vp; NDFREE_PNBUF(&nd); g_path = malloc(MAXPATHLEN, M_TEMP, M_WAITOK); strlcpy(g_path, path, MAXPATHLEN); error = vn_path_to_global_path(td, root, g_path, MAXPATHLEN); if (error == 0) { path = g_path; } else { /* exit on other errors */ goto done_free; } if (root->v_type != VDIR) { error = ENOTDIR; vput(root); goto done_free; } VOP_UNLOCK(root); } /* * Find the specified jail, or at least its parent. * This abuses the file error codes ENOENT and EEXIST. */ pr = NULL; inspr = NULL; if (cuflags == JAIL_CREATE && jid == 0 && name != NULL) { namelc = strrchr(name, '.'); jid = strtoul(namelc != NULL ? namelc + 1 : name, &p, 10); if (*p != '\0') jid = 0; } sx_xlock(&allprison_lock); drflags = PD_LIST_XLOCKED; ppr = mypr; if (!prison_isalive(ppr)) { /* This jail is dying. This process will surely follow. */ error = EAGAIN; goto done_deref; } if (jid != 0) { if (jid < 0) { error = EINVAL; vfs_opterror(opts, "negative jid"); goto done_deref; } /* * See if a requested jid already exists. Keep track of * where it can be inserted later. */ TAILQ_FOREACH(inspr, &allprison, pr_list) { if (inspr->pr_id < jid) continue; if (inspr->pr_id > jid) break; pr = inspr; mtx_lock(&pr->pr_mtx); drflags |= PD_LOCKED; inspr = NULL; break; } if (pr != NULL) { /* Create: jid must not exist. */ if (cuflags == JAIL_CREATE) { /* * Even creators that cannot see the jail will * get EEXIST. */ error = EEXIST; vfs_opterror(opts, "jail %d already exists", jid); goto done_deref; } if (!prison_ischild(mypr, pr)) { /* * Updaters get ENOENT if they cannot see the * jail. This is true even for CREATE | UPDATE, * which normally cannot give this error. */ error = ENOENT; vfs_opterror(opts, "jail %d not found", jid); goto done_deref; } ppr = pr->pr_parent; if (!prison_isalive(ppr)) { error = ENOENT; vfs_opterror(opts, "jail %d is dying", ppr->pr_id); goto done_deref; } if (!prison_isalive(pr)) { if (!(flags & JAIL_DYING)) { error = ENOENT; vfs_opterror(opts, "jail %d is dying", jid); goto done_deref; } if ((flags & JAIL_ATTACH) || (pr_flags & PR_PERSIST)) { /* * A dying jail might be resurrected * (via attach or persist), but first * it must determine if another jail * has claimed its name. Accomplish * this by implicitly re-setting the * name. */ if (name == NULL) name = prison_name(mypr, pr); } } } else { /* Update: jid must exist. */ if (cuflags == JAIL_UPDATE) { error = ENOENT; vfs_opterror(opts, "jail %d not found", jid); goto done_deref; } } } /* * If the caller provided a name, look for a jail by that name. * This has different semantics for creates and updates keyed by jid * (where the name must not already exist in a different jail), * and updates keyed by the name itself (where the name must exist * because that is the jail being updated). */ namelc = NULL; if (name != NULL) { namelc = strrchr(name, '.'); if (namelc == NULL) namelc = name; else { /* * This is a hierarchical name. Split it into the * parent and child names, and make sure the parent * exists or matches an already found jail. 
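 *
 * (Example, for illustration: creating "foo.bar" from prison0 NUL-
 *  terminates the name at the last dot, looks up the parent "foo",
 *  restores the dot, and then treats "bar" as the new jail's name
 *  within that parent.)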
*/ if (pr != NULL) { if (strncmp(name, ppr->pr_name, namelc - name) || ppr->pr_name[namelc - name] != '\0') { error = EINVAL; vfs_opterror(opts, "cannot change jail's parent"); goto done_deref; } } else { *namelc = '\0'; ppr = prison_find_name(mypr, name); if (ppr == NULL) { error = ENOENT; vfs_opterror(opts, "jail \"%s\" not found", name); goto done_deref; } mtx_unlock(&ppr->pr_mtx); if (!prison_isalive(ppr)) { error = ENOENT; vfs_opterror(opts, "jail \"%s\" is dying", name); goto done_deref; } *namelc = '.'; } namelc++; } if (namelc[0] != '\0') { pnamelen = (ppr == &prison0) ? 0 : strlen(ppr->pr_name) + 1; deadpr = NULL; FOREACH_PRISON_CHILD(ppr, tpr) { if (tpr != pr && !strcmp(tpr->pr_name + pnamelen, namelc)) { if (prison_isalive(tpr)) { if (pr == NULL && cuflags != JAIL_CREATE) { /* * Use this jail * for updates. */ pr = tpr; mtx_lock(&pr->pr_mtx); drflags |= PD_LOCKED; break; } /* * Create, or update(jid): * name must not exist in an * active sibling jail. */ error = EEXIST; vfs_opterror(opts, "jail \"%s\" already exists", name); goto done_deref; } if (pr == NULL && cuflags != JAIL_CREATE) { deadpr = tpr; } } } /* If no active jail is found, use a dying one. */ if (deadpr != NULL && pr == NULL) { if (flags & JAIL_DYING) { pr = deadpr; mtx_lock(&pr->pr_mtx); drflags |= PD_LOCKED; } else if (cuflags == JAIL_UPDATE) { error = ENOENT; vfs_opterror(opts, "jail \"%s\" is dying", name); goto done_deref; } } /* Update: name must exist if no jid. */ else if (cuflags == JAIL_UPDATE && pr == NULL) { error = ENOENT; vfs_opterror(opts, "jail \"%s\" not found", name); goto done_deref; } } } /* Update: must provide a jid or name. */ else if (cuflags == JAIL_UPDATE && pr == NULL) { error = ENOENT; vfs_opterror(opts, "update specified no jail"); goto done_deref; } /* If there's no prison to update, create a new one and link it in. */ created = pr == NULL; if (created) { for (tpr = mypr; tpr != NULL; tpr = tpr->pr_parent) if (tpr->pr_childcount >= tpr->pr_childmax) { error = EPERM; vfs_opterror(opts, "prison limit exceeded"); goto done_deref; } if (jid == 0 && (jid = get_next_prid(&inspr)) == 0) { error = EAGAIN; vfs_opterror(opts, "no available jail IDs"); goto done_deref; } pr = malloc(sizeof(*pr), M_PRISON, M_WAITOK | M_ZERO); pr->pr_state = PRISON_STATE_INVALID; refcount_init(&pr->pr_ref, 1); refcount_init(&pr->pr_uref, 0); drflags |= PD_DEREF; LIST_INIT(&pr->pr_children); mtx_init(&pr->pr_mtx, "jail mutex", NULL, MTX_DEF | MTX_DUPOK); TASK_INIT(&pr->pr_task, 0, prison_complete, pr); pr->pr_id = jid; if (inspr != NULL) TAILQ_INSERT_BEFORE(inspr, pr, pr_list); else TAILQ_INSERT_TAIL(&allprison, pr, pr_list); pr->pr_parent = ppr; prison_hold(ppr); prison_proc_hold(ppr); LIST_INSERT_HEAD(&ppr->pr_children, pr, pr_sibling); for (tpr = ppr; tpr != NULL; tpr = tpr->pr_parent) tpr->pr_childcount++; /* Set some default values, and inherit some from the parent. 
*/ if (namelc == NULL) namelc = ""; if (path == NULL) { path = "/"; root = mypr->pr_root; vref(root); } strlcpy(pr->pr_hostuuid, DEFAULT_HOSTUUID, HOSTUUIDLEN); pr->pr_flags |= PR_HOST; #if defined(INET) || defined(INET6) #ifdef VIMAGE if (!(pr_flags & PR_VNET)) #endif { #ifdef INET if (!(ch_flags & PR_IP4_USER)) pr->pr_flags |= PR_IP4 | PR_IP4_USER; else if (!(pr_flags & PR_IP4_USER)) { pr->pr_flags |= ppr->pr_flags & PR_IP4; prison_ip_dup(ppr, pr, PR_INET); } #endif #ifdef INET6 if (!(ch_flags & PR_IP6_USER)) pr->pr_flags |= PR_IP6 | PR_IP6_USER; else if (!(pr_flags & PR_IP6_USER)) { pr->pr_flags |= ppr->pr_flags & PR_IP6; prison_ip_dup(ppr, pr, PR_INET6); } #endif } #endif /* Source address selection is always on by default. */ pr->pr_flags |= _PR_IP_SADDRSEL; pr->pr_securelevel = ppr->pr_securelevel; pr->pr_allow = JAIL_DEFAULT_ALLOW & ppr->pr_allow; pr->pr_enforce_statfs = jail_default_enforce_statfs; pr->pr_devfs_rsnum = ppr->pr_devfs_rsnum; pr->pr_osreldate = osreldt ? osreldt : ppr->pr_osreldate; if (osrelstr == NULL) strlcpy(pr->pr_osrelease, ppr->pr_osrelease, sizeof(pr->pr_osrelease)); else strlcpy(pr->pr_osrelease, osrelstr, sizeof(pr->pr_osrelease)); #ifdef VIMAGE /* Allocate a new vnet if specified. */ pr->pr_vnet = (pr_flags & PR_VNET) ? vnet_alloc() : ppr->pr_vnet; #endif /* * Allocate a dedicated cpuset for each jail. * Unlike other initial settings, this may return an error. */ error = cpuset_create_root(ppr, &pr->pr_cpuset); if (error) goto done_deref; mtx_lock(&pr->pr_mtx); drflags |= PD_LOCKED; } else { /* * Grab a reference for existing prisons, to ensure they * continue to exist for the duration of the call. */ prison_hold(pr); drflags |= PD_DEREF; #if defined(VIMAGE) && (defined(INET) || defined(INET6)) if ((pr->pr_flags & PR_VNET) && (ch_flags & (PR_IP4_USER | PR_IP6_USER))) { error = EINVAL; vfs_opterror(opts, "vnet jails cannot have IP address restrictions"); goto done_deref; } #endif #ifdef INET if (PR_IP4_USER & ch_flags & (pr_flags ^ pr->pr_flags)) { error = EINVAL; vfs_opterror(opts, "ip4 cannot be changed after creation"); goto done_deref; } #endif #ifdef INET6 if (PR_IP6_USER & ch_flags & (pr_flags ^ pr->pr_flags)) { error = EINVAL; vfs_opterror(opts, "ip6 cannot be changed after creation"); goto done_deref; } #endif } /* Do final error checking before setting anything. 
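 *
 * (None of the requested parameters have been applied yet, so a failure
 *  from here through the OSD checks needs no rollback; a jail created
 *  above is simply torn down again via prison_deref() at done_deref.)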
*/ if (gotslevel) { if (slevel < ppr->pr_securelevel) { error = EPERM; goto done_deref; } } if (gotchildmax) { if (childmax >= ppr->pr_childmax) { error = EPERM; goto done_deref; } } if (gotenforce) { if (enforce < ppr->pr_enforce_statfs) { error = EPERM; goto done_deref; } } if (gotrsnum) { /* * devfs_rsnum is a uint16_t */ if (rsnum < 0 || rsnum > 65535) { error = EINVAL; goto done_deref; } /* * Nested jails always inherit parent's devfs ruleset */ if (jailed(td->td_ucred)) { if (rsnum > 0 && rsnum != ppr->pr_devfs_rsnum) { error = EPERM; goto done_deref; } else rsnum = ppr->pr_devfs_rsnum; } } #ifdef INET if (ip4s > 0) { if ((ppr->pr_flags & PR_IP4) && !prison_ip_parent_match(ppr->pr_addrs[PR_INET], ip4, PR_INET)) { error = EPERM; goto done_deref; } if (!prison_ip_conflict_check(ppr, pr, ip4, PR_INET)) { error = EADDRINUSE; vfs_opterror(opts, "IPv4 addresses clash"); goto done_deref; } } #endif #ifdef INET6 if (ip6s > 0) { if ((ppr->pr_flags & PR_IP6) && !prison_ip_parent_match(ppr->pr_addrs[PR_INET6], ip6, PR_INET6)) { error = EPERM; goto done_deref; } if (!prison_ip_conflict_check(ppr, pr, ip6, PR_INET6)) { error = EADDRINUSE; vfs_opterror(opts, "IPv6 addresses clash"); goto done_deref; } } #endif onamelen = namelen = 0; if (namelc != NULL) { /* Give a default name of the jid. Also allow the name to be * explicitly the jid - but not any other number, and only in * normal form (no leading zero/etc). */ if (namelc[0] == '\0') snprintf(namelc = numbuf, sizeof(numbuf), "%d", jid); else if ((strtoul(namelc, &p, 10) != jid || namelc[0] < '1' || namelc[0] > '9') && *p == '\0') { error = EINVAL; vfs_opterror(opts, "name cannot be numeric (unless it is the jid)"); goto done_deref; } /* * Make sure the name isn't too long for the prison or its * children. */ pnamelen = (ppr == &prison0) ? 0 : strlen(ppr->pr_name) + 1; onamelen = strlen(pr->pr_name + pnamelen); namelen = strlen(namelc); if (pnamelen + namelen + 1 > sizeof(pr->pr_name)) { error = ENAMETOOLONG; goto done_deref; } FOREACH_PRISON_DESCENDANT(pr, tpr, descend) { if (strlen(tpr->pr_name) + (namelen - onamelen) >= sizeof(pr->pr_name)) { error = ENAMETOOLONG; goto done_deref; } } } pr_allow_diff = pr_allow & ~ppr->pr_allow; if (pr_allow_diff & ~PR_ALLOW_DIFFERENCES) { error = EPERM; goto done_deref; } /* * Let modules check their parameters. This requires unlocking and * then re-locking the prison, but this is still a valid state as long * as allprison_lock remains xlocked. */ mtx_unlock(&pr->pr_mtx); drflags &= ~PD_LOCKED; error = osd_jail_call(pr, PR_METHOD_CHECK, opts); if (error != 0) goto done_deref; mtx_lock(&pr->pr_mtx); drflags |= PD_LOCKED; /* At this point, all valid parameters should have been noted. */ TAILQ_FOREACH(opt, opts, link) { if (!opt->seen && strcmp(opt->name, "errmsg")) { error = EINVAL; vfs_opterror(opts, "unknown parameter: %s", opt->name); goto done_deref; } } /* Set the parameters of the prison. 
*/ #ifdef INET redo_ip4 = false; if (pr_flags & PR_IP4_USER) { pr->pr_flags |= PR_IP4; prison_ip_set(pr, PR_INET, ip4); ip4 = NULL; FOREACH_PRISON_DESCENDANT_LOCKED(pr, tpr, descend) { #ifdef VIMAGE if (tpr->pr_flags & PR_VNET) { descend = 0; continue; } #endif if (!prison_ip_restrict(tpr, PR_INET, NULL)) { redo_ip4 = true; descend = 0; } } } #endif #ifdef INET6 redo_ip6 = false; if (pr_flags & PR_IP6_USER) { pr->pr_flags |= PR_IP6; prison_ip_set(pr, PR_INET6, ip6); ip6 = NULL; FOREACH_PRISON_DESCENDANT_LOCKED(pr, tpr, descend) { #ifdef VIMAGE if (tpr->pr_flags & PR_VNET) { descend = 0; continue; } #endif if (!prison_ip_restrict(tpr, PR_INET6, NULL)) { redo_ip6 = true; descend = 0; } } } #endif if (gotslevel) { pr->pr_securelevel = slevel; /* Set all child jails to be at least this level. */ FOREACH_PRISON_DESCENDANT_LOCKED(pr, tpr, descend) if (tpr->pr_securelevel < slevel) tpr->pr_securelevel = slevel; } if (gotchildmax) { pr->pr_childmax = childmax; /* Set all child jails to under this limit. */ FOREACH_PRISON_DESCENDANT_LOCKED_LEVEL(pr, tpr, descend, level) if (tpr->pr_childmax > childmax - level) tpr->pr_childmax = childmax > level ? childmax - level : 0; } if (gotenforce) { pr->pr_enforce_statfs = enforce; /* Pass this restriction on to the children. */ FOREACH_PRISON_DESCENDANT_LOCKED(pr, tpr, descend) if (tpr->pr_enforce_statfs < enforce) tpr->pr_enforce_statfs = enforce; } if (gotrsnum) { pr->pr_devfs_rsnum = rsnum; /* Pass this restriction on to the children. */ FOREACH_PRISON_DESCENDANT_LOCKED(pr, tpr, descend) tpr->pr_devfs_rsnum = rsnum; } if (namelc != NULL) { if (ppr == &prison0) strlcpy(pr->pr_name, namelc, sizeof(pr->pr_name)); else snprintf(pr->pr_name, sizeof(pr->pr_name), "%s.%s", ppr->pr_name, namelc); /* Change this component of child names. */ FOREACH_PRISON_DESCENDANT_LOCKED(pr, tpr, descend) { bcopy(tpr->pr_name + onamelen, tpr->pr_name + namelen, strlen(tpr->pr_name + onamelen) + 1); bcopy(pr->pr_name, tpr->pr_name, namelen); } } if (path != NULL) { /* Try to keep a real-rooted full pathname. */ strlcpy(pr->pr_path, path, sizeof(pr->pr_path)); pr->pr_root = root; root = NULL; } if (PR_HOST & ch_flags & ~pr_flags) { if (pr->pr_flags & PR_HOST) { /* * Copy the parent's host info. As with pr_ip4 above, * the lack of a lock on the parent is not a problem; * it is always set with allprison_lock at least * shared, and is held exclusively here. */ strlcpy(pr->pr_hostname, pr->pr_parent->pr_hostname, sizeof(pr->pr_hostname)); strlcpy(pr->pr_domainname, pr->pr_parent->pr_domainname, sizeof(pr->pr_domainname)); strlcpy(pr->pr_hostuuid, pr->pr_parent->pr_hostuuid, sizeof(pr->pr_hostuuid)); pr->pr_hostid = pr->pr_parent->pr_hostid; } } else if (host != NULL || domain != NULL || uuid != NULL || gothid) { /* Set this prison, and any descendants without PR_HOST. 
*/ if (host != NULL) strlcpy(pr->pr_hostname, host, sizeof(pr->pr_hostname)); if (domain != NULL) strlcpy(pr->pr_domainname, domain, sizeof(pr->pr_domainname)); if (uuid != NULL) strlcpy(pr->pr_hostuuid, uuid, sizeof(pr->pr_hostuuid)); if (gothid) pr->pr_hostid = hid; FOREACH_PRISON_DESCENDANT_LOCKED(pr, tpr, descend) { if (tpr->pr_flags & PR_HOST) descend = 0; else { if (host != NULL) strlcpy(tpr->pr_hostname, pr->pr_hostname, sizeof(tpr->pr_hostname)); if (domain != NULL) strlcpy(tpr->pr_domainname, pr->pr_domainname, sizeof(tpr->pr_domainname)); if (uuid != NULL) strlcpy(tpr->pr_hostuuid, pr->pr_hostuuid, sizeof(tpr->pr_hostuuid)); if (gothid) tpr->pr_hostid = hid; } } } pr->pr_allow = (pr->pr_allow & ~ch_allow) | pr_allow; if ((tallow = ch_allow & ~pr_allow)) prison_set_allow_locked(pr, tallow, 0); /* * Persistent prisons get an extra reference, and prisons losing their * persist flag lose that reference. */ born = !prison_isalive(pr); if (ch_flags & PR_PERSIST & (pr_flags ^ pr->pr_flags)) { if (pr_flags & PR_PERSIST) { prison_hold(pr); /* * This may make a dead prison alive again, but wait * to label it as such until after OSD calls have had * a chance to run (and perhaps to fail). */ refcount_acquire(&pr->pr_uref); } else { drflags |= PD_DEUREF; prison_free_not_last(pr); } } pr->pr_flags = (pr->pr_flags & ~ch_flags) | pr_flags; mtx_unlock(&pr->pr_mtx); drflags &= ~PD_LOCKED; /* * Any errors past this point will need to de-persist newly created * prisons, as well as call remove methods. */ if (born) drflags |= PD_KILL; #ifdef RACCT if (racct_enable && created) prison_racct_attach(pr); #endif /* Locks may have prevented a complete restriction of child IP * addresses. If so, allocate some more memory and try again. */ #ifdef INET while (redo_ip4) { ip4s = pr->pr_addrs[PR_INET]->ips; MPASS(ip4 == NULL); ip4 = prison_ip_alloc(PR_INET, ip4s, M_WAITOK); mtx_lock(&pr->pr_mtx); redo_ip4 = false; FOREACH_PRISON_DESCENDANT_LOCKED(pr, tpr, descend) { #ifdef VIMAGE if (tpr->pr_flags & PR_VNET) { descend = 0; continue; } #endif if (!prison_ip_restrict(tpr, PR_INET, &ip4)) redo_ip4 = true; } mtx_unlock(&pr->pr_mtx); } #endif #ifdef INET6 while (redo_ip6) { ip6s = pr->pr_addrs[PR_INET6]->ips; MPASS(ip6 == NULL); ip6 = prison_ip_alloc(PR_INET6, ip6s, M_WAITOK); mtx_lock(&pr->pr_mtx); redo_ip6 = false; FOREACH_PRISON_DESCENDANT_LOCKED(pr, tpr, descend) { #ifdef VIMAGE if (tpr->pr_flags & PR_VNET) { descend = 0; continue; } #endif if (!prison_ip_restrict(tpr, PR_INET6, &ip6)) redo_ip6 = true; } mtx_unlock(&pr->pr_mtx); } #endif /* Let the modules do their work. */ if (born) { error = osd_jail_call(pr, PR_METHOD_CREATE, opts); if (error) goto done_deref; } error = osd_jail_call(pr, PR_METHOD_SET, opts); if (error) goto done_deref; /* * A new prison is now ready to be seen; either it has gained a user * reference via persistence, or is about to gain one via attachment. */ if (born) { drflags = prison_lock_xlock(pr, drflags); pr->pr_state = PRISON_STATE_ALIVE; } /* Attach this process to the prison if requested. 
*/ if (flags & JAIL_ATTACH) { error = do_jail_attach(td, pr, prison_lock_xlock(pr, drflags & PD_LOCK_FLAGS)); drflags &= ~(PD_LOCKED | PD_LIST_XLOCKED); if (error) { vfs_opterror(opts, "attach failed"); goto done_deref; } } #ifdef RACCT if (racct_enable && !created) { if (drflags & PD_LOCKED) { mtx_unlock(&pr->pr_mtx); drflags &= ~PD_LOCKED; } if (drflags & PD_LIST_XLOCKED) { sx_xunlock(&allprison_lock); drflags &= ~PD_LIST_XLOCKED; } prison_racct_modify(pr); } #endif if (born && pr != &prison0 && (pr->pr_allow & PR_ALLOW_NFSD) != 0 && (pr->pr_root->v_vflag & VV_ROOT) == 0) printf("Warning jail jid=%d: mountd/nfsd requires a separate" " file system\n", pr->pr_id); drflags &= ~PD_KILL; td->td_retval[0] = pr->pr_id; done_deref: /* Release any temporary prison holds and/or locks. */ if (pr != NULL) prison_deref(pr, drflags); else if (drflags & PD_LIST_SLOCKED) sx_sunlock(&allprison_lock); else if (drflags & PD_LIST_XLOCKED) sx_xunlock(&allprison_lock); if (root != NULL) vrele(root); done_errmsg: if (error) { /* Write the error message back to userspace. */ if (vfs_getopt(opts, "errmsg", (void **)&errmsg, &errmsg_len) == 0 && errmsg_len > 0) { errmsg_pos = 2 * vfs_getopt_pos(opts, "errmsg") + 1; if (optuio->uio_segflg == UIO_SYSSPACE) bcopy(errmsg, optuio->uio_iov[errmsg_pos].iov_base, errmsg_len); else copyout(errmsg, optuio->uio_iov[errmsg_pos].iov_base, errmsg_len); } } done_free: #ifdef INET prison_ip_free(ip4); #endif #ifdef INET6 prison_ip_free(ip6); #endif if (g_path != NULL) free(g_path, M_TEMP); vfs_freeopts(opts); return (error); } /* * Find the next available prison ID. Return the ID on success, or zero * on failure. Also set a pointer to the allprison list entry the prison * should be inserted before. */ static int get_next_prid(struct prison **insprp) { struct prison *inspr; int jid, maxid; jid = lastprid % JAIL_MAX + 1; if (TAILQ_EMPTY(&allprison) || TAILQ_LAST(&allprison, prisonlist)->pr_id < jid) { /* * A common case is for all jails to be implicitly numbered, * which means they'll go on the end of the list, at least * for the first JAIL_MAX times. */ inspr = NULL; } else { /* * Take two passes through the allprison list: first starting * with the proposed jid, then ending with it. */ for (maxid = JAIL_MAX; maxid != 0; ) { TAILQ_FOREACH(inspr, &allprison, pr_list) { if (inspr->pr_id < jid) continue; if (inspr->pr_id > jid) { /* Found an opening. */ maxid = 0; break; } if (++jid > maxid) { if (lastprid == maxid || lastprid == 0) { /* * The entire legal range * has been traversed */ return 0; } /* Try again from the start. */ jid = 1; maxid = lastprid; break; } } if (inspr == NULL) { /* Found room at the end of the list. */ break; } } } *insprp = inspr; lastprid = jid; return (jid); } /* * struct jail_get_args { * struct iovec *iovp; * unsigned int iovcnt; * int flags; * }; */ int sys_jail_get(struct thread *td, struct jail_get_args *uap) { struct uio *auio; int error; /* Check that we have an even number of iovecs. 
*/ if (uap->iovcnt & 1) return (EINVAL); error = copyinuio(uap->iovp, uap->iovcnt, &auio); if (error) return (error); error = kern_jail_get(td, auio, uap->flags); if (error == 0) error = copyout(auio->uio_iov, uap->iovp, uap->iovcnt * sizeof (struct iovec)); free(auio, M_IOV); return (error); } int kern_jail_get(struct thread *td, struct uio *optuio, int flags) { struct bool_flags *bf; struct jailsys_flags *jsf; struct prison *pr, *mypr; struct vfsopt *opt; struct vfsoptlist *opts; char *errmsg, *name; int drflags, error, errmsg_len, errmsg_pos, i, jid, len, pos; unsigned f; if (flags & ~JAIL_GET_MASK) return (EINVAL); /* Get the parameter list. */ error = vfs_buildopts(optuio, &opts); if (error) return (error); errmsg_pos = vfs_getopt_pos(opts, "errmsg"); mypr = td->td_ucred->cr_prison; pr = NULL; /* * Find the prison specified by one of: lastjid, jid, name. */ sx_slock(&allprison_lock); drflags = PD_LIST_SLOCKED; error = vfs_copyopt(opts, "lastjid", &jid, sizeof(jid)); if (error == 0) { TAILQ_FOREACH(pr, &allprison, pr_list) { if (pr->pr_id > jid && ((flags & JAIL_DYING) || prison_isalive(pr)) && prison_ischild(mypr, pr)) { mtx_lock(&pr->pr_mtx); drflags |= PD_LOCKED; goto found_prison; } } error = ENOENT; vfs_opterror(opts, "no jail after %d", jid); goto done; } else if (error != ENOENT) goto done; error = vfs_copyopt(opts, "jid", &jid, sizeof(jid)); if (error == 0) { if (jid != 0) { pr = prison_find_child(mypr, jid); if (pr != NULL) { drflags |= PD_LOCKED; if (!(prison_isalive(pr) || (flags & JAIL_DYING))) { error = ENOENT; vfs_opterror(opts, "jail %d is dying", jid); goto done; } goto found_prison; } error = ENOENT; vfs_opterror(opts, "jail %d not found", jid); goto done; } } else if (error != ENOENT) goto done; error = vfs_getopt(opts, "name", (void **)&name, &len); if (error == 0) { if (len == 0 || name[len - 1] != '\0') { error = EINVAL; goto done; } pr = prison_find_name(mypr, name); if (pr != NULL) { drflags |= PD_LOCKED; if (!(prison_isalive(pr) || (flags & JAIL_DYING))) { error = ENOENT; vfs_opterror(opts, "jail \"%s\" is dying", name); goto done; } goto found_prison; } error = ENOENT; vfs_opterror(opts, "jail \"%s\" not found", name); goto done; } else if (error != ENOENT) goto done; vfs_opterror(opts, "no jail specified"); error = ENOENT; goto done; found_prison: /* Get the parameters of the prison. */ prison_hold(pr); drflags |= PD_DEREF; td->td_retval[0] = pr->pr_id; error = vfs_setopt(opts, "jid", &pr->pr_id, sizeof(pr->pr_id)); if (error != 0 && error != ENOENT) goto done; i = (pr->pr_parent == mypr) ? 0 : pr->pr_parent->pr_id; error = vfs_setopt(opts, "parent", &i, sizeof(i)); if (error != 0 && error != ENOENT) goto done; error = vfs_setopts(opts, "name", prison_name(mypr, pr)); if (error != 0 && error != ENOENT) goto done; error = vfs_setopt(opts, "cpuset.id", &pr->pr_cpuset->cs_id, sizeof(pr->pr_cpuset->cs_id)); if (error != 0 && error != ENOENT) goto done; error = vfs_setopts(opts, "path", prison_path(mypr, pr)); if (error != 0 && error != ENOENT) goto done; #ifdef INET error = vfs_setopt_part(opts, "ip4.addr", pr->pr_addrs[PR_INET]->pr_ip, pr->pr_addrs[PR_INET] ? pr->pr_addrs[PR_INET]->ips * pr_families[PR_INET].size : 0 ); if (error != 0 && error != ENOENT) goto done; #endif #ifdef INET6 error = vfs_setopt_part(opts, "ip6.addr", pr->pr_addrs[PR_INET6]->pr_ip, pr->pr_addrs[PR_INET6] ? 
pr->pr_addrs[PR_INET6]->ips * pr_families[PR_INET6].size : 0 ); if (error != 0 && error != ENOENT) goto done; #endif error = vfs_setopt(opts, "securelevel", &pr->pr_securelevel, sizeof(pr->pr_securelevel)); if (error != 0 && error != ENOENT) goto done; error = vfs_setopt(opts, "children.cur", &pr->pr_childcount, sizeof(pr->pr_childcount)); if (error != 0 && error != ENOENT) goto done; error = vfs_setopt(opts, "children.max", &pr->pr_childmax, sizeof(pr->pr_childmax)); if (error != 0 && error != ENOENT) goto done; error = vfs_setopts(opts, "host.hostname", pr->pr_hostname); if (error != 0 && error != ENOENT) goto done; error = vfs_setopts(opts, "host.domainname", pr->pr_domainname); if (error != 0 && error != ENOENT) goto done; error = vfs_setopts(opts, "host.hostuuid", pr->pr_hostuuid); if (error != 0 && error != ENOENT) goto done; #ifdef COMPAT_FREEBSD32 if (SV_PROC_FLAG(td->td_proc, SV_ILP32)) { uint32_t hid32 = pr->pr_hostid; error = vfs_setopt(opts, "host.hostid", &hid32, sizeof(hid32)); } else #endif error = vfs_setopt(opts, "host.hostid", &pr->pr_hostid, sizeof(pr->pr_hostid)); if (error != 0 && error != ENOENT) goto done; error = vfs_setopt(opts, "enforce_statfs", &pr->pr_enforce_statfs, sizeof(pr->pr_enforce_statfs)); if (error != 0 && error != ENOENT) goto done; error = vfs_setopt(opts, "devfs_ruleset", &pr->pr_devfs_rsnum, sizeof(pr->pr_devfs_rsnum)); if (error != 0 && error != ENOENT) goto done; for (bf = pr_flag_bool; bf < pr_flag_bool + nitems(pr_flag_bool); bf++) { i = (pr->pr_flags & bf->flag) ? 1 : 0; error = vfs_setopt(opts, bf->name, &i, sizeof(i)); if (error != 0 && error != ENOENT) goto done; i = !i; error = vfs_setopt(opts, bf->noname, &i, sizeof(i)); if (error != 0 && error != ENOENT) goto done; } for (jsf = pr_flag_jailsys; jsf < pr_flag_jailsys + nitems(pr_flag_jailsys); jsf++) { f = pr->pr_flags & (jsf->disable | jsf->new); i = (f != 0 && f == jsf->disable) ? JAIL_SYS_DISABLE : (f == jsf->new) ? JAIL_SYS_NEW : JAIL_SYS_INHERIT; error = vfs_setopt(opts, jsf->name, &i, sizeof(i)); if (error != 0 && error != ENOENT) goto done; } for (bf = pr_flag_allow; bf < pr_flag_allow + nitems(pr_flag_allow) && atomic_load_int(&bf->flag) != 0; bf++) { i = (pr->pr_allow & bf->flag) ? 1 : 0; error = vfs_setopt(opts, bf->name, &i, sizeof(i)); if (error != 0 && error != ENOENT) goto done; i = !i; error = vfs_setopt(opts, bf->noname, &i, sizeof(i)); if (error != 0 && error != ENOENT) goto done; } i = !prison_isalive(pr); error = vfs_setopt(opts, "dying", &i, sizeof(i)); if (error != 0 && error != ENOENT) goto done; i = !i; error = vfs_setopt(opts, "nodying", &i, sizeof(i)); if (error != 0 && error != ENOENT) goto done; error = vfs_setopt(opts, "osreldate", &pr->pr_osreldate, sizeof(pr->pr_osreldate)); if (error != 0 && error != ENOENT) goto done; error = vfs_setopts(opts, "osrelease", pr->pr_osrelease); if (error != 0 && error != ENOENT) goto done; /* Get the module parameters. */ mtx_unlock(&pr->pr_mtx); drflags &= ~PD_LOCKED; error = osd_jail_call(pr, PR_METHOD_GET, opts); if (error) goto done; prison_deref(pr, drflags); pr = NULL; drflags = 0; /* By now, all parameters should have been noted. */ TAILQ_FOREACH(opt, opts, link) { if (!opt->seen && strcmp(opt->name, "errmsg")) { error = EINVAL; vfs_opterror(opts, "unknown parameter: %s", opt->name); goto done; } } /* Write the fetched parameters back to userspace. 
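 *
 * (For illustration: parameter i of the caller's list occupies iovec
 *  2*i for the name and 2*i + 1 for the value, which is why each value
 *  is written back through optuio->uio_iov[2 * opt->pos + 1] below.)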
*/ error = 0; TAILQ_FOREACH(opt, opts, link) { if (opt->pos >= 0 && opt->pos != errmsg_pos) { pos = 2 * opt->pos + 1; optuio->uio_iov[pos].iov_len = opt->len; if (opt->value != NULL) { if (optuio->uio_segflg == UIO_SYSSPACE) { bcopy(opt->value, optuio->uio_iov[pos].iov_base, opt->len); } else { error = copyout(opt->value, optuio->uio_iov[pos].iov_base, opt->len); if (error) break; } } } } done: /* Release any temporary prison holds and/or locks. */ if (pr != NULL) prison_deref(pr, drflags); else if (drflags & PD_LIST_SLOCKED) sx_sunlock(&allprison_lock); if (error && errmsg_pos >= 0) { /* Write the error message back to userspace. */ vfs_getopt(opts, "errmsg", (void **)&errmsg, &errmsg_len); errmsg_pos = 2 * errmsg_pos + 1; if (errmsg_len > 0) { if (optuio->uio_segflg == UIO_SYSSPACE) bcopy(errmsg, optuio->uio_iov[errmsg_pos].iov_base, errmsg_len); else copyout(errmsg, optuio->uio_iov[errmsg_pos].iov_base, errmsg_len); } } vfs_freeopts(opts); return (error); } /* * struct jail_remove_args { * int jid; * }; */ int sys_jail_remove(struct thread *td, struct jail_remove_args *uap) { struct prison *pr; int error; error = priv_check(td, PRIV_JAIL_REMOVE); if (error) return (error); sx_xlock(&allprison_lock); pr = prison_find_child(td->td_ucred->cr_prison, uap->jid); if (pr == NULL) { sx_xunlock(&allprison_lock); return (EINVAL); } if (!prison_isalive(pr)) { /* Silently ignore already-dying prisons. */ mtx_unlock(&pr->pr_mtx); sx_xunlock(&allprison_lock); return (0); } prison_deref(pr, PD_KILL | PD_LOCKED | PD_LIST_XLOCKED); return (0); } /* * struct jail_attach_args { * int jid; * }; */ int sys_jail_attach(struct thread *td, struct jail_attach_args *uap) { struct prison *pr; int error; error = priv_check(td, PRIV_JAIL_ATTACH); if (error) return (error); sx_slock(&allprison_lock); pr = prison_find_child(td->td_ucred->cr_prison, uap->jid); if (pr == NULL) { sx_sunlock(&allprison_lock); return (EINVAL); } /* Do not allow a process to attach to a prison that is not alive. */ if (!prison_isalive(pr)) { mtx_unlock(&pr->pr_mtx); sx_sunlock(&allprison_lock); return (EINVAL); } return (do_jail_attach(td, pr, PD_LOCKED | PD_LIST_SLOCKED)); } static int do_jail_attach(struct thread *td, struct prison *pr, int drflags) { struct proc *p; struct ucred *newcred, *oldcred; int error; mtx_assert(&pr->pr_mtx, MA_OWNED); sx_assert(&allprison_lock, SX_LOCKED); drflags &= PD_LOCK_FLAGS; /* * XXX: Note that there is a slight race here if two threads * in the same privileged process attempt to attach to two * different jails at the same time. It is important for * user processes not to do this, or they might end up with * a process root from one prison, but attached to the jail * of another. */ prison_hold(pr); refcount_acquire(&pr->pr_uref); drflags |= PD_DEREF | PD_DEUREF; mtx_unlock(&pr->pr_mtx); drflags &= ~PD_LOCKED; /* Let modules do whatever they need to prepare for attaching. */ error = osd_jail_call(pr, PR_METHOD_ATTACH, td); if (error) { prison_deref(pr, drflags); return (error); } sx_unlock(&allprison_lock); drflags &= ~(PD_LIST_SLOCKED | PD_LIST_XLOCKED); /* * Reparent the newly attached process to this jail. 
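 *
 * This happens in several steps below: move the process into the
 * jail's cpuset, chroot/chdir it to the jail's root vnode, install a
 * new credential pointing at the new prison, and finally relink the
 * process from its old prison's process list to the new one.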
*/ p = td->td_proc; error = cpuset_setproc_update_set(p, pr->pr_cpuset); if (error) goto e_revert_osd; vn_lock(pr->pr_root, LK_EXCLUSIVE | LK_RETRY); if ((error = change_dir(pr->pr_root, td)) != 0) goto e_unlock; #ifdef MAC if ((error = mac_vnode_check_chroot(td->td_ucred, pr->pr_root))) goto e_unlock; #endif VOP_UNLOCK(pr->pr_root); if ((error = pwd_chroot_chdir(td, pr->pr_root))) goto e_revert_osd; newcred = crget(); PROC_LOCK(p); oldcred = crcopysafe(p, newcred); newcred->cr_prison = pr; proc_set_cred(p, newcred); setsugid(p); #ifdef RACCT racct_proc_ucred_changed(p, oldcred, newcred); crhold(newcred); #endif PROC_UNLOCK(p); #ifdef RCTL rctl_proc_ucred_changed(p, newcred); crfree(newcred); #endif prison_proc_relink(oldcred->cr_prison, pr, p); prison_deref(oldcred->cr_prison, drflags); crfree(oldcred); /* * If the prison was killed while changing credentials, die along * with it. */ if (!prison_isalive(pr)) { PROC_LOCK(p); kern_psignal(p, SIGKILL); PROC_UNLOCK(p); } return (0); e_unlock: VOP_UNLOCK(pr->pr_root); e_revert_osd: /* Tell modules this thread is still in its old jail after all. */ sx_slock(&allprison_lock); drflags |= PD_LIST_SLOCKED; (void)osd_jail_call(td->td_ucred->cr_prison, PR_METHOD_ATTACH, td); prison_deref(pr, drflags); return (error); } /* * Returns a locked prison instance, or NULL on failure. */ struct prison * prison_find(int prid) { struct prison *pr; sx_assert(&allprison_lock, SX_LOCKED); TAILQ_FOREACH(pr, &allprison, pr_list) { if (pr->pr_id < prid) continue; if (pr->pr_id > prid) break; KASSERT(prison_isvalid(pr), ("Found invalid prison %p", pr)); mtx_lock(&pr->pr_mtx); return (pr); } return (NULL); } /* * Find a prison that is a descendant of mypr. Returns a locked prison or NULL. */ struct prison * prison_find_child(struct prison *mypr, int prid) { struct prison *pr; int descend; sx_assert(&allprison_lock, SX_LOCKED); FOREACH_PRISON_DESCENDANT(mypr, pr, descend) { if (pr->pr_id == prid) { KASSERT(prison_isvalid(pr), ("Found invalid prison %p", pr)); mtx_lock(&pr->pr_mtx); return (pr); } } return (NULL); } /* * Look for the name relative to mypr. Returns a locked prison or NULL. */ struct prison * prison_find_name(struct prison *mypr, const char *name) { struct prison *pr, *deadpr; size_t mylen; int descend; sx_assert(&allprison_lock, SX_LOCKED); mylen = (mypr == &prison0) ? 0 : strlen(mypr->pr_name) + 1; deadpr = NULL; FOREACH_PRISON_DESCENDANT(mypr, pr, descend) { if (!strcmp(pr->pr_name + mylen, name)) { KASSERT(prison_isvalid(pr), ("Found invalid prison %p", pr)); if (prison_isalive(pr)) { mtx_lock(&pr->pr_mtx); return (pr); } deadpr = pr; } } /* There was no valid prison - perhaps there was a dying one. */ if (deadpr != NULL) mtx_lock(&deadpr->pr_mtx); return (deadpr); } /* * See if a prison has the specific flag set. The prison should be locked, * unless checking for flags that are only set at jail creation (such as * PR_IP4 and PR_IP6), or only the single bit is examined, without regard * to any other prison data. */ bool prison_flag(struct ucred *cred, unsigned flag) { return ((cred->cr_prison->pr_flags & flag) != 0); } /* * See if a prison has the specific allow flag set. * The prison *should* be locked, or only a single bit is examined, without * regard to any other prison data. */ bool prison_allow(struct ucred *cred, unsigned flag) { return ((cred->cr_prison->pr_allow & flag) != 0); } /* * Hold a prison reference, by incrementing pr_ref. It is generally * an error to hold a prison that does not already have a reference. 
* A prison record will remain valid as long as it has at least one * reference, and will not be removed as long as either the prison * mutex or the allprison lock is held (allprison_lock may be shared). */ void prison_hold_locked(struct prison *pr) { /* Locking is no longer required. */ prison_hold(pr); } void prison_hold(struct prison *pr) { #ifdef INVARIANTS int was_valid = refcount_acquire_if_not_zero(&pr->pr_ref); KASSERT(was_valid, ("Trying to hold dead prison %p (jid=%d).", pr, pr->pr_id)); #else refcount_acquire(&pr->pr_ref); #endif } /* * Remove a prison reference. If that was the last reference, the * prison will be removed (at a later time). */ void prison_free_locked(struct prison *pr) { mtx_assert(&pr->pr_mtx, MA_OWNED); /* * Locking is no longer required, but unlock because the caller * expects it. */ mtx_unlock(&pr->pr_mtx); prison_free(pr); } void prison_free(struct prison *pr) { KASSERT(refcount_load(&pr->pr_ref) > 0, ("Trying to free dead prison %p (jid=%d).", pr, pr->pr_id)); if (!refcount_release_if_not_last(&pr->pr_ref)) { /* * Don't remove the last reference in this context, * in case there are locks held. */ taskqueue_enqueue(taskqueue_thread, &pr->pr_task); } } static void prison_free_not_last(struct prison *pr) { #ifdef INVARIANTS int lastref; KASSERT(refcount_load(&pr->pr_ref) > 0, ("Trying to free dead prison %p (jid=%d).", pr, pr->pr_id)); lastref = refcount_release(&pr->pr_ref); KASSERT(!lastref, ("prison_free_not_last freed last ref on prison %p (jid=%d).", pr, pr->pr_id)); #else refcount_release(&pr->pr_ref); #endif } /* * Hold a prison for user visibility, by incrementing pr_uref. * It is generally an error to hold a prison that isn't already * user-visible, except through the jail system calls. It is also * an error to hold an invalid prison. A prison record will remain * alive as long as it has at least one user reference, and will not * be set to the dying state until the prison mutex and allprison_lock * are both freed. */ void prison_proc_hold(struct prison *pr) { #ifdef INVARIANTS int was_alive = refcount_acquire_if_not_zero(&pr->pr_uref); KASSERT(was_alive, ("Cannot add a process to a non-alive prison (jid=%d)", pr->pr_id)); #else refcount_acquire(&pr->pr_uref); #endif } /* * Remove a prison user reference. If it was the last reference, the * prison will be considered "dying", and may be removed once all of * its references are dropped. */ void prison_proc_free(struct prison *pr) { /* * Locking is only required when releasing the last reference. * This allows assurance that a locked prison will remain alive * until it is unlocked. */ KASSERT(refcount_load(&pr->pr_uref) > 0, ("Trying to kill a process in a dead prison (jid=%d)", pr->pr_id)); if (!refcount_release_if_not_last(&pr->pr_uref)) { /* * Don't remove the last user reference in this context, * which is expected to be a process that is not only locked, * but also half dead. Add a reference so any calls to * prison_free() won't re-submit the task. 
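 * The queued task runs prison_complete(), which drops both the
 * reference taken here and the final user reference from a context
 * where sleeping for allprison_lock is safe.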
*/ prison_hold(pr); mtx_lock(&pr->pr_mtx); KASSERT(!(pr->pr_flags & PR_COMPLETE_PROC), ("Redundant last reference in prison_proc_free (jid=%d)", pr->pr_id)); pr->pr_flags |= PR_COMPLETE_PROC; mtx_unlock(&pr->pr_mtx); taskqueue_enqueue(taskqueue_thread, &pr->pr_task); } } static void prison_proc_free_not_last(struct prison *pr) { #ifdef INVARIANTS int lastref; KASSERT(refcount_load(&pr->pr_uref) > 0, ("Trying to free dead prison %p (jid=%d).", pr, pr->pr_id)); lastref = refcount_release(&pr->pr_uref); KASSERT(!lastref, ("prison_proc_free_not_last freed last uref on prison %p (jid=%d).", pr, pr->pr_id)); #else refcount_release(&pr->pr_uref); #endif } void prison_proc_link(struct prison *pr, struct proc *p) { sx_assert(&allproc_lock, SA_XLOCKED); LIST_INSERT_HEAD(&pr->pr_proclist, p, p_jaillist); } void prison_proc_unlink(struct prison *pr, struct proc *p) { sx_assert(&allproc_lock, SA_XLOCKED); LIST_REMOVE(p, p_jaillist); } static void prison_proc_relink(struct prison *opr, struct prison *npr, struct proc *p) { sx_xlock(&allproc_lock); prison_proc_unlink(opr, p); prison_proc_link(npr, p); sx_xunlock(&allproc_lock); } /* * Complete a call to either prison_free or prison_proc_free. */ static void prison_complete(void *context, int pending) { struct prison *pr = context; int drflags; /* * This could be called to release the last reference, or the last * user reference (plus the reference held in prison_proc_free). */ drflags = prison_lock_xlock(pr, PD_DEREF); if (pr->pr_flags & PR_COMPLETE_PROC) { pr->pr_flags &= ~PR_COMPLETE_PROC; drflags |= PD_DEUREF; } prison_deref(pr, drflags); } static void prison_kill_processes_cb(struct proc *p, void *arg __unused) { kern_psignal(p, SIGKILL); } /* * Note the iteration does not guarantee acting on all processes. * Most notably there may be fork or jail_attach in progress. */ void prison_proc_iterate(struct prison *pr, void (*cb)(struct proc *, void *), void *cbarg) { struct prison *ppr; struct proc *p; if (atomic_load_int(&pr->pr_childcount) == 0) { sx_slock(&allproc_lock); LIST_FOREACH(p, &pr->pr_proclist, p_jaillist) { if (p->p_state == PRS_NEW) continue; PROC_LOCK(p); cb(p, cbarg); PROC_UNLOCK(p); } sx_sunlock(&allproc_lock); if (atomic_load_int(&pr->pr_childcount) == 0) return; /* * Some jails popped up during the iteration, fall through to a * system-wide search. */ } sx_slock(&allproc_lock); FOREACH_PROC_IN_SYSTEM(p) { PROC_LOCK(p); if (p->p_state != PRS_NEW && p->p_ucred != NULL) { for (ppr = p->p_ucred->cr_prison; ppr != &prison0; ppr = ppr->pr_parent) { if (ppr == pr) { cb(p, cbarg); break; } } } PROC_UNLOCK(p); } sx_sunlock(&allproc_lock); } /* * Remove a prison reference and/or user reference (usually). * This assumes context that allows sleeping (for allprison_lock), * with no non-sleeping locks held, except perhaps the prison itself. * If there are no more references, release and delist the prison. * On completion, the prison lock and the allprison lock are both * unlocked. */ static void prison_deref(struct prison *pr, int flags) { struct prisonlist freeprison; struct prison *killpr, *rpr, *ppr, *tpr; killpr = NULL; TAILQ_INIT(&freeprison); /* * Release this prison as requested, which may cause its parent * to be released, and then maybe its grandparent, etc. */ for (;;) { if (flags & PD_KILL) { /* Kill the prison and its descendents. 
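 *
 * Callers describe their current lock and reference state with the
 * PD_* flags.  For example, sys_jail_remove() enters here with
 *
 *	prison_deref(pr, PD_KILL | PD_LOCKED | PD_LIST_XLOCKED);
 *
 * meaning: kill the jail, the prison mutex is held, and
 * allprison_lock is held exclusive.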
*/ KASSERT(pr != &prison0, ("prison_deref trying to kill prison0")); if (!(flags & PD_DEREF)) { prison_hold(pr); flags |= PD_DEREF; } flags = prison_lock_xlock(pr, flags); prison_deref_kill(pr, &freeprison); } if (flags & PD_DEUREF) { /* Drop a user reference. */ KASSERT(refcount_load(&pr->pr_uref) > 0, ("prison_deref PD_DEUREF on a dead prison (jid=%d)", pr->pr_id)); if (!refcount_release_if_not_last(&pr->pr_uref)) { if (!(flags & PD_DEREF)) { prison_hold(pr); flags |= PD_DEREF; } flags = prison_lock_xlock(pr, flags); if (refcount_release(&pr->pr_uref) && pr->pr_state == PRISON_STATE_ALIVE) { /* * When the last user references goes, * this becomes a dying prison. */ KASSERT( refcount_load(&prison0.pr_uref) > 0, ("prison0 pr_uref=0")); pr->pr_state = PRISON_STATE_DYING; mtx_unlock(&pr->pr_mtx); flags &= ~PD_LOCKED; prison_cleanup(pr); } } } if (flags & PD_KILL) { /* * Any remaining user references are probably processes * that need to be killed, either in this prison or its * descendants. */ if (refcount_load(&pr->pr_uref) > 0) killpr = pr; /* Make sure the parent prison doesn't get killed. */ flags &= ~PD_KILL; } if (flags & PD_DEREF) { /* Drop a reference. */ KASSERT(refcount_load(&pr->pr_ref) > 0, ("prison_deref PD_DEREF on a dead prison (jid=%d)", pr->pr_id)); if (!refcount_release_if_not_last(&pr->pr_ref)) { flags = prison_lock_xlock(pr, flags); if (refcount_release(&pr->pr_ref)) { /* * When the last reference goes, * unlink the prison and set it aside. */ KASSERT( refcount_load(&pr->pr_uref) == 0, ("prison_deref: last ref, " "but still has %d urefs (jid=%d)", pr->pr_uref, pr->pr_id)); KASSERT( refcount_load(&prison0.pr_ref) != 0, ("prison0 pr_ref=0")); pr->pr_state = PRISON_STATE_INVALID; TAILQ_REMOVE(&allprison, pr, pr_list); LIST_REMOVE(pr, pr_sibling); TAILQ_INSERT_TAIL(&freeprison, pr, pr_list); for (ppr = pr->pr_parent; ppr != NULL; ppr = ppr->pr_parent) ppr->pr_childcount--; /* * Removing a prison frees references * from its parent. */ mtx_unlock(&pr->pr_mtx); flags &= ~PD_LOCKED; pr = pr->pr_parent; flags |= PD_DEREF | PD_DEUREF; continue; } } } break; } /* Release all the prison locks. */ if (flags & PD_LOCKED) mtx_unlock(&pr->pr_mtx); if (flags & PD_LIST_SLOCKED) sx_sunlock(&allprison_lock); else if (flags & PD_LIST_XLOCKED) sx_xunlock(&allprison_lock); /* Kill any processes attached to a killed prison. */ if (killpr != NULL) prison_proc_iterate(killpr, prison_kill_processes_cb, NULL); /* * Finish removing any unreferenced prisons, which couldn't happen * while allprison_lock was held (to avoid a LOR on vrele). */ TAILQ_FOREACH_SAFE(rpr, &freeprison, pr_list, tpr) { #ifdef VIMAGE if (rpr->pr_vnet != rpr->pr_parent->pr_vnet) vnet_destroy(rpr->pr_vnet); #endif if (rpr->pr_root != NULL) vrele(rpr->pr_root); mtx_destroy(&rpr->pr_mtx); #ifdef INET prison_ip_free(rpr->pr_addrs[PR_INET]); #endif #ifdef INET6 prison_ip_free(rpr->pr_addrs[PR_INET6]); #endif if (rpr->pr_cpuset != NULL) cpuset_rel(rpr->pr_cpuset); osd_jail_exit(rpr); #ifdef RACCT if (racct_enable) prison_racct_detach(rpr); #endif TAILQ_REMOVE(&freeprison, rpr, pr_list); free(rpr, M_PRISON); } } /* * Kill the prison and its descendants. Mark them as dying, clear the * persist flag, and call module remove methods. */ static void prison_deref_kill(struct prison *pr, struct prisonlist *freeprison) { struct prison *cpr, *ppr, *rpr; bool descend; /* * Unlike the descendants, the target prison can be killed * even if it is currently dying. This is useful for failed * creation in jail_set(2). 
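 *
 * Descendants are visited twice: once on the way down to hold them
 * and mark them dying (so they cannot vanish mid-walk), and once on
 * the way back up to run their cleanup and drop the references again.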
*/ KASSERT(refcount_load(&pr->pr_ref) > 0, ("Trying to kill dead prison %p (jid=%d).", pr, pr->pr_id)); refcount_acquire(&pr->pr_uref); pr->pr_state = PRISON_STATE_DYING; mtx_unlock(&pr->pr_mtx); rpr = NULL; FOREACH_PRISON_DESCENDANT_PRE_POST(pr, cpr, descend) { if (descend) { if (!prison_isalive(cpr)) { descend = false; continue; } prison_hold(cpr); prison_proc_hold(cpr); mtx_lock(&cpr->pr_mtx); cpr->pr_state = PRISON_STATE_DYING; cpr->pr_flags |= PR_REMOVE; mtx_unlock(&cpr->pr_mtx); continue; } if (!(cpr->pr_flags & PR_REMOVE)) continue; prison_cleanup(cpr); mtx_lock(&cpr->pr_mtx); cpr->pr_flags &= ~PR_REMOVE; if (cpr->pr_flags & PR_PERSIST) { cpr->pr_flags &= ~PR_PERSIST; prison_proc_free_not_last(cpr); prison_free_not_last(cpr); } (void)refcount_release(&cpr->pr_uref); if (refcount_release(&cpr->pr_ref)) { /* * When the last reference goes, unlink the prison * and set it aside for prison_deref() to handle. * Delay unlinking the sibling list to keep the loop * safe. */ if (rpr != NULL) LIST_REMOVE(rpr, pr_sibling); rpr = cpr; rpr->pr_state = PRISON_STATE_INVALID; TAILQ_REMOVE(&allprison, rpr, pr_list); TAILQ_INSERT_TAIL(freeprison, rpr, pr_list); /* * Removing a prison frees references from its parent. */ ppr = rpr->pr_parent; prison_proc_free_not_last(ppr); prison_free_not_last(ppr); for (; ppr != NULL; ppr = ppr->pr_parent) ppr->pr_childcount--; } mtx_unlock(&cpr->pr_mtx); } if (rpr != NULL) LIST_REMOVE(rpr, pr_sibling); prison_cleanup(pr); mtx_lock(&pr->pr_mtx); if (pr->pr_flags & PR_PERSIST) { pr->pr_flags &= ~PR_PERSIST; prison_proc_free_not_last(pr); prison_free_not_last(pr); } (void)refcount_release(&pr->pr_uref); } /* * Given the current locking state in the flags, make sure allprison_lock * is held exclusive, and the prison is locked. Return flags indicating * the new state. */ static int prison_lock_xlock(struct prison *pr, int flags) { if (!(flags & PD_LIST_XLOCKED)) { /* * Get allprison_lock, which may be an upgrade, * and may require unlocking the prison. */ if (flags & PD_LOCKED) { mtx_unlock(&pr->pr_mtx); flags &= ~PD_LOCKED; } if (flags & PD_LIST_SLOCKED) { if (!sx_try_upgrade(&allprison_lock)) { sx_sunlock(&allprison_lock); sx_xlock(&allprison_lock); } flags &= ~PD_LIST_SLOCKED; } else sx_xlock(&allprison_lock); flags |= PD_LIST_XLOCKED; } if (!(flags & PD_LOCKED)) { /* Lock the prison mutex. */ mtx_lock(&pr->pr_mtx); flags |= PD_LOCKED; } return flags; } /* * Release a prison's resources when it starts dying (when the last user * reference is dropped, or when it is killed). */ static void prison_cleanup(struct prison *pr) { sx_assert(&allprison_lock, SA_XLOCKED); mtx_assert(&pr->pr_mtx, MA_NOTOWNED); vfs_exjail_delete(pr); shm_remove_prison(pr); (void)osd_jail_call(pr, PR_METHOD_REMOVE, NULL); } /* * Set or clear a permission bit in the pr_allow field, passing restrictions * (cleared permission) down to child jails. */ void prison_set_allow(struct ucred *cred, unsigned flag, int enable) { struct prison *pr; pr = cred->cr_prison; sx_slock(&allprison_lock); mtx_lock(&pr->pr_mtx); prison_set_allow_locked(pr, flag, enable); mtx_unlock(&pr->pr_mtx); sx_sunlock(&allprison_lock); } static void prison_set_allow_locked(struct prison *pr, unsigned flag, int enable) { struct prison *cpr; int descend; if (enable != 0) pr->pr_allow |= flag; else { pr->pr_allow &= ~flag; FOREACH_PRISON_DESCENDANT_LOCKED(pr, cpr, descend) cpr->pr_allow &= ~flag; } } /* * Check if a jail supports the given address family. 
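 *
 * A hypothetical caller simply passes the address family it is about
 * to use, e.g.:
 *
 *	error = prison_check_af(td->td_ucred, AF_INET6);
 *	if (error != 0)
 *		return (error);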
* * Returns 0 if not jailed or the address family is supported, EAFNOSUPPORT * if not. */ int prison_check_af(struct ucred *cred, int af) { struct prison *pr; int error; KASSERT(cred != NULL, ("%s: cred is NULL", __func__)); pr = cred->cr_prison; #ifdef VIMAGE /* Prisons with their own network stack are not limited. */ if (prison_owns_vnet(cred)) return (0); #endif error = 0; switch (af) { #ifdef INET case AF_INET: if (pr->pr_flags & PR_IP4) { mtx_lock(&pr->pr_mtx); if ((pr->pr_flags & PR_IP4) && pr->pr_addrs[PR_INET] == NULL) error = EAFNOSUPPORT; mtx_unlock(&pr->pr_mtx); } break; #endif #ifdef INET6 case AF_INET6: if (pr->pr_flags & PR_IP6) { mtx_lock(&pr->pr_mtx); if ((pr->pr_flags & PR_IP6) && pr->pr_addrs[PR_INET6] == NULL) error = EAFNOSUPPORT; mtx_unlock(&pr->pr_mtx); } break; #endif case AF_LOCAL: case AF_ROUTE: case AF_NETLINK: break; default: if (!(pr->pr_allow & PR_ALLOW_SOCKET_AF)) error = EAFNOSUPPORT; } return (error); } /* * Check if given address belongs to the jail referenced by cred (wrapper to * prison_check_ip[46]). * * Returns 0 if jail doesn't restrict the address family or if address belongs * to jail, EADDRNOTAVAIL if the address doesn't belong, or EAFNOSUPPORT if * the jail doesn't allow the address family. IPv4 Address passed in in NBO. */ int prison_if(struct ucred *cred, const struct sockaddr *sa) { #ifdef INET const struct sockaddr_in *sai; #endif #ifdef INET6 const struct sockaddr_in6 *sai6; #endif int error; KASSERT(cred != NULL, ("%s: cred is NULL", __func__)); KASSERT(sa != NULL, ("%s: sa is NULL", __func__)); #ifdef VIMAGE if (prison_owns_vnet(cred)) return (0); #endif error = 0; switch (sa->sa_family) { #ifdef INET case AF_INET: sai = (const struct sockaddr_in *)sa; error = prison_check_ip4(cred, &sai->sin_addr); break; #endif #ifdef INET6 case AF_INET6: sai6 = (const struct sockaddr_in6 *)sa; error = prison_check_ip6(cred, &sai6->sin6_addr); break; #endif default: if (!(cred->cr_prison->pr_allow & PR_ALLOW_SOCKET_AF)) error = EAFNOSUPPORT; } return (error); } /* * Return 0 if jails permit p1 to frob p2, otherwise ESRCH. */ int prison_check(struct ucred *cred1, struct ucred *cred2) { return ((cred1->cr_prison == cred2->cr_prison || prison_ischild(cred1->cr_prison, cred2->cr_prison)) ? 0 : ESRCH); } /* * For mountd/nfsd to run within a prison, it must be: * - A vnet prison. * - PR_ALLOW_NFSD must be set on it. * - The root directory (pr_root) of the prison must be * a file system mount point, so the mountd can hang * export information on it. * - The prison's enforce_statfs cannot be 0, so that * mountd(8) can do exports. */ bool prison_check_nfsd(struct ucred *cred) { if (jailed_without_vnet(cred)) return (false); if (!prison_allow(cred, PR_ALLOW_NFSD)) return (false); if ((cred->cr_prison->pr_root->v_vflag & VV_ROOT) == 0) return (false); if (cred->cr_prison->pr_enforce_statfs == 0) return (false); return (true); } /* * Return true if p2 is a child of p1, otherwise false. */ bool prison_ischild(struct prison *pr1, struct prison *pr2) { for (pr2 = pr2->pr_parent; pr2 != NULL; pr2 = pr2->pr_parent) if (pr1 == pr2) return (true); return (false); } /* * Return true if the prison is currently alive. A prison is alive if it * holds user references and it isn't being removed. */ bool prison_isalive(const struct prison *pr) { if (__predict_false(pr->pr_state != PRISON_STATE_ALIVE)) return (false); return (true); } /* * Return true if the prison is currently valid. A prison is valid if it has * been fully created, and is not being destroyed. 
Note that dying prisons * are still considered valid. Invalid prisons won't be found under normal * circumstances, as they're only put in that state by functions that have * an exclusive hold on allprison_lock. */ bool prison_isvalid(struct prison *pr) { if (__predict_false(pr->pr_state == PRISON_STATE_INVALID)) return (false); if (__predict_false(refcount_load(&pr->pr_ref) == 0)) return (false); return (true); } /* * Return true if the passed credential is in a jail and that jail does not * have its own virtual network stack, otherwise false. */ bool jailed_without_vnet(struct ucred *cred) { if (!jailed(cred)) return (false); #ifdef VIMAGE if (prison_owns_vnet(cred)) return (false); #endif return (true); } /* * Return the correct hostname (domainname, et al) for the passed credential. */ void getcredhostname(struct ucred *cred, char *buf, size_t size) { struct prison *pr; /* * A NULL credential can be used to shortcut to the physical * system's hostname. */ pr = (cred != NULL) ? cred->cr_prison : &prison0; mtx_lock(&pr->pr_mtx); strlcpy(buf, pr->pr_hostname, size); mtx_unlock(&pr->pr_mtx); } void getcreddomainname(struct ucred *cred, char *buf, size_t size) { mtx_lock(&cred->cr_prison->pr_mtx); strlcpy(buf, cred->cr_prison->pr_domainname, size); mtx_unlock(&cred->cr_prison->pr_mtx); } void getcredhostuuid(struct ucred *cred, char *buf, size_t size) { mtx_lock(&cred->cr_prison->pr_mtx); strlcpy(buf, cred->cr_prison->pr_hostuuid, size); mtx_unlock(&cred->cr_prison->pr_mtx); } void getcredhostid(struct ucred *cred, unsigned long *hostid) { mtx_lock(&cred->cr_prison->pr_mtx); *hostid = cred->cr_prison->pr_hostid; mtx_unlock(&cred->cr_prison->pr_mtx); } void getjailname(struct ucred *cred, char *name, size_t len) { mtx_lock(&cred->cr_prison->pr_mtx); strlcpy(name, cred->cr_prison->pr_name, len); mtx_unlock(&cred->cr_prison->pr_mtx); } #ifdef VIMAGE /* * Determine whether the prison represented by cred owns * its vnet rather than having it inherited. * * Returns true in case the prison owns the vnet, false otherwise. */ bool prison_owns_vnet(struct ucred *cred) { /* * vnets cannot be added/removed after jail creation, * so no need to lock here. */ return ((cred->cr_prison->pr_flags & PR_VNET) != 0); } #endif /* * Determine whether the subject represented by cred can "see" * status of a mount point. * Returns: 0 for permitted, ENOENT otherwise. * XXX: This function should be called cr_canseemount() and should be * placed in kern_prot.c. */ int prison_canseemount(struct ucred *cred, struct mount *mp) { struct prison *pr; struct statfs *sp; size_t len; pr = cred->cr_prison; if (pr->pr_enforce_statfs == 0) return (0); if (pr->pr_root->v_mount == mp) return (0); if (pr->pr_enforce_statfs == 2) return (ENOENT); /* * If jail's chroot directory is set to "/" we should be able to see * all mount-points from inside a jail. * This is ugly check, but this is the only situation when jail's * directory ends with '/'. */ if (strcmp(pr->pr_path, "/") == 0) return (0); len = strlen(pr->pr_path); sp = &mp->mnt_stat; if (strncmp(pr->pr_path, sp->f_mntonname, len) != 0) return (ENOENT); /* * Be sure that we don't have situation where jail's root directory * is "/some/path" and mount point is "/some/pathpath". 
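 * In other words, with a jail root of "/some/path", the mount points
 * "/some/path" and "/some/path/sub" remain visible, while
 * "/some/pathpath" is rejected because the character following the
 * matched prefix is neither NUL nor '/'.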
*/ if (sp->f_mntonname[len] != '\0' && sp->f_mntonname[len] != '/') return (ENOENT); return (0); } void prison_enforce_statfs(struct ucred *cred, struct mount *mp, struct statfs *sp) { char jpath[MAXPATHLEN]; struct prison *pr; size_t len; pr = cred->cr_prison; if (pr->pr_enforce_statfs == 0) return; if (prison_canseemount(cred, mp) != 0) { bzero(sp->f_mntonname, sizeof(sp->f_mntonname)); strlcpy(sp->f_mntonname, "[restricted]", sizeof(sp->f_mntonname)); return; } if (pr->pr_root->v_mount == mp) { /* * Clear current buffer data, so we are sure nothing from * the valid path left there. */ bzero(sp->f_mntonname, sizeof(sp->f_mntonname)); *sp->f_mntonname = '/'; return; } /* * If jail's chroot directory is set to "/" we should be able to see * all mount-points from inside a jail. */ if (strcmp(pr->pr_path, "/") == 0) return; len = strlen(pr->pr_path); strlcpy(jpath, sp->f_mntonname + len, sizeof(jpath)); /* * Clear current buffer data, so we are sure nothing from * the valid path left there. */ bzero(sp->f_mntonname, sizeof(sp->f_mntonname)); if (*jpath == '\0') { /* Should never happen. */ *sp->f_mntonname = '/'; } else { strlcpy(sp->f_mntonname, jpath, sizeof(sp->f_mntonname)); } } /* * Check with permission for a specific privilege is granted within jail. We * have a specific list of accepted privileges; the rest are denied. */ int prison_priv_check(struct ucred *cred, int priv) { struct prison *pr; int error; /* * Some policies have custom handlers. This routine should not be * called for them. See priv_check_cred(). */ switch (priv) { case PRIV_VFS_LOOKUP: case PRIV_VFS_GENERATION: KASSERT(0, ("prison_priv_check instead of a custom handler " "called for %d\n", priv)); } if (!jailed(cred)) return (0); #ifdef VIMAGE /* * Privileges specific to prisons with a virtual network stack. * There might be a duplicate entry here in case the privilege * is only granted conditionally in the legacy jail case. */ switch (priv) { /* * NFS-specific privileges. */ case PRIV_NFS_DAEMON: case PRIV_VFS_GETFH: case PRIV_VFS_MOUNT_EXPORTED: if (!prison_check_nfsd(cred)) return (EPERM); #ifdef notyet case PRIV_NFS_LOCKD: #endif /* * Network stack privileges. */ case PRIV_NET_BRIDGE: case PRIV_NET_GRE: case PRIV_NET_BPF: case PRIV_NET_RAW: /* Dup, cond. in legacy jail case. */ case PRIV_NET_ROUTE: case PRIV_NET_TAP: case PRIV_NET_SETIFMTU: case PRIV_NET_SETIFFLAGS: case PRIV_NET_SETIFCAP: case PRIV_NET_SETIFDESCR: case PRIV_NET_SETIFNAME : case PRIV_NET_SETIFMETRIC: case PRIV_NET_SETIFPHYS: case PRIV_NET_SETIFMAC: case PRIV_NET_SETLANPCP: case PRIV_NET_ADDMULTI: case PRIV_NET_DELMULTI: case PRIV_NET_HWIOCTL: case PRIV_NET_SETLLADDR: case PRIV_NET_ADDIFGROUP: case PRIV_NET_DELIFGROUP: case PRIV_NET_IFCREATE: case PRIV_NET_IFDESTROY: case PRIV_NET_ADDIFADDR: case PRIV_NET_DELIFADDR: case PRIV_NET_LAGG: case PRIV_NET_GIF: case PRIV_NET_SETIFVNET: case PRIV_NET_SETIFFIB: case PRIV_NET_OVPN: case PRIV_NET_ME: case PRIV_NET_WG: /* * 802.11-related privileges. */ case PRIV_NET80211_VAP_GETKEY: case PRIV_NET80211_VAP_MANAGE: #ifdef notyet /* * ATM privileges. */ case PRIV_NETATM_CFG: case PRIV_NETATM_ADD: case PRIV_NETATM_DEL: case PRIV_NETATM_SET: /* * Bluetooth privileges. */ case PRIV_NETBLUETOOTH_RAW: #endif /* * Netgraph and netgraph module privileges. */ case PRIV_NETGRAPH_CONTROL: #ifdef notyet case PRIV_NETGRAPH_TTY: #endif /* * IPv4 and IPv6 privileges. 
*/ case PRIV_NETINET_IPFW: case PRIV_NETINET_DIVERT: case PRIV_NETINET_PF: case PRIV_NETINET_DUMMYNET: case PRIV_NETINET_CARP: case PRIV_NETINET_MROUTE: case PRIV_NETINET_RAW: case PRIV_NETINET_ADDRCTRL6: case PRIV_NETINET_ND6: case PRIV_NETINET_SCOPE6: case PRIV_NETINET_ALIFETIME6: case PRIV_NETINET_IPSEC: case PRIV_NETINET_BINDANY: #ifdef notyet /* * NCP privileges. */ case PRIV_NETNCP: /* * SMB privileges. */ case PRIV_NETSMB: #endif /* * No default: or deny here. * In case of no permit fall through to next switch(). */ if (cred->cr_prison->pr_flags & PR_VNET) return (0); } #endif /* VIMAGE */ switch (priv) { /* * Allow ktrace privileges for root in jail. */ case PRIV_KTRACE: #if 0 /* * Allow jailed processes to configure audit identity and * submit audit records (login, etc). In the future we may * want to further refine the relationship between audit and * jail. */ case PRIV_AUDIT_GETAUDIT: case PRIV_AUDIT_SETAUDIT: case PRIV_AUDIT_SUBMIT: #endif /* * Allow jailed processes to manipulate process UNIX * credentials in any way they see fit. */ case PRIV_CRED_SETUID: case PRIV_CRED_SETEUID: case PRIV_CRED_SETGID: case PRIV_CRED_SETEGID: case PRIV_CRED_SETGROUPS: case PRIV_CRED_SETREUID: case PRIV_CRED_SETREGID: case PRIV_CRED_SETRESUID: case PRIV_CRED_SETRESGID: /* * Jail implements visibility constraints already, so allow * jailed root to override uid/gid-based constraints. */ case PRIV_SEEOTHERGIDS: case PRIV_SEEOTHERUIDS: + case PRIV_SEEJAILPROC: /* * Jail implements inter-process debugging limits already, so * allow jailed root various debugging privileges. */ case PRIV_DEBUG_DIFFCRED: case PRIV_DEBUG_SUGID: case PRIV_DEBUG_UNPRIV: /* * Allow jail to set various resource limits and login * properties, and for now, exceed process resource limits. */ case PRIV_PROC_LIMIT: case PRIV_PROC_SETLOGIN: case PRIV_PROC_SETRLIMIT: /* * System V and POSIX IPC privileges are granted in jail. */ case PRIV_IPC_READ: case PRIV_IPC_WRITE: case PRIV_IPC_ADMIN: case PRIV_IPC_MSGSIZE: case PRIV_MQ_ADMIN: /* * Jail operations within a jail work on child jails. */ case PRIV_JAIL_ATTACH: case PRIV_JAIL_SET: case PRIV_JAIL_REMOVE: /* * Jail implements its own inter-process limits, so allow * root processes in jail to change scheduling on other * processes in the same jail. Likewise for signalling. */ case PRIV_SCHED_DIFFCRED: case PRIV_SCHED_CPUSET: case PRIV_SIGNAL_DIFFCRED: case PRIV_SIGNAL_SUGID: /* * Allow jailed processes to write to sysctls marked as jail * writable. */ case PRIV_SYSCTL_WRITEJAIL: /* * Allow root in jail to manage a variety of quota * properties. These should likely be conditional on a * configuration option. */ case PRIV_VFS_GETQUOTA: case PRIV_VFS_SETQUOTA: /* * Since Jail relies on chroot() to implement file system * protections, grant many VFS privileges to root in jail. * Be careful to exclude mount-related and NFS-related * privileges. */ case PRIV_VFS_READ: case PRIV_VFS_WRITE: case PRIV_VFS_ADMIN: case PRIV_VFS_EXEC: case PRIV_VFS_BLOCKRESERVE: /* XXXRW: Slightly surprising. */ case PRIV_VFS_CHFLAGS_DEV: case PRIV_VFS_CHOWN: case PRIV_VFS_CHROOT: case PRIV_VFS_RETAINSUGID: case PRIV_VFS_FCHROOT: case PRIV_VFS_LINK: case PRIV_VFS_SETGID: case PRIV_VFS_STAT: case PRIV_VFS_STICKYFILE: /* * As in the non-jail case, non-root users are expected to be * able to read kernel/physical memory (provided /dev/[k]mem * exists in the jail and they have permission to access it). 
*/ case PRIV_KMEM_READ: return (0); /* * Depending on the global setting, allow privilege of * setting system flags. */ case PRIV_VFS_SYSFLAGS: if (cred->cr_prison->pr_allow & PR_ALLOW_CHFLAGS) return (0); else return (EPERM); /* * Depending on the global setting, allow privilege of * mounting/unmounting file systems. */ case PRIV_VFS_MOUNT: case PRIV_VFS_UNMOUNT: case PRIV_VFS_MOUNT_NONUSER: case PRIV_VFS_MOUNT_OWNER: pr = cred->cr_prison; prison_lock(pr); if (pr->pr_allow & PR_ALLOW_MOUNT && pr->pr_enforce_statfs < 2) error = 0; else error = EPERM; prison_unlock(pr); return (error); /* * Jails should hold no disposition on the PRIV_VFS_READ_DIR * policy. priv_check_cred will not specifically allow it, and * we may want a MAC policy to allow it. */ case PRIV_VFS_READ_DIR: return (0); /* * Conditionnaly allow locking (unlocking) physical pages * in memory. */ case PRIV_VM_MLOCK: case PRIV_VM_MUNLOCK: if (cred->cr_prison->pr_allow & PR_ALLOW_MLOCK) return (0); else return (EPERM); /* * Conditionally allow jailed root to bind reserved ports. */ case PRIV_NETINET_RESERVEDPORT: if (cred->cr_prison->pr_allow & PR_ALLOW_RESERVED_PORTS) return (0); else return (EPERM); /* * Allow jailed root to reuse in-use ports. */ case PRIV_NETINET_REUSEPORT: return (0); /* * Allow jailed root to set certain IPv4/6 (option) headers. */ case PRIV_NETINET_SETHDROPTS: return (0); /* * Conditionally allow creating raw sockets in jail. */ case PRIV_NETINET_RAW: if (cred->cr_prison->pr_allow & PR_ALLOW_RAW_SOCKETS) return (0); else return (EPERM); /* * Since jail implements its own visibility limits on netstat * sysctls, allow getcred. This allows identd to work in * jail. */ case PRIV_NETINET_GETCRED: return (0); /* * Allow jailed root to set loginclass. */ case PRIV_PROC_SETLOGINCLASS: return (0); /* * Do not allow a process inside a jail to read the kernel * message buffer unless explicitly permitted. */ case PRIV_MSGBUF: if (cred->cr_prison->pr_allow & PR_ALLOW_READ_MSGBUF) return (0); return (EPERM); default: /* * In all remaining cases, deny the privilege request. This * includes almost all network privileges, many system * configuration privileges. */ return (EPERM); } } /* * Return the part of pr2's name that is relative to pr1, or the whole name * if it does not directly follow. */ char * prison_name(struct prison *pr1, struct prison *pr2) { char *name; /* Jails see themselves as "0" (if they see themselves at all). */ if (pr1 == pr2) return "0"; name = pr2->pr_name; if (prison_ischild(pr1, pr2)) { /* * pr1 isn't locked (and allprison_lock may not be either) * so its length can't be counted on. But the number of dots * can be counted on - and counted. */ for (; pr1 != &prison0; pr1 = pr1->pr_parent) name = strchr(name, '.') + 1; } return (name); } /* * Return the part of pr2's path that is relative to pr1, or the whole path * if it does not directly follow. */ static char * prison_path(struct prison *pr1, struct prison *pr2) { char *path1, *path2; int len1; path1 = pr1->pr_path; path2 = pr2->pr_path; if (!strcmp(path1, "/")) return (path2); len1 = strlen(path1); if (strncmp(path1, path2, len1)) return (path2); if (path2[len1] == '\0') return "/"; if (path2[len1] == '/') return (path2 + len1); return (path2); } /* * Jail-related sysctls. */ static SYSCTL_NODE(_security, OID_AUTO, jail, CTLFLAG_RW | CTLFLAG_MPSAFE, 0, "Jails"); #if defined(INET) || defined(INET6) /* * Copy address array to memory that would be then SYSCTL_OUT-ed. * sysctl_jail_list() helper. 
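 *
 * Note the retry below: the output buffer cannot be grown while the
 * prison mutex is held, so the lock is dropped around realloc() and
 * the size check is repeated in case the address list changed in the
 * meantime.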
*/ static void prison_ip_copyout(struct prison *pr, const pr_family_t af, void **out, int *len) { const struct prison_ip *pip; const size_t size = pr_families[af].size; again: mtx_assert(&pr->pr_mtx, MA_OWNED); if ((pip = pr->pr_addrs[af]) != NULL) { if (*len < pip->ips) { *len = pip->ips; mtx_unlock(&pr->pr_mtx); *out = realloc(*out, *len * size, M_TEMP, M_WAITOK); mtx_lock(&pr->pr_mtx); goto again; } bcopy(pip->pr_ip, *out, pip->ips * size); } } #endif static int sysctl_jail_list(SYSCTL_HANDLER_ARGS) { struct xprison *xp; struct prison *pr, *cpr; #ifdef INET struct in_addr *ip4 = NULL; int ip4s = 0; #endif #ifdef INET6 struct in6_addr *ip6 = NULL; int ip6s = 0; #endif int descend, error; xp = malloc(sizeof(*xp), M_TEMP, M_WAITOK); pr = req->td->td_ucred->cr_prison; error = 0; sx_slock(&allprison_lock); FOREACH_PRISON_DESCENDANT(pr, cpr, descend) { mtx_lock(&cpr->pr_mtx); #ifdef INET prison_ip_copyout(cpr, PR_INET, (void **)&ip4, &ip4s); #endif #ifdef INET6 prison_ip_copyout(cpr, PR_INET6, (void **)&ip6, &ip6s); #endif bzero(xp, sizeof(*xp)); xp->pr_version = XPRISON_VERSION; xp->pr_id = cpr->pr_id; xp->pr_state = cpr->pr_state; strlcpy(xp->pr_path, prison_path(pr, cpr), sizeof(xp->pr_path)); strlcpy(xp->pr_host, cpr->pr_hostname, sizeof(xp->pr_host)); strlcpy(xp->pr_name, prison_name(pr, cpr), sizeof(xp->pr_name)); #ifdef INET xp->pr_ip4s = ip4s; #endif #ifdef INET6 xp->pr_ip6s = ip6s; #endif mtx_unlock(&cpr->pr_mtx); error = SYSCTL_OUT(req, xp, sizeof(*xp)); if (error) break; #ifdef INET if (xp->pr_ip4s > 0) { error = SYSCTL_OUT(req, ip4, xp->pr_ip4s * sizeof(struct in_addr)); if (error) break; } #endif #ifdef INET6 if (xp->pr_ip6s > 0) { error = SYSCTL_OUT(req, ip6, xp->pr_ip6s * sizeof(struct in6_addr)); if (error) break; } #endif } sx_sunlock(&allprison_lock); free(xp, M_TEMP); #ifdef INET free(ip4, M_TEMP); #endif #ifdef INET6 free(ip6, M_TEMP); #endif return (error); } SYSCTL_OID(_security_jail, OID_AUTO, list, CTLTYPE_STRUCT | CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, 0, sysctl_jail_list, "S", "List of active jails"); static int sysctl_jail_jailed(SYSCTL_HANDLER_ARGS) { int error, injail; injail = jailed(req->td->td_ucred); error = SYSCTL_OUT(req, &injail, sizeof(injail)); return (error); } SYSCTL_PROC(_security_jail, OID_AUTO, jailed, CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, 0, sysctl_jail_jailed, "I", "Process in jail?"); static int sysctl_jail_vnet(SYSCTL_HANDLER_ARGS) { int error, havevnet; #ifdef VIMAGE struct ucred *cred = req->td->td_ucred; havevnet = jailed(cred) && prison_owns_vnet(cred); #else havevnet = 0; #endif error = SYSCTL_OUT(req, &havevnet, sizeof(havevnet)); return (error); } SYSCTL_PROC(_security_jail, OID_AUTO, vnet, CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, 0, sysctl_jail_vnet, "I", "Jail owns vnet?"); #if defined(INET) || defined(INET6) SYSCTL_UINT(_security_jail, OID_AUTO, jail_max_af_ips, CTLFLAG_RW, &jail_max_af_ips, 0, "Number of IP addresses a jail may have at most per address family (deprecated)"); #endif /* * Default parameters for jail(2) compatibility. For historical reasons, * the sysctl names have varying similarity to the parameter names. Prisons * just see their own parameters, and can't change them. */ static int sysctl_jail_default_allow(SYSCTL_HANDLER_ARGS) { int error, i; /* Get the current flag value, and convert it to a boolean. 
*/ if (req->td->td_ucred->cr_prison == &prison0) { mtx_lock(&prison0.pr_mtx); i = (jail_default_allow & arg2) != 0; mtx_unlock(&prison0.pr_mtx); } else i = prison_allow(req->td->td_ucred, arg2); if (arg1 != NULL) i = !i; error = sysctl_handle_int(oidp, &i, 0, req); if (error || !req->newptr) return (error); i = i ? arg2 : 0; if (arg1 != NULL) i ^= arg2; /* * The sysctls don't have CTLFLAGS_PRISON, so assume prison0 * for writing. */ mtx_lock(&prison0.pr_mtx); jail_default_allow = (jail_default_allow & ~arg2) | i; mtx_unlock(&prison0.pr_mtx); return (0); } SYSCTL_PROC(_security_jail, OID_AUTO, set_hostname_allowed, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, NULL, PR_ALLOW_SET_HOSTNAME, sysctl_jail_default_allow, "I", "Processes in jail can set their hostnames (deprecated)"); SYSCTL_PROC(_security_jail, OID_AUTO, socket_unixiproute_only, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, (void *)1, PR_ALLOW_SOCKET_AF, sysctl_jail_default_allow, "I", "Processes in jail are limited to creating UNIX/IP/route sockets only (deprecated)"); SYSCTL_PROC(_security_jail, OID_AUTO, sysvipc_allowed, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, NULL, PR_ALLOW_SYSVIPC, sysctl_jail_default_allow, "I", "Processes in jail can use System V IPC primitives (deprecated)"); SYSCTL_PROC(_security_jail, OID_AUTO, allow_raw_sockets, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, NULL, PR_ALLOW_RAW_SOCKETS, sysctl_jail_default_allow, "I", "Prison root can create raw sockets (deprecated)"); SYSCTL_PROC(_security_jail, OID_AUTO, chflags_allowed, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, NULL, PR_ALLOW_CHFLAGS, sysctl_jail_default_allow, "I", "Processes in jail can alter system file flags (deprecated)"); SYSCTL_PROC(_security_jail, OID_AUTO, mount_allowed, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, NULL, PR_ALLOW_MOUNT, sysctl_jail_default_allow, "I", "Processes in jail can mount/unmount jail-friendly file systems (deprecated)"); static int sysctl_jail_default_level(SYSCTL_HANDLER_ARGS) { struct prison *pr; int level, error; pr = req->td->td_ucred->cr_prison; level = (pr == &prison0) ? *(int *)arg1 : *(int *)((char *)pr + arg2); error = sysctl_handle_int(oidp, &level, 0, req); if (error || !req->newptr) return (error); *(int *)arg1 = level; return (0); } SYSCTL_PROC(_security_jail, OID_AUTO, enforce_statfs, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, &jail_default_enforce_statfs, offsetof(struct prison, pr_enforce_statfs), sysctl_jail_default_level, "I", "Processes in jail cannot see all mounted file systems (deprecated)"); SYSCTL_PROC(_security_jail, OID_AUTO, devfs_ruleset, CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE, &jail_default_devfs_rsnum, offsetof(struct prison, pr_devfs_rsnum), sysctl_jail_default_level, "I", "Ruleset for the devfs filesystem in jail (deprecated)"); /* * Nodes to describe jail parameters. Maximum length of string parameters * is returned in the string itself, and the other parameters exist merely * to make themselves and their types known. 
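 *
 * A hypothetical userspace consumer can probe these nodes to discover
 * a parameter's size; for a string parameter the returned string is
 * its maximum length, e.g.:
 *
 *	char buf[16];
 *	size_t len = sizeof(buf);
 *	if (sysctlbyname("security.jail.param.name", buf, &len, NULL, 0) == 0)
 *		printf("max name length: %s\n", buf);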
*/ SYSCTL_NODE(_security_jail, OID_AUTO, param, CTLFLAG_RW | CTLFLAG_MPSAFE, 0, "Jail parameters"); int sysctl_jail_param(SYSCTL_HANDLER_ARGS) { int i; long l; size_t s; char numbuf[12]; switch (oidp->oid_kind & CTLTYPE) { case CTLTYPE_LONG: case CTLTYPE_ULONG: l = 0; #ifdef SCTL_MASK32 if (!(req->flags & SCTL_MASK32)) #endif return (SYSCTL_OUT(req, &l, sizeof(l))); case CTLTYPE_INT: case CTLTYPE_UINT: i = 0; return (SYSCTL_OUT(req, &i, sizeof(i))); case CTLTYPE_STRING: snprintf(numbuf, sizeof(numbuf), "%jd", (intmax_t)arg2); return (sysctl_handle_string(oidp, numbuf, sizeof(numbuf), req)); case CTLTYPE_STRUCT: s = (size_t)arg2; return (SYSCTL_OUT(req, &s, sizeof(s))); } return (0); } /* * CTLFLAG_RDTUN in the following indicates jail parameters that can be set at * jail creation time but cannot be changed in an existing jail. */ SYSCTL_JAIL_PARAM(, jid, CTLTYPE_INT | CTLFLAG_RDTUN, "I", "Jail ID"); SYSCTL_JAIL_PARAM(, parent, CTLTYPE_INT | CTLFLAG_RD, "I", "Jail parent ID"); SYSCTL_JAIL_PARAM_STRING(, name, CTLFLAG_RW, MAXHOSTNAMELEN, "Jail name"); SYSCTL_JAIL_PARAM_STRING(, path, CTLFLAG_RDTUN, MAXPATHLEN, "Jail root path"); SYSCTL_JAIL_PARAM(, securelevel, CTLTYPE_INT | CTLFLAG_RW, "I", "Jail secure level"); SYSCTL_JAIL_PARAM(, osreldate, CTLTYPE_INT | CTLFLAG_RDTUN, "I", "Jail value for kern.osreldate and uname -K"); SYSCTL_JAIL_PARAM_STRING(, osrelease, CTLFLAG_RDTUN, OSRELEASELEN, "Jail value for kern.osrelease and uname -r"); SYSCTL_JAIL_PARAM(, enforce_statfs, CTLTYPE_INT | CTLFLAG_RW, "I", "Jail cannot see all mounted file systems"); SYSCTL_JAIL_PARAM(, devfs_ruleset, CTLTYPE_INT | CTLFLAG_RW, "I", "Ruleset for in-jail devfs mounts"); SYSCTL_JAIL_PARAM(, persist, CTLTYPE_INT | CTLFLAG_RW, "B", "Jail persistence"); #ifdef VIMAGE SYSCTL_JAIL_PARAM(, vnet, CTLTYPE_INT | CTLFLAG_RDTUN, "E,jailsys", "Virtual network stack"); #endif SYSCTL_JAIL_PARAM(, dying, CTLTYPE_INT | CTLFLAG_RD, "B", "Jail is in the process of shutting down"); SYSCTL_JAIL_PARAM_NODE(children, "Number of child jails"); SYSCTL_JAIL_PARAM(_children, cur, CTLTYPE_INT | CTLFLAG_RD, "I", "Current number of child jails"); SYSCTL_JAIL_PARAM(_children, max, CTLTYPE_INT | CTLFLAG_RW, "I", "Maximum number of child jails"); SYSCTL_JAIL_PARAM_SYS_NODE(host, CTLFLAG_RW, "Jail host info"); SYSCTL_JAIL_PARAM_STRING(_host, hostname, CTLFLAG_RW, MAXHOSTNAMELEN, "Jail hostname"); SYSCTL_JAIL_PARAM_STRING(_host, domainname, CTLFLAG_RW, MAXHOSTNAMELEN, "Jail NIS domainname"); SYSCTL_JAIL_PARAM_STRING(_host, hostuuid, CTLFLAG_RW, HOSTUUIDLEN, "Jail host UUID"); SYSCTL_JAIL_PARAM(_host, hostid, CTLTYPE_ULONG | CTLFLAG_RW, "LU", "Jail host ID"); SYSCTL_JAIL_PARAM_NODE(cpuset, "Jail cpuset"); SYSCTL_JAIL_PARAM(_cpuset, id, CTLTYPE_INT | CTLFLAG_RD, "I", "Jail cpuset ID"); #ifdef INET SYSCTL_JAIL_PARAM_SYS_NODE(ip4, CTLFLAG_RDTUN, "Jail IPv4 address virtualization"); SYSCTL_JAIL_PARAM_STRUCT(_ip4, addr, CTLFLAG_RW, sizeof(struct in_addr), "S,in_addr,a", "Jail IPv4 addresses"); SYSCTL_JAIL_PARAM(_ip4, saddrsel, CTLTYPE_INT | CTLFLAG_RW, "B", "Do (not) use IPv4 source address selection rather than the " "primary jail IPv4 address."); #endif #ifdef INET6 SYSCTL_JAIL_PARAM_SYS_NODE(ip6, CTLFLAG_RDTUN, "Jail IPv6 address virtualization"); SYSCTL_JAIL_PARAM_STRUCT(_ip6, addr, CTLFLAG_RW, sizeof(struct in6_addr), "S,in6_addr,a", "Jail IPv6 addresses"); SYSCTL_JAIL_PARAM(_ip6, saddrsel, CTLTYPE_INT | CTLFLAG_RW, "B", "Do (not) use IPv6 source address selection rather than the " "primary jail IPv6 address."); #endif SYSCTL_JAIL_PARAM_NODE(allow, 
"Jail permission flags"); SYSCTL_JAIL_PARAM(_allow, set_hostname, CTLTYPE_INT | CTLFLAG_RW, "B", "Jail may set hostname"); SYSCTL_JAIL_PARAM(_allow, sysvipc, CTLTYPE_INT | CTLFLAG_RW, "B", "Jail may use SYSV IPC"); SYSCTL_JAIL_PARAM(_allow, raw_sockets, CTLTYPE_INT | CTLFLAG_RW, "B", "Jail may create raw sockets"); SYSCTL_JAIL_PARAM(_allow, chflags, CTLTYPE_INT | CTLFLAG_RW, "B", "Jail may alter system file flags"); SYSCTL_JAIL_PARAM(_allow, quotas, CTLTYPE_INT | CTLFLAG_RW, "B", "Jail may set file quotas"); SYSCTL_JAIL_PARAM(_allow, socket_af, CTLTYPE_INT | CTLFLAG_RW, "B", "Jail may create sockets other than just UNIX/IPv4/IPv6/route"); SYSCTL_JAIL_PARAM(_allow, mlock, CTLTYPE_INT | CTLFLAG_RW, "B", "Jail may lock (unlock) physical pages in memory"); SYSCTL_JAIL_PARAM(_allow, reserved_ports, CTLTYPE_INT | CTLFLAG_RW, "B", "Jail may bind sockets to reserved ports"); SYSCTL_JAIL_PARAM(_allow, read_msgbuf, CTLTYPE_INT | CTLFLAG_RW, "B", "Jail may read the kernel message buffer"); SYSCTL_JAIL_PARAM(_allow, unprivileged_proc_debug, CTLTYPE_INT | CTLFLAG_RW, "B", "Unprivileged processes may use process debugging facilities"); SYSCTL_JAIL_PARAM(_allow, suser, CTLTYPE_INT | CTLFLAG_RW, "B", "Processes in jail with uid 0 have privilege"); #ifdef VIMAGE SYSCTL_JAIL_PARAM(_allow, nfsd, CTLTYPE_INT | CTLFLAG_RW, "B", "Mountd/nfsd may run in the jail"); #endif SYSCTL_JAIL_PARAM_SUBNODE(allow, mount, "Jail mount/unmount permission flags"); SYSCTL_JAIL_PARAM(_allow_mount, , CTLTYPE_INT | CTLFLAG_RW, "B", "Jail may mount/unmount jail-friendly file systems in general"); /* * Add a dynamic parameter allow., or allow... Return * its associated bit in the pr_allow bitmask, or zero if the parameter was * not created. */ unsigned prison_add_allow(const char *prefix, const char *name, const char *prefix_descr, const char *descr) { struct bool_flags *bf; struct sysctl_oid *parent; char *allow_name, *allow_noname, *allowed; #ifndef NO_SYSCTL_DESCR char *descr_deprecated; #endif u_int allow_flag; if (prefix ? asprintf(&allow_name, M_PRISON, "allow.%s.%s", prefix, name) < 0 || asprintf(&allow_noname, M_PRISON, "allow.%s.no%s", prefix, name) < 0 : asprintf(&allow_name, M_PRISON, "allow.%s", name) < 0 || asprintf(&allow_noname, M_PRISON, "allow.no%s", name) < 0) { free(allow_name, M_PRISON); return 0; } /* * See if this parameter has already beed added, i.e. a module was * previously loaded/unloaded. */ mtx_lock(&prison0.pr_mtx); for (bf = pr_flag_allow; bf < pr_flag_allow + nitems(pr_flag_allow) && atomic_load_int(&bf->flag) != 0; bf++) { if (strcmp(bf->name, allow_name) == 0) { allow_flag = bf->flag; goto no_add; } } /* * Find a free bit in pr_allow_all, failing if there are none * (which shouldn't happen as long as we keep track of how many * potential dynamic flags exist). */ for (allow_flag = 1;; allow_flag <<= 1) { if (allow_flag == 0) goto no_add; if ((pr_allow_all & allow_flag) == 0) break; } /* Note the parameter in the next open slot in pr_flag_allow. */ for (bf = pr_flag_allow; ; bf++) { if (bf == pr_flag_allow + nitems(pr_flag_allow)) { /* This should never happen, but is not fatal. */ allow_flag = 0; goto no_add; } if (atomic_load_int(&bf->flag) == 0) break; } bf->name = allow_name; bf->noname = allow_noname; pr_allow_all |= allow_flag; /* * prison0 always has permission for the new parameter. * Other jails must have it granted to them. */ prison0.pr_allow |= allow_flag; /* The flag indicates a valid entry, so make sure it is set last. 
*/ atomic_store_rel_int(&bf->flag, allow_flag); mtx_unlock(&prison0.pr_mtx); /* * Create sysctls for the parameter, and the back-compat global * permission. */ parent = prefix ? SYSCTL_ADD_NODE(NULL, SYSCTL_CHILDREN(&sysctl___security_jail_param_allow), OID_AUTO, prefix, CTLFLAG_MPSAFE, 0, prefix_descr) : &sysctl___security_jail_param_allow; (void)SYSCTL_ADD_PROC(NULL, SYSCTL_CHILDREN(parent), OID_AUTO, name, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, NULL, 0, sysctl_jail_param, "B", descr); if ((prefix ? asprintf(&allowed, M_TEMP, "%s_%s_allowed", prefix, name) : asprintf(&allowed, M_TEMP, "%s_allowed", name)) >= 0) { #ifndef NO_SYSCTL_DESCR (void)asprintf(&descr_deprecated, M_TEMP, "%s (deprecated)", descr); #endif (void)SYSCTL_ADD_PROC(NULL, SYSCTL_CHILDREN(&sysctl___security_jail), OID_AUTO, allowed, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, NULL, allow_flag, sysctl_jail_default_allow, "I", descr_deprecated); #ifndef NO_SYSCTL_DESCR free(descr_deprecated, M_TEMP); #endif free(allowed, M_TEMP); } return allow_flag; no_add: mtx_unlock(&prison0.pr_mtx); free(allow_name, M_PRISON); free(allow_noname, M_PRISON); return allow_flag; } /* * The VFS system will register jail-aware filesystems here. They each get * a parameter allow.mount.xxxfs and a flag to check when a jailed user * attempts to mount. */ void prison_add_vfs(struct vfsconf *vfsp) { #ifdef NO_SYSCTL_DESCR vfsp->vfc_prison_flag = prison_add_allow("mount", vfsp->vfc_name, NULL, NULL); #else char *descr; (void)asprintf(&descr, M_TEMP, "Jail may mount the %s file system", vfsp->vfc_name); vfsp->vfc_prison_flag = prison_add_allow("mount", vfsp->vfc_name, NULL, descr); free(descr, M_TEMP); #endif } #ifdef RACCT void prison_racct_foreach(void (*callback)(struct racct *racct, void *arg2, void *arg3), void (*pre)(void), void (*post)(void), void *arg2, void *arg3) { struct prison_racct *prr; ASSERT_RACCT_ENABLED(); sx_slock(&allprison_lock); if (pre != NULL) (pre)(); LIST_FOREACH(prr, &allprison_racct, prr_next) (callback)(prr->prr_racct, arg2, arg3); if (post != NULL) (post)(); sx_sunlock(&allprison_lock); } static struct prison_racct * prison_racct_find_locked(const char *name) { struct prison_racct *prr; ASSERT_RACCT_ENABLED(); sx_assert(&allprison_lock, SA_XLOCKED); if (name[0] == '\0' || strlen(name) >= MAXHOSTNAMELEN) return (NULL); LIST_FOREACH(prr, &allprison_racct, prr_next) { if (strcmp(name, prr->prr_name) != 0) continue; /* Found prison_racct with a matching name? */ prison_racct_hold(prr); return (prr); } /* Add new prison_racct. 
*/ prr = malloc(sizeof(*prr), M_PRISON_RACCT, M_ZERO | M_WAITOK); racct_create(&prr->prr_racct); strcpy(prr->prr_name, name); refcount_init(&prr->prr_refcount, 1); LIST_INSERT_HEAD(&allprison_racct, prr, prr_next); return (prr); } struct prison_racct * prison_racct_find(const char *name) { struct prison_racct *prr; ASSERT_RACCT_ENABLED(); sx_xlock(&allprison_lock); prr = prison_racct_find_locked(name); sx_xunlock(&allprison_lock); return (prr); } void prison_racct_hold(struct prison_racct *prr) { ASSERT_RACCT_ENABLED(); refcount_acquire(&prr->prr_refcount); } static void prison_racct_free_locked(struct prison_racct *prr) { ASSERT_RACCT_ENABLED(); sx_assert(&allprison_lock, SA_XLOCKED); if (refcount_release(&prr->prr_refcount)) { racct_destroy(&prr->prr_racct); LIST_REMOVE(prr, prr_next); free(prr, M_PRISON_RACCT); } } void prison_racct_free(struct prison_racct *prr) { ASSERT_RACCT_ENABLED(); sx_assert(&allprison_lock, SA_UNLOCKED); if (refcount_release_if_not_last(&prr->prr_refcount)) return; sx_xlock(&allprison_lock); prison_racct_free_locked(prr); sx_xunlock(&allprison_lock); } static void prison_racct_attach(struct prison *pr) { struct prison_racct *prr; ASSERT_RACCT_ENABLED(); sx_assert(&allprison_lock, SA_XLOCKED); prr = prison_racct_find_locked(pr->pr_name); KASSERT(prr != NULL, ("cannot find prison_racct")); pr->pr_prison_racct = prr; } /* * Handle jail renaming. From the racct point of view, renaming means * moving from one prison_racct to another. */ static void prison_racct_modify(struct prison *pr) { #ifdef RCTL struct proc *p; struct ucred *cred; #endif struct prison_racct *oldprr; ASSERT_RACCT_ENABLED(); sx_slock(&allproc_lock); sx_xlock(&allprison_lock); if (strcmp(pr->pr_name, pr->pr_prison_racct->prr_name) == 0) { sx_xunlock(&allprison_lock); sx_sunlock(&allproc_lock); return; } oldprr = pr->pr_prison_racct; pr->pr_prison_racct = NULL; prison_racct_attach(pr); /* * Move resource utilisation records. */ racct_move(pr->pr_prison_racct->prr_racct, oldprr->prr_racct); #ifdef RCTL /* * Force rctl to reattach rules to processes. */ FOREACH_PROC_IN_SYSTEM(p) { PROC_LOCK(p); cred = crhold(p->p_ucred); PROC_UNLOCK(p); rctl_proc_ucred_changed(p, cred); crfree(cred); } #endif sx_sunlock(&allproc_lock); prison_racct_free_locked(oldprr); sx_xunlock(&allprison_lock); } static void prison_racct_detach(struct prison *pr) { ASSERT_RACCT_ENABLED(); sx_assert(&allprison_lock, SA_UNLOCKED); if (pr->pr_prison_racct == NULL) return; prison_racct_free(pr->pr_prison_racct); pr->pr_prison_racct = NULL; } #endif /* RACCT */ #ifdef DDB static void db_show_prison(struct prison *pr) { struct bool_flags *bf; struct jailsys_flags *jsf; #if defined(INET) || defined(INET6) int ii; struct prison_ip *pip; #endif unsigned f; #ifdef INET char ip4buf[INET_ADDRSTRLEN]; #endif #ifdef INET6 char ip6buf[INET6_ADDRSTRLEN]; #endif db_printf("prison %p:\n", pr); db_printf(" jid = %d\n", pr->pr_id); db_printf(" name = %s\n", pr->pr_name); db_printf(" parent = %p\n", pr->pr_parent); db_printf(" ref = %d\n", pr->pr_ref); db_printf(" uref = %d\n", pr->pr_uref); db_printf(" state = %s\n", pr->pr_state == PRISON_STATE_ALIVE ? "alive" : pr->pr_state == PRISON_STATE_DYING ? "dying" : "invalid"); db_printf(" path = %s\n", pr->pr_path); db_printf(" cpuset = %d\n", pr->pr_cpuset ? 
pr->pr_cpuset->cs_id : -1); #ifdef VIMAGE db_printf(" vnet = %p\n", pr->pr_vnet); #endif db_printf(" root = %p\n", pr->pr_root); db_printf(" securelevel = %d\n", pr->pr_securelevel); db_printf(" devfs_rsnum = %d\n", pr->pr_devfs_rsnum); db_printf(" children.max = %d\n", pr->pr_childmax); db_printf(" children.cur = %d\n", pr->pr_childcount); db_printf(" child = %p\n", LIST_FIRST(&pr->pr_children)); db_printf(" sibling = %p\n", LIST_NEXT(pr, pr_sibling)); db_printf(" flags = 0x%x", pr->pr_flags); for (bf = pr_flag_bool; bf < pr_flag_bool + nitems(pr_flag_bool); bf++) if (pr->pr_flags & bf->flag) db_printf(" %s", bf->name); for (jsf = pr_flag_jailsys; jsf < pr_flag_jailsys + nitems(pr_flag_jailsys); jsf++) { f = pr->pr_flags & (jsf->disable | jsf->new); db_printf(" %-16s= %s\n", jsf->name, (f != 0 && f == jsf->disable) ? "disable" : (f == jsf->new) ? "new" : "inherit"); } db_printf(" allow = 0x%x", pr->pr_allow); for (bf = pr_flag_allow; bf < pr_flag_allow + nitems(pr_flag_allow) && atomic_load_int(&bf->flag) != 0; bf++) if (pr->pr_allow & bf->flag) db_printf(" %s", bf->name); db_printf("\n"); db_printf(" enforce_statfs = %d\n", pr->pr_enforce_statfs); db_printf(" host.hostname = %s\n", pr->pr_hostname); db_printf(" host.domainname = %s\n", pr->pr_domainname); db_printf(" host.hostuuid = %s\n", pr->pr_hostuuid); db_printf(" host.hostid = %lu\n", pr->pr_hostid); #ifdef INET if ((pip = pr->pr_addrs[PR_INET]) != NULL) { db_printf(" ip4s = %d\n", pip->ips); for (ii = 0; ii < pip->ips; ii++) db_printf(" %s %s\n", ii == 0 ? "ip4.addr =" : " ", inet_ntoa_r( *(const struct in_addr *)PR_IP(pip, PR_INET, ii), ip4buf)); } #endif #ifdef INET6 if ((pip = pr->pr_addrs[PR_INET6]) != NULL) { db_printf(" ip6s = %d\n", pip->ips); for (ii = 0; ii < pip->ips; ii++) db_printf(" %s %s\n", ii == 0 ? "ip6.addr =" : " ", ip6_sprintf(ip6buf, (const struct in6_addr *)PR_IP(pip, PR_INET6, ii))); } #endif } DB_SHOW_COMMAND(prison, db_show_prison_command) { struct prison *pr; if (!have_addr) { /* * Show all prisons in the list, and prison0 which is not * listed. */ db_show_prison(&prison0); if (!db_pager_quit) { TAILQ_FOREACH(pr, &allprison, pr_list) { db_show_prison(pr); if (db_pager_quit) break; } } return; } if (addr == 0) pr = &prison0; else { /* Look for a prison with the ID and with references. */ TAILQ_FOREACH(pr, &allprison, pr_list) if (pr->pr_id == addr && pr->pr_ref > 0) break; if (pr == NULL) /* Look again, without requiring a reference. */ TAILQ_FOREACH(pr, &allprison, pr_list) if (pr->pr_id == addr) break; if (pr == NULL) /* Assume address points to a valid prison. */ pr = (struct prison *)addr; } db_show_prison(pr); } #endif /* DDB */ diff --git a/sys/kern/kern_prot.c b/sys/kern/kern_prot.c index 19e0b78c6709..ed15cb566499 100644 --- a/sys/kern/kern_prot.c +++ b/sys/kern/kern_prot.c @@ -1,2499 +1,2502 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 1982, 1986, 1989, 1990, 1991, 1993 * The Regents of the University of California. * (c) UNIX System Laboratories, Inc. * Copyright (c) 2000-2001 Robert N. M. Watson. * All rights reserved. * * All or some portions of this file are derived from material licensed * to the University of California by American Telephone and Telegraph * Co. or Unix System Laboratories, Inc. and are reproduced herein with * the permission of UNIX System Laboratories, Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. 
Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)kern_prot.c 8.6 (Berkeley) 1/21/94 */ /* * System calls related to processes and protection */ #include #include "opt_inet.h" #include "opt_inet6.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef COMPAT_43 #include #endif #include #include #include #include #include #include #include #include #include #ifdef REGRESSION FEATURE(regression, "Kernel support for interfaces necessary for regression testing (SECURITY RISK!)"); #endif #include #include static MALLOC_DEFINE(M_CRED, "cred", "credentials"); SYSCTL_NODE(_security, OID_AUTO, bsd, CTLFLAG_RW | CTLFLAG_MPSAFE, 0, "BSD security policy"); static void crfree_final(struct ucred *cr); static void crsetgroups_locked(struct ucred *cr, int ngrp, gid_t *groups); #ifndef _SYS_SYSPROTO_H_ struct getpid_args { int dummy; }; #endif /* ARGSUSED */ int sys_getpid(struct thread *td, struct getpid_args *uap) { struct proc *p = td->td_proc; td->td_retval[0] = p->p_pid; #if defined(COMPAT_43) if (SV_PROC_FLAG(p, SV_AOUT)) td->td_retval[1] = kern_getppid(td); #endif return (0); } #ifndef _SYS_SYSPROTO_H_ struct getppid_args { int dummy; }; #endif /* ARGSUSED */ int sys_getppid(struct thread *td, struct getppid_args *uap) { td->td_retval[0] = kern_getppid(td); return (0); } int kern_getppid(struct thread *td) { struct proc *p = td->td_proc; return (p->p_oppid); } /* * Get process group ID; note that POSIX getpgrp takes no parameter. */ #ifndef _SYS_SYSPROTO_H_ struct getpgrp_args { int dummy; }; #endif int sys_getpgrp(struct thread *td, struct getpgrp_args *uap) { struct proc *p = td->td_proc; PROC_LOCK(p); td->td_retval[0] = p->p_pgrp->pg_id; PROC_UNLOCK(p); return (0); } /* Get an arbitrary pid's process group id */ #ifndef _SYS_SYSPROTO_H_ struct getpgid_args { pid_t pid; }; #endif int sys_getpgid(struct thread *td, struct getpgid_args *uap) { struct proc *p; int error; if (uap->pid == 0) { p = td->td_proc; PROC_LOCK(p); } else { p = pfind(uap->pid); if (p == NULL) return (ESRCH); error = p_cansee(td, p); if (error) { PROC_UNLOCK(p); return (error); } } td->td_retval[0] = p->p_pgrp->pg_id; PROC_UNLOCK(p); return (0); } /* * Get an arbitrary pid's session id. 
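 *
 * Added illustrative note: passing a pid of 0 asks about the calling
 * process itself, matching the pid == 0 shortcut in kern_getsid() below:
 *
 *	pid_t sid = getsid(0);	(session ID of the caller)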
*/ #ifndef _SYS_SYSPROTO_H_ struct getsid_args { pid_t pid; }; #endif int sys_getsid(struct thread *td, struct getsid_args *uap) { return (kern_getsid(td, uap->pid)); } int kern_getsid(struct thread *td, pid_t pid) { struct proc *p; int error; if (pid == 0) { p = td->td_proc; PROC_LOCK(p); } else { p = pfind(pid); if (p == NULL) return (ESRCH); error = p_cansee(td, p); if (error) { PROC_UNLOCK(p); return (error); } } td->td_retval[0] = p->p_session->s_sid; PROC_UNLOCK(p); return (0); } #ifndef _SYS_SYSPROTO_H_ struct getuid_args { int dummy; }; #endif /* ARGSUSED */ int sys_getuid(struct thread *td, struct getuid_args *uap) { td->td_retval[0] = td->td_ucred->cr_ruid; #if defined(COMPAT_43) td->td_retval[1] = td->td_ucred->cr_uid; #endif return (0); } #ifndef _SYS_SYSPROTO_H_ struct geteuid_args { int dummy; }; #endif /* ARGSUSED */ int sys_geteuid(struct thread *td, struct geteuid_args *uap) { td->td_retval[0] = td->td_ucred->cr_uid; return (0); } #ifndef _SYS_SYSPROTO_H_ struct getgid_args { int dummy; }; #endif /* ARGSUSED */ int sys_getgid(struct thread *td, struct getgid_args *uap) { td->td_retval[0] = td->td_ucred->cr_rgid; #if defined(COMPAT_43) td->td_retval[1] = td->td_ucred->cr_groups[0]; #endif return (0); } /* * Get effective group ID. The "egid" is groups[0], and could be obtained * via getgroups. This syscall exists because it is somewhat painful to do * correctly in a library function. */ #ifndef _SYS_SYSPROTO_H_ struct getegid_args { int dummy; }; #endif /* ARGSUSED */ int sys_getegid(struct thread *td, struct getegid_args *uap) { td->td_retval[0] = td->td_ucred->cr_groups[0]; return (0); } #ifndef _SYS_SYSPROTO_H_ struct getgroups_args { int gidsetsize; gid_t *gidset; }; #endif int sys_getgroups(struct thread *td, struct getgroups_args *uap) { struct ucred *cred; int ngrp, error; cred = td->td_ucred; ngrp = cred->cr_ngroups; if (uap->gidsetsize == 0) { error = 0; goto out; } if (uap->gidsetsize < ngrp) return (EINVAL); error = copyout(cred->cr_groups, uap->gidset, ngrp * sizeof(gid_t)); out: td->td_retval[0] = ngrp; return (error); } #ifndef _SYS_SYSPROTO_H_ struct setsid_args { int dummy; }; #endif /* ARGSUSED */ int sys_setsid(struct thread *td, struct setsid_args *uap) { struct pgrp *pgrp; int error; struct proc *p = td->td_proc; struct pgrp *newpgrp; struct session *newsess; pgrp = NULL; newpgrp = uma_zalloc(pgrp_zone, M_WAITOK); newsess = malloc(sizeof(struct session), M_SESSION, M_WAITOK | M_ZERO); again: error = 0; sx_xlock(&proctree_lock); if (p->p_pgid == p->p_pid || (pgrp = pgfind(p->p_pid)) != NULL) { if (pgrp != NULL) PGRP_UNLOCK(pgrp); error = EPERM; } else { error = enterpgrp(p, p->p_pid, newpgrp, newsess); if (error == ERESTART) goto again; MPASS(error == 0); td->td_retval[0] = p->p_pid; newpgrp = NULL; newsess = NULL; } sx_xunlock(&proctree_lock); uma_zfree(pgrp_zone, newpgrp); free(newsess, M_SESSION); return (error); } /* * set process group (setpgid/old setpgrp) * * caller does setpgid(targpid, targpgid) * * pid must be caller or child of caller (ESRCH) * if a child * pid must be in same session (EPERM) * pid can't have done an exec (EACCES) * if pgid != pid * there must exist some pid in same session having pgid (EPERM) * pid must not be session leader (EPERM) */ #ifndef _SYS_SYSPROTO_H_ struct setpgid_args { int pid; /* target process id */ int pgid; /* target pgrp id */ }; #endif /* ARGSUSED */ int sys_setpgid(struct thread *td, struct setpgid_args *uap) { struct proc *curp = td->td_proc; struct proc *targp; /* target process */ struct pgrp *pgrp; /* 
target pgrp */ int error; struct pgrp *newpgrp; if (uap->pgid < 0) return (EINVAL); newpgrp = uma_zalloc(pgrp_zone, M_WAITOK); again: error = 0; sx_xlock(&proctree_lock); if (uap->pid != 0 && uap->pid != curp->p_pid) { if ((targp = pfind(uap->pid)) == NULL) { error = ESRCH; goto done; } if (!inferior(targp)) { PROC_UNLOCK(targp); error = ESRCH; goto done; } if ((error = p_cansee(td, targp))) { PROC_UNLOCK(targp); goto done; } if (targp->p_pgrp == NULL || targp->p_session != curp->p_session) { PROC_UNLOCK(targp); error = EPERM; goto done; } if (targp->p_flag & P_EXEC) { PROC_UNLOCK(targp); error = EACCES; goto done; } PROC_UNLOCK(targp); } else targp = curp; if (SESS_LEADER(targp)) { error = EPERM; goto done; } if (uap->pgid == 0) uap->pgid = targp->p_pid; if ((pgrp = pgfind(uap->pgid)) == NULL) { if (uap->pgid == targp->p_pid) { error = enterpgrp(targp, uap->pgid, newpgrp, NULL); if (error == 0) newpgrp = NULL; } else error = EPERM; } else { if (pgrp == targp->p_pgrp) { PGRP_UNLOCK(pgrp); goto done; } if (pgrp->pg_id != targp->p_pid && pgrp->pg_session != curp->p_session) { PGRP_UNLOCK(pgrp); error = EPERM; goto done; } PGRP_UNLOCK(pgrp); error = enterthispgrp(targp, pgrp); } done: KASSERT(error == 0 || newpgrp != NULL, ("setpgid failed and newpgrp is NULL")); if (error == ERESTART) goto again; sx_xunlock(&proctree_lock); uma_zfree(pgrp_zone, newpgrp); return (error); } /* * Use the clause in B.4.2.2 that allows setuid/setgid to be 4.2/4.3BSD * compatible. It says that setting the uid/gid to euid/egid is a special * case of "appropriate privilege". Once the rules are expanded out, this * basically means that setuid(nnn) sets all three id's, in all permitted * cases unless _POSIX_SAVED_IDS is enabled. In that case, setuid(getuid()) * does not set the saved id - this is dangerous for traditional BSD * programs. For this reason, we *really* do not want to set * _POSIX_SAVED_IDS and do not want to clear POSIX_APPENDIX_B_4_2_2. */ #define POSIX_APPENDIX_B_4_2_2 #ifndef _SYS_SYSPROTO_H_ struct setuid_args { uid_t uid; }; #endif /* ARGSUSED */ int sys_setuid(struct thread *td, struct setuid_args *uap) { struct proc *p = td->td_proc; struct ucred *newcred, *oldcred; uid_t uid; struct uidinfo *uip; int error; uid = uap->uid; AUDIT_ARG_UID(uid); newcred = crget(); uip = uifind(uid); PROC_LOCK(p); /* * Copy credentials so other references do not see our changes. */ oldcred = crcopysafe(p, newcred); #ifdef MAC error = mac_cred_check_setuid(oldcred, uid); if (error) goto fail; #endif /* * See if we have "permission" by POSIX 1003.1 rules. * * Note that setuid(geteuid()) is a special case of * "appropriate privileges" in appendix B.4.2.2. We need * to use this clause to be compatible with traditional BSD * semantics. Basically, it means that "setuid(xx)" sets all * three id's (assuming you have privs). * * Notes on the logic. We do things in three steps. * 1: We determine if the euid is going to change, and do EPERM * right away. We unconditionally change the euid later if this * test is satisfied, simplifying that part of the logic. * 2: We determine if the real and/or saved uids are going to * change. Determined by compile options. * 3: Change euid last. 
(after tests in #2 for "appropriate privs") */ if (uid != oldcred->cr_ruid && /* allow setuid(getuid()) */ #ifdef _POSIX_SAVED_IDS uid != oldcred->cr_svuid && /* allow setuid(saved gid) */ #endif #ifdef POSIX_APPENDIX_B_4_2_2 /* Use BSD-compat clause from B.4.2.2 */ uid != oldcred->cr_uid && /* allow setuid(geteuid()) */ #endif (error = priv_check_cred(oldcred, PRIV_CRED_SETUID)) != 0) goto fail; #ifdef _POSIX_SAVED_IDS /* * Do we have "appropriate privileges" (are we root or uid == euid) * If so, we are changing the real uid and/or saved uid. */ if ( #ifdef POSIX_APPENDIX_B_4_2_2 /* Use the clause from B.4.2.2 */ uid == oldcred->cr_uid || #endif /* We are using privs. */ priv_check_cred(oldcred, PRIV_CRED_SETUID) == 0) #endif { /* * Set the real uid and transfer proc count to new user. */ if (uid != oldcred->cr_ruid) { change_ruid(newcred, uip); setsugid(p); } /* * Set saved uid * * XXX always set saved uid even if not _POSIX_SAVED_IDS, as * the security of seteuid() depends on it. B.4.2.2 says it * is important that we should do this. */ if (uid != oldcred->cr_svuid) { change_svuid(newcred, uid); setsugid(p); } } /* * In all permitted cases, we are changing the euid. */ if (uid != oldcred->cr_uid) { change_euid(newcred, uip); setsugid(p); } proc_set_cred(p, newcred); #ifdef RACCT racct_proc_ucred_changed(p, oldcred, newcred); crhold(newcred); #endif PROC_UNLOCK(p); #ifdef RCTL rctl_proc_ucred_changed(p, newcred); crfree(newcred); #endif uifree(uip); crfree(oldcred); return (0); fail: PROC_UNLOCK(p); uifree(uip); crfree(newcred); return (error); } #ifndef _SYS_SYSPROTO_H_ struct seteuid_args { uid_t euid; }; #endif /* ARGSUSED */ int sys_seteuid(struct thread *td, struct seteuid_args *uap) { struct proc *p = td->td_proc; struct ucred *newcred, *oldcred; uid_t euid; struct uidinfo *euip; int error; euid = uap->euid; AUDIT_ARG_EUID(euid); newcred = crget(); euip = uifind(euid); PROC_LOCK(p); /* * Copy credentials so other references do not see our changes. */ oldcred = crcopysafe(p, newcred); #ifdef MAC error = mac_cred_check_seteuid(oldcred, euid); if (error) goto fail; #endif if (euid != oldcred->cr_ruid && /* allow seteuid(getuid()) */ euid != oldcred->cr_svuid && /* allow seteuid(saved uid) */ (error = priv_check_cred(oldcred, PRIV_CRED_SETEUID)) != 0) goto fail; /* * Everything's okay, do it. */ if (oldcred->cr_uid != euid) { change_euid(newcred, euip); setsugid(p); } proc_set_cred(p, newcred); PROC_UNLOCK(p); uifree(euip); crfree(oldcred); return (0); fail: PROC_UNLOCK(p); uifree(euip); crfree(newcred); return (error); } #ifndef _SYS_SYSPROTO_H_ struct setgid_args { gid_t gid; }; #endif /* ARGSUSED */ int sys_setgid(struct thread *td, struct setgid_args *uap) { struct proc *p = td->td_proc; struct ucred *newcred, *oldcred; gid_t gid; int error; gid = uap->gid; AUDIT_ARG_GID(gid); newcred = crget(); PROC_LOCK(p); oldcred = crcopysafe(p, newcred); #ifdef MAC error = mac_cred_check_setgid(oldcred, gid); if (error) goto fail; #endif /* * See if we have "permission" by POSIX 1003.1 rules. * * Note that setgid(getegid()) is a special case of * "appropriate privileges" in appendix B.4.2.2. We need * to use this clause to be compatible with traditional BSD * semantics. Basically, it means that "setgid(xx)" sets all * three id's (assuming you have privs). * * For notes on the logic here, see setuid() above. 
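 *
 * Added illustrative sketch (not part of the original comment): with the
 * options as set in this file (POSIX_APPENDIX_B_4_2_2 defined,
 * _POSIX_SAVED_IDS not), an unprivileged process whose real gid is G may
 * call
 *
 *	setgid(G);
 *
 * and the real, effective and saved gids all end up equal to G, which is
 * the traditional BSD behaviour the note above refers to.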
*/ if (gid != oldcred->cr_rgid && /* allow setgid(getgid()) */ #ifdef _POSIX_SAVED_IDS gid != oldcred->cr_svgid && /* allow setgid(saved gid) */ #endif #ifdef POSIX_APPENDIX_B_4_2_2 /* Use BSD-compat clause from B.4.2.2 */ gid != oldcred->cr_groups[0] && /* allow setgid(getegid()) */ #endif (error = priv_check_cred(oldcred, PRIV_CRED_SETGID)) != 0) goto fail; #ifdef _POSIX_SAVED_IDS /* * Do we have "appropriate privileges" (are we root or gid == egid) * If so, we are changing the real uid and saved gid. */ if ( #ifdef POSIX_APPENDIX_B_4_2_2 /* use the clause from B.4.2.2 */ gid == oldcred->cr_groups[0] || #endif /* We are using privs. */ priv_check_cred(oldcred, PRIV_CRED_SETGID) == 0) #endif { /* * Set real gid */ if (oldcred->cr_rgid != gid) { change_rgid(newcred, gid); setsugid(p); } /* * Set saved gid * * XXX always set saved gid even if not _POSIX_SAVED_IDS, as * the security of setegid() depends on it. B.4.2.2 says it * is important that we should do this. */ if (oldcred->cr_svgid != gid) { change_svgid(newcred, gid); setsugid(p); } } /* * In all cases permitted cases, we are changing the egid. * Copy credentials so other references do not see our changes. */ if (oldcred->cr_groups[0] != gid) { change_egid(newcred, gid); setsugid(p); } proc_set_cred(p, newcred); PROC_UNLOCK(p); crfree(oldcred); return (0); fail: PROC_UNLOCK(p); crfree(newcred); return (error); } #ifndef _SYS_SYSPROTO_H_ struct setegid_args { gid_t egid; }; #endif /* ARGSUSED */ int sys_setegid(struct thread *td, struct setegid_args *uap) { struct proc *p = td->td_proc; struct ucred *newcred, *oldcred; gid_t egid; int error; egid = uap->egid; AUDIT_ARG_EGID(egid); newcred = crget(); PROC_LOCK(p); oldcred = crcopysafe(p, newcred); #ifdef MAC error = mac_cred_check_setegid(oldcred, egid); if (error) goto fail; #endif if (egid != oldcred->cr_rgid && /* allow setegid(getgid()) */ egid != oldcred->cr_svgid && /* allow setegid(saved gid) */ (error = priv_check_cred(oldcred, PRIV_CRED_SETEGID)) != 0) goto fail; if (oldcred->cr_groups[0] != egid) { change_egid(newcred, egid); setsugid(p); } proc_set_cred(p, newcred); PROC_UNLOCK(p); crfree(oldcred); return (0); fail: PROC_UNLOCK(p); crfree(newcred); return (error); } #ifndef _SYS_SYSPROTO_H_ struct setgroups_args { int gidsetsize; gid_t *gidset; }; #endif /* ARGSUSED */ int sys_setgroups(struct thread *td, struct setgroups_args *uap) { gid_t smallgroups[XU_NGROUPS]; gid_t *groups; int gidsetsize, error; gidsetsize = uap->gidsetsize; if (gidsetsize > ngroups_max + 1 || gidsetsize < 0) return (EINVAL); if (gidsetsize > XU_NGROUPS) groups = malloc(gidsetsize * sizeof(gid_t), M_TEMP, M_WAITOK); else groups = smallgroups; error = copyin(uap->gidset, groups, gidsetsize * sizeof(gid_t)); if (error == 0) error = kern_setgroups(td, gidsetsize, groups); if (gidsetsize > XU_NGROUPS) free(groups, M_TEMP); return (error); } int kern_setgroups(struct thread *td, u_int ngrp, gid_t *groups) { struct proc *p = td->td_proc; struct ucred *newcred, *oldcred; int error; MPASS(ngrp <= ngroups_max + 1); AUDIT_ARG_GROUPSET(groups, ngrp); newcred = crget(); crextend(newcred, ngrp); PROC_LOCK(p); oldcred = crcopysafe(p, newcred); #ifdef MAC error = mac_cred_check_setgroups(oldcred, ngrp, groups); if (error) goto fail; #endif error = priv_check_cred(oldcred, PRIV_CRED_SETGROUPS); if (error) goto fail; if (ngrp == 0) { /* * setgroups(0, NULL) is a legitimate way of clearing the * groups vector on non-BSD systems (which generally do not * have the egid in the groups[0]). 
We risk security holes * when running non-BSD software if we do not do the same. */ newcred->cr_ngroups = 1; } else { crsetgroups_locked(newcred, ngrp, groups); } setsugid(p); proc_set_cred(p, newcred); PROC_UNLOCK(p); crfree(oldcred); return (0); fail: PROC_UNLOCK(p); crfree(newcred); return (error); } #ifndef _SYS_SYSPROTO_H_ struct setreuid_args { uid_t ruid; uid_t euid; }; #endif /* ARGSUSED */ int sys_setreuid(struct thread *td, struct setreuid_args *uap) { struct proc *p = td->td_proc; struct ucred *newcred, *oldcred; uid_t euid, ruid; struct uidinfo *euip, *ruip; int error; euid = uap->euid; ruid = uap->ruid; AUDIT_ARG_EUID(euid); AUDIT_ARG_RUID(ruid); newcred = crget(); euip = uifind(euid); ruip = uifind(ruid); PROC_LOCK(p); oldcred = crcopysafe(p, newcred); #ifdef MAC error = mac_cred_check_setreuid(oldcred, ruid, euid); if (error) goto fail; #endif if (((ruid != (uid_t)-1 && ruid != oldcred->cr_ruid && ruid != oldcred->cr_svuid) || (euid != (uid_t)-1 && euid != oldcred->cr_uid && euid != oldcred->cr_ruid && euid != oldcred->cr_svuid)) && (error = priv_check_cred(oldcred, PRIV_CRED_SETREUID)) != 0) goto fail; if (euid != (uid_t)-1 && oldcred->cr_uid != euid) { change_euid(newcred, euip); setsugid(p); } if (ruid != (uid_t)-1 && oldcred->cr_ruid != ruid) { change_ruid(newcred, ruip); setsugid(p); } if ((ruid != (uid_t)-1 || newcred->cr_uid != newcred->cr_ruid) && newcred->cr_svuid != newcred->cr_uid) { change_svuid(newcred, newcred->cr_uid); setsugid(p); } proc_set_cred(p, newcred); #ifdef RACCT racct_proc_ucred_changed(p, oldcred, newcred); crhold(newcred); #endif PROC_UNLOCK(p); #ifdef RCTL rctl_proc_ucred_changed(p, newcred); crfree(newcred); #endif uifree(ruip); uifree(euip); crfree(oldcred); return (0); fail: PROC_UNLOCK(p); uifree(ruip); uifree(euip); crfree(newcred); return (error); } #ifndef _SYS_SYSPROTO_H_ struct setregid_args { gid_t rgid; gid_t egid; }; #endif /* ARGSUSED */ int sys_setregid(struct thread *td, struct setregid_args *uap) { struct proc *p = td->td_proc; struct ucred *newcred, *oldcred; gid_t egid, rgid; int error; egid = uap->egid; rgid = uap->rgid; AUDIT_ARG_EGID(egid); AUDIT_ARG_RGID(rgid); newcred = crget(); PROC_LOCK(p); oldcred = crcopysafe(p, newcred); #ifdef MAC error = mac_cred_check_setregid(oldcred, rgid, egid); if (error) goto fail; #endif if (((rgid != (gid_t)-1 && rgid != oldcred->cr_rgid && rgid != oldcred->cr_svgid) || (egid != (gid_t)-1 && egid != oldcred->cr_groups[0] && egid != oldcred->cr_rgid && egid != oldcred->cr_svgid)) && (error = priv_check_cred(oldcred, PRIV_CRED_SETREGID)) != 0) goto fail; if (egid != (gid_t)-1 && oldcred->cr_groups[0] != egid) { change_egid(newcred, egid); setsugid(p); } if (rgid != (gid_t)-1 && oldcred->cr_rgid != rgid) { change_rgid(newcred, rgid); setsugid(p); } if ((rgid != (gid_t)-1 || newcred->cr_groups[0] != newcred->cr_rgid) && newcred->cr_svgid != newcred->cr_groups[0]) { change_svgid(newcred, newcred->cr_groups[0]); setsugid(p); } proc_set_cred(p, newcred); PROC_UNLOCK(p); crfree(oldcred); return (0); fail: PROC_UNLOCK(p); crfree(newcred); return (error); } /* * setresuid(ruid, euid, suid) is like setreuid except control over the saved * uid is explicit. 
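 *
 * Added illustrative sketch: a value of (uid_t)-1 leaves the corresponding
 * id unchanged, and naming all three ids makes this the conventional way
 * for a privileged process to drop privileges irrevocably:
 *
 *	if (setresuid(uid, uid, uid) != 0)
 *		err(1, "setresuid");
 *
 * Afterwards neither the effective nor the saved uid retains the old
 * privilege, so a later seteuid() cannot regain it.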
*/ #ifndef _SYS_SYSPROTO_H_ struct setresuid_args { uid_t ruid; uid_t euid; uid_t suid; }; #endif /* ARGSUSED */ int sys_setresuid(struct thread *td, struct setresuid_args *uap) { struct proc *p = td->td_proc; struct ucred *newcred, *oldcred; uid_t euid, ruid, suid; struct uidinfo *euip, *ruip; int error; euid = uap->euid; ruid = uap->ruid; suid = uap->suid; AUDIT_ARG_EUID(euid); AUDIT_ARG_RUID(ruid); AUDIT_ARG_SUID(suid); newcred = crget(); euip = uifind(euid); ruip = uifind(ruid); PROC_LOCK(p); oldcred = crcopysafe(p, newcred); #ifdef MAC error = mac_cred_check_setresuid(oldcred, ruid, euid, suid); if (error) goto fail; #endif if (((ruid != (uid_t)-1 && ruid != oldcred->cr_ruid && ruid != oldcred->cr_svuid && ruid != oldcred->cr_uid) || (euid != (uid_t)-1 && euid != oldcred->cr_ruid && euid != oldcred->cr_svuid && euid != oldcred->cr_uid) || (suid != (uid_t)-1 && suid != oldcred->cr_ruid && suid != oldcred->cr_svuid && suid != oldcred->cr_uid)) && (error = priv_check_cred(oldcred, PRIV_CRED_SETRESUID)) != 0) goto fail; if (euid != (uid_t)-1 && oldcred->cr_uid != euid) { change_euid(newcred, euip); setsugid(p); } if (ruid != (uid_t)-1 && oldcred->cr_ruid != ruid) { change_ruid(newcred, ruip); setsugid(p); } if (suid != (uid_t)-1 && oldcred->cr_svuid != suid) { change_svuid(newcred, suid); setsugid(p); } proc_set_cred(p, newcred); #ifdef RACCT racct_proc_ucred_changed(p, oldcred, newcred); crhold(newcred); #endif PROC_UNLOCK(p); #ifdef RCTL rctl_proc_ucred_changed(p, newcred); crfree(newcred); #endif uifree(ruip); uifree(euip); crfree(oldcred); return (0); fail: PROC_UNLOCK(p); uifree(ruip); uifree(euip); crfree(newcred); return (error); } /* * setresgid(rgid, egid, sgid) is like setregid except control over the saved * gid is explicit. */ #ifndef _SYS_SYSPROTO_H_ struct setresgid_args { gid_t rgid; gid_t egid; gid_t sgid; }; #endif /* ARGSUSED */ int sys_setresgid(struct thread *td, struct setresgid_args *uap) { struct proc *p = td->td_proc; struct ucred *newcred, *oldcred; gid_t egid, rgid, sgid; int error; egid = uap->egid; rgid = uap->rgid; sgid = uap->sgid; AUDIT_ARG_EGID(egid); AUDIT_ARG_RGID(rgid); AUDIT_ARG_SGID(sgid); newcred = crget(); PROC_LOCK(p); oldcred = crcopysafe(p, newcred); #ifdef MAC error = mac_cred_check_setresgid(oldcred, rgid, egid, sgid); if (error) goto fail; #endif if (((rgid != (gid_t)-1 && rgid != oldcred->cr_rgid && rgid != oldcred->cr_svgid && rgid != oldcred->cr_groups[0]) || (egid != (gid_t)-1 && egid != oldcred->cr_rgid && egid != oldcred->cr_svgid && egid != oldcred->cr_groups[0]) || (sgid != (gid_t)-1 && sgid != oldcred->cr_rgid && sgid != oldcred->cr_svgid && sgid != oldcred->cr_groups[0])) && (error = priv_check_cred(oldcred, PRIV_CRED_SETRESGID)) != 0) goto fail; if (egid != (gid_t)-1 && oldcred->cr_groups[0] != egid) { change_egid(newcred, egid); setsugid(p); } if (rgid != (gid_t)-1 && oldcred->cr_rgid != rgid) { change_rgid(newcred, rgid); setsugid(p); } if (sgid != (gid_t)-1 && oldcred->cr_svgid != sgid) { change_svgid(newcred, sgid); setsugid(p); } proc_set_cred(p, newcred); PROC_UNLOCK(p); crfree(oldcred); return (0); fail: PROC_UNLOCK(p); crfree(newcred); return (error); } #ifndef _SYS_SYSPROTO_H_ struct getresuid_args { uid_t *ruid; uid_t *euid; uid_t *suid; }; #endif /* ARGSUSED */ int sys_getresuid(struct thread *td, struct getresuid_args *uap) { struct ucred *cred; int error1 = 0, error2 = 0, error3 = 0; cred = td->td_ucred; if (uap->ruid) error1 = copyout(&cred->cr_ruid, uap->ruid, sizeof(cred->cr_ruid)); if (uap->euid) error2 = 
copyout(&cred->cr_uid, uap->euid, sizeof(cred->cr_uid)); if (uap->suid) error3 = copyout(&cred->cr_svuid, uap->suid, sizeof(cred->cr_svuid)); return (error1 ? error1 : error2 ? error2 : error3); } #ifndef _SYS_SYSPROTO_H_ struct getresgid_args { gid_t *rgid; gid_t *egid; gid_t *sgid; }; #endif /* ARGSUSED */ int sys_getresgid(struct thread *td, struct getresgid_args *uap) { struct ucred *cred; int error1 = 0, error2 = 0, error3 = 0; cred = td->td_ucred; if (uap->rgid) error1 = copyout(&cred->cr_rgid, uap->rgid, sizeof(cred->cr_rgid)); if (uap->egid) error2 = copyout(&cred->cr_groups[0], uap->egid, sizeof(cred->cr_groups[0])); if (uap->sgid) error3 = copyout(&cred->cr_svgid, uap->sgid, sizeof(cred->cr_svgid)); return (error1 ? error1 : error2 ? error2 : error3); } #ifndef _SYS_SYSPROTO_H_ struct issetugid_args { int dummy; }; #endif /* ARGSUSED */ int sys_issetugid(struct thread *td, struct issetugid_args *uap) { struct proc *p = td->td_proc; /* * Note: OpenBSD sets a P_SUGIDEXEC flag set at execve() time, * we use P_SUGID because we consider changing the owners as * "tainting" as well. * This is significant for procs that start as root and "become" * a user without an exec - programs cannot know *everything* * that libc *might* have put in their data segment. */ td->td_retval[0] = (p->p_flag & P_SUGID) ? 1 : 0; return (0); } int sys___setugid(struct thread *td, struct __setugid_args *uap) { #ifdef REGRESSION struct proc *p; p = td->td_proc; switch (uap->flag) { case 0: PROC_LOCK(p); p->p_flag &= ~P_SUGID; PROC_UNLOCK(p); return (0); case 1: PROC_LOCK(p); p->p_flag |= P_SUGID; PROC_UNLOCK(p); return (0); default: return (EINVAL); } #else /* !REGRESSION */ return (ENOSYS); #endif /* REGRESSION */ } /* * Check if gid is a member of the group set. */ int groupmember(gid_t gid, struct ucred *cred) { int l; int h; int m; if (cred->cr_groups[0] == gid) return(1); /* * If gid was not our primary group, perform a binary search * of the supplemental groups. This is possible because we * sort the groups in crsetgroups(). */ l = 1; h = cred->cr_ngroups; while (l < h) { m = l + ((h - l) / 2); if (cred->cr_groups[m] < gid) l = m + 1; else h = m; } if ((l < cred->cr_ngroups) && (cred->cr_groups[l] == gid)) return (1); return (0); } /* * Test the active securelevel against a given level. securelevel_gt() * implements (securelevel > level). securelevel_ge() implements * (securelevel >= level). Note that the logic is inverted -- these * functions return EPERM on "success" and 0 on "failure". * * Due to care taken when setting the securelevel, we know that no jail will * be less secure that its parent (or the physical system), so it is sufficient * to test the current jail only. * * XXXRW: Possibly since this has to do with privilege, it should move to * kern_priv.c. */ int securelevel_gt(struct ucred *cr, int level) { return (cr->cr_prison->pr_securelevel > level ? EPERM : 0); } int securelevel_ge(struct ucred *cr, int level) { return (cr->cr_prison->pr_securelevel >= level ? EPERM : 0); } /* * 'see_other_uids' determines whether or not visibility of processes * and sockets with credentials holding different real uids is possible * using a variety of system MIBs. * XXX: data declarations should be together near the beginning of the file. 
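 *
 * Added illustrative note: the knob below is exported as the
 * security.bsd.see_other_uids sysctl, so an administrator can hide other
 * users' processes and sockets from unprivileged tools such as ps(1) with
 *
 *	sysctl security.bsd.see_other_uids=0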
*/ static int see_other_uids = 1; SYSCTL_INT(_security_bsd, OID_AUTO, see_other_uids, CTLFLAG_RW, &see_other_uids, 0, "Unprivileged processes may see subjects/objects with different real uid"); /*- * Determine if u1 "can see" the subject specified by u2, according to the * 'see_other_uids' policy. * Returns: 0 for permitted, ESRCH otherwise * Locks: none * References: *u1 and *u2 must not change during the call * u1 may equal u2, in which case only one reference is required */ int cr_canseeotheruids(struct ucred *u1, struct ucred *u2) { if (!see_other_uids && u1->cr_ruid != u2->cr_ruid) { if (priv_check_cred(u1, PRIV_SEEOTHERUIDS) != 0) return (ESRCH); } return (0); } /* * 'see_other_gids' determines whether or not visibility of processes * and sockets with credentials holding different real gids is possible * using a variety of system MIBs. * XXX: data declarations should be together near the beginning of the file. */ static int see_other_gids = 1; SYSCTL_INT(_security_bsd, OID_AUTO, see_other_gids, CTLFLAG_RW, &see_other_gids, 0, "Unprivileged processes may see subjects/objects with different real gid"); /* * Determine if u1 can "see" the subject specified by u2, according to the * 'see_other_gids' policy. * Returns: 0 for permitted, ESRCH otherwise * Locks: none * References: *u1 and *u2 must not change during the call * u1 may equal u2, in which case only one reference is required */ int cr_canseeothergids(struct ucred *u1, struct ucred *u2) { int i, match; if (!see_other_gids) { match = 0; for (i = 0; i < u1->cr_ngroups; i++) { if (groupmember(u1->cr_groups[i], u2)) match = 1; if (match) break; } if (!match) { if (priv_check_cred(u1, PRIV_SEEOTHERGIDS) != 0) return (ESRCH); } } return (0); } /* * 'see_jail_proc' determines whether or not visibility of processes and * sockets with credentials holding different jail ids is possible using a * variety of system MIBs. * * XXX: data declarations should be together near the beginning of the file. */ static int see_jail_proc = 1; SYSCTL_INT(_security_bsd, OID_AUTO, see_jail_proc, CTLFLAG_RW, &see_jail_proc, 0, "Unprivileged processes may see subjects/objects with different jail ids"); /*- * Determine if u1 "can see" the subject specified by u2, according to the * 'see_jail_proc' policy. * Returns: 0 for permitted, ESRCH otherwise * Locks: none * References: *u1 and *u2 must not change during the call * u1 may equal u2, in which case only one reference is required */ int cr_canseejailproc(struct ucred *u1, struct ucred *u2) { - if (u1->cr_uid == 0) + if (see_jail_proc || /* Policy deactivated. */ + u1->cr_prison == u2->cr_prison || /* Same jail. */ + priv_check_cred(u1, PRIV_SEEJAILPROC) == 0) /* Privileged. */ return (0); - return (!see_jail_proc && u1->cr_prison != u2->cr_prison ? ESRCH : 0); + + return (ESRCH); } /*- * Determine if u1 "can see" the subject specified by u2. * Returns: 0 for permitted, an errno value otherwise * Locks: none * References: *u1 and *u2 must not change during the call * u1 may equal u2, in which case only one reference is required */ int cr_cansee(struct ucred *u1, struct ucred *u2) { int error; if ((error = prison_check(u1, u2))) return (error); #ifdef MAC if ((error = mac_cred_check_visible(u1, u2))) return (error); #endif if ((error = cr_canseeotheruids(u1, u2))) return (error); if ((error = cr_canseeothergids(u1, u2))) return (error); if ((error = cr_canseejailproc(u1, u2))) return (error); return (0); } /*- * Determine if td "can see" the subject specified by p. 
* Returns: 0 for permitted, an errno value otherwise * Locks: Sufficient locks to protect p->p_ucred must be held. td really * should be curthread. * References: td and p must be valid for the lifetime of the call */ int p_cansee(struct thread *td, struct proc *p) { /* Wrap cr_cansee() for all functionality. */ KASSERT(td == curthread, ("%s: td not curthread", __func__)); PROC_LOCK_ASSERT(p, MA_OWNED); if (td->td_proc == p) return (0); return (cr_cansee(td->td_ucred, p->p_ucred)); } /* * 'conservative_signals' prevents the delivery of a broad class of * signals by unprivileged processes to processes that have changed their * credentials since the last invocation of execve(). This can prevent * the leakage of cached information or retained privileges as a result * of a common class of signal-related vulnerabilities. However, this * may interfere with some applications that expect to be able to * deliver these signals to peer processes after having given up * privilege. */ static int conservative_signals = 1; SYSCTL_INT(_security_bsd, OID_AUTO, conservative_signals, CTLFLAG_RW, &conservative_signals, 0, "Unprivileged processes prevented from " "sending certain signals to processes whose credentials have changed"); /*- * Determine whether cred may deliver the specified signal to proc. * Returns: 0 for permitted, an errno value otherwise. * Locks: A lock must be held for proc. * References: cred and proc must be valid for the lifetime of the call. */ int cr_cansignal(struct ucred *cred, struct proc *proc, int signum) { int error; PROC_LOCK_ASSERT(proc, MA_OWNED); /* * Jail semantics limit the scope of signalling to proc in the * same jail as cred, if cred is in jail. */ error = prison_check(cred, proc->p_ucred); if (error) return (error); #ifdef MAC if ((error = mac_proc_check_signal(cred, proc, signum))) return (error); #endif if ((error = cr_canseeotheruids(cred, proc->p_ucred))) return (error); if ((error = cr_canseeothergids(cred, proc->p_ucred))) return (error); /* * UNIX signal semantics depend on the status of the P_SUGID * bit on the target process. If the bit is set, then additional * restrictions are placed on the set of available signals. */ if (conservative_signals && (proc->p_flag & P_SUGID)) { switch (signum) { case 0: case SIGKILL: case SIGINT: case SIGTERM: case SIGALRM: case SIGSTOP: case SIGTTIN: case SIGTTOU: case SIGTSTP: case SIGHUP: case SIGUSR1: case SIGUSR2: /* * Generally, permit job and terminal control * signals. */ break; default: /* Not permitted without privilege. */ error = priv_check_cred(cred, PRIV_SIGNAL_SUGID); if (error) return (error); } } /* * Generally, the target credential's ruid or svuid must match the * subject credential's ruid or euid. */ if (cred->cr_ruid != proc->p_ucred->cr_ruid && cred->cr_ruid != proc->p_ucred->cr_svuid && cred->cr_uid != proc->p_ucred->cr_ruid && cred->cr_uid != proc->p_ucred->cr_svuid) { error = priv_check_cred(cred, PRIV_SIGNAL_DIFFCRED); if (error) return (error); } return (0); } /*- * Determine whether td may deliver the specified signal to p. * Returns: 0 for permitted, an errno value otherwise * Locks: Sufficient locks to protect various components of td and p * must be held. td must be curthread, and a lock must be * held for p. 
* References: td and p must be valid for the lifetime of the call */ int p_cansignal(struct thread *td, struct proc *p, int signum) { KASSERT(td == curthread, ("%s: td not curthread", __func__)); PROC_LOCK_ASSERT(p, MA_OWNED); if (td->td_proc == p) return (0); /* * UNIX signalling semantics require that processes in the same * session always be able to deliver SIGCONT to one another, * overriding the remaining protections. */ /* XXX: This will require an additional lock of some sort. */ if (signum == SIGCONT && td->td_proc->p_session == p->p_session) return (0); /* * Some compat layers use SIGTHR and higher signals for * communication between different kernel threads of the same * process, so that they expect that it's always possible to * deliver them, even for suid applications where cr_cansignal() can * deny such ability for security consideration. It should be * pretty safe to do since the only way to create two processes * with the same p_leader is via rfork(2). */ if (td->td_proc->p_leader != NULL && signum >= SIGTHR && signum < SIGTHR + 4 && td->td_proc->p_leader == p->p_leader) return (0); return (cr_cansignal(td->td_ucred, p, signum)); } /*- * Determine whether td may reschedule p. * Returns: 0 for permitted, an errno value otherwise * Locks: Sufficient locks to protect various components of td and p * must be held. td must be curthread, and a lock must * be held for p. * References: td and p must be valid for the lifetime of the call */ int p_cansched(struct thread *td, struct proc *p) { int error; KASSERT(td == curthread, ("%s: td not curthread", __func__)); PROC_LOCK_ASSERT(p, MA_OWNED); if (td->td_proc == p) return (0); if ((error = prison_check(td->td_ucred, p->p_ucred))) return (error); #ifdef MAC if ((error = mac_proc_check_sched(td->td_ucred, p))) return (error); #endif if ((error = cr_canseeotheruids(td->td_ucred, p->p_ucred))) return (error); if ((error = cr_canseeothergids(td->td_ucred, p->p_ucred))) return (error); if (td->td_ucred->cr_ruid != p->p_ucred->cr_ruid && td->td_ucred->cr_uid != p->p_ucred->cr_ruid) { error = priv_check(td, PRIV_SCHED_DIFFCRED); if (error) return (error); } return (0); } /* * Handle getting or setting the prison's unprivileged_proc_debug * value. */ static int sysctl_unprivileged_proc_debug(SYSCTL_HANDLER_ARGS) { int error, val; val = prison_allow(req->td->td_ucred, PR_ALLOW_UNPRIV_DEBUG); error = sysctl_handle_int(oidp, &val, 0, req); if (error != 0 || req->newptr == NULL) return (error); if (val != 0 && val != 1) return (EINVAL); prison_set_allow(req->td->td_ucred, PR_ALLOW_UNPRIV_DEBUG, val); return (0); } /* * The 'unprivileged_proc_debug' flag may be used to disable a variety of * unprivileged inter-process debugging services, including some procfs * functionality, ptrace(), and ktrace(). In the past, inter-process * debugging has been involved in a variety of security problems, and sites * not requiring the service might choose to disable it when hardening * systems. */ SYSCTL_PROC(_security_bsd, OID_AUTO, unprivileged_proc_debug, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_PRISON | CTLFLAG_SECURE | CTLFLAG_MPSAFE, 0, 0, sysctl_unprivileged_proc_debug, "I", "Unprivileged processes may use process debugging facilities"); /*- * Determine whether td may debug p. * Returns: 0 for permitted, an errno value otherwise * Locks: Sufficient locks to protect various components of td and p * must be held. td must be curthread, and a lock must * be held for p. 
* References: td and p must be valid for the lifetime of the call */ int p_candebug(struct thread *td, struct proc *p) { int error, grpsubset, i, uidsubset; KASSERT(td == curthread, ("%s: td not curthread", __func__)); PROC_LOCK_ASSERT(p, MA_OWNED); if (td->td_proc == p) return (0); if ((error = priv_check(td, PRIV_DEBUG_UNPRIV))) return (error); if ((error = prison_check(td->td_ucred, p->p_ucred))) return (error); #ifdef MAC if ((error = mac_proc_check_debug(td->td_ucred, p))) return (error); #endif if ((error = cr_canseeotheruids(td->td_ucred, p->p_ucred))) return (error); if ((error = cr_canseeothergids(td->td_ucred, p->p_ucred))) return (error); /* * Is p's group set a subset of td's effective group set? This * includes p's egid, group access list, rgid, and svgid. */ grpsubset = 1; for (i = 0; i < p->p_ucred->cr_ngroups; i++) { if (!groupmember(p->p_ucred->cr_groups[i], td->td_ucred)) { grpsubset = 0; break; } } grpsubset = grpsubset && groupmember(p->p_ucred->cr_rgid, td->td_ucred) && groupmember(p->p_ucred->cr_svgid, td->td_ucred); /* * Are the uids present in p's credential equal to td's * effective uid? This includes p's euid, svuid, and ruid. */ uidsubset = (td->td_ucred->cr_uid == p->p_ucred->cr_uid && td->td_ucred->cr_uid == p->p_ucred->cr_svuid && td->td_ucred->cr_uid == p->p_ucred->cr_ruid); /* * If p's gids aren't a subset, or the uids aren't a subset, * or the credential has changed, require appropriate privilege * for td to debug p. */ if (!grpsubset || !uidsubset) { error = priv_check(td, PRIV_DEBUG_DIFFCRED); if (error) return (error); } /* * Has the credential of the process changed since the last exec()? */ if ((p->p_flag & P_SUGID) != 0) { error = priv_check(td, PRIV_DEBUG_SUGID); if (error) return (error); } /* Can't trace init when securelevel > 0. */ if (p == initproc) { error = securelevel_gt(td->td_ucred, 0); if (error) return (error); } /* * Can't trace a process that's currently exec'ing. * * XXX: Note, this is not a security policy decision, it's a * basic correctness/functionality decision. Therefore, this check * should be moved to the caller's of p_candebug(). */ if ((p->p_flag & P_INEXEC) != 0) return (EBUSY); /* Denied explicitly */ if ((p->p_flag2 & P2_NOTRACE) != 0) { error = priv_check(td, PRIV_DEBUG_DENIED); if (error != 0) return (error); } return (0); } /*- * Determine whether the subject represented by cred can "see" a socket. * Returns: 0 for permitted, ENOENT otherwise. */ int cr_canseesocket(struct ucred *cred, struct socket *so) { int error; error = prison_check(cred, so->so_cred); if (error) return (ENOENT); #ifdef MAC error = mac_socket_check_visible(cred, so); if (error) return (error); #endif if (cr_canseeotheruids(cred, so->so_cred)) return (ENOENT); if (cr_canseeothergids(cred, so->so_cred)) return (ENOENT); return (0); } /*- * Determine whether td can wait for the exit of p. * Returns: 0 for permitted, an errno value otherwise * Locks: Sufficient locks to protect various components of td and p * must be held. td must be curthread, and a lock must * be held for p. * References: td and p must be valid for the lifetime of the call */ int p_canwait(struct thread *td, struct proc *p) { int error; KASSERT(td == curthread, ("%s: td not curthread", __func__)); PROC_LOCK_ASSERT(p, MA_OWNED); if ((error = prison_check(td->td_ucred, p->p_ucred))) return (error); #ifdef MAC if ((error = mac_proc_check_wait(td->td_ucred, p))) return (error); #endif #if 0 /* XXXMAC: This could have odd effects on some shells. 
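 * (Added, hedged note: presumably the concern is that shells routinely
 * wait for children that run with a different uid, e.g. after starting a
 * setuid program, so applying the uid visibility check here could make
 * such waits fail unexpectedly.)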
*/ if ((error = cr_canseeotheruids(td->td_ucred, p->p_ucred))) return (error); #endif return (0); } /* * Credential management. * * struct ucred objects are rarely allocated but gain and lose references all * the time (e.g., on struct file alloc/dealloc) turning refcount updates into * a significant source of cache-line ping ponging. Common cases are worked * around by modifying thread-local counter instead if the cred to operate on * matches td_realucred. * * The counter is split into 2 parts: * - cr_users -- total count of all struct proc and struct thread objects * which have given cred in p_ucred and td_ucred respectively * - cr_ref -- the actual ref count, only valid if cr_users == 0 * * If users == 0 then cr_ref behaves similarly to refcount(9), in particular if * the count reaches 0 the object is freeable. * If users > 0 and curthread->td_realucred == cred, then updates are performed * against td_ucredref. * In other cases updates are performed against cr_ref. * * Changing td_realucred into something else decrements cr_users and transfers * accumulated updates. */ struct ucred * crcowget(struct ucred *cr) { mtx_lock(&cr->cr_mtx); KASSERT(cr->cr_users > 0, ("%s: users %d not > 0 on cred %p", __func__, cr->cr_users, cr)); cr->cr_users++; cr->cr_ref++; mtx_unlock(&cr->cr_mtx); return (cr); } static struct ucred * crunuse(struct thread *td) { struct ucred *cr, *crold; MPASS(td->td_realucred == td->td_ucred); cr = td->td_realucred; mtx_lock(&cr->cr_mtx); cr->cr_ref += td->td_ucredref; td->td_ucredref = 0; KASSERT(cr->cr_users > 0, ("%s: users %d not > 0 on cred %p", __func__, cr->cr_users, cr)); cr->cr_users--; if (cr->cr_users == 0) { KASSERT(cr->cr_ref > 0, ("%s: ref %ld not > 0 on cred %p", __func__, cr->cr_ref, cr)); crold = cr; } else { cr->cr_ref--; crold = NULL; } mtx_unlock(&cr->cr_mtx); td->td_realucred = NULL; return (crold); } static void crunusebatch(struct ucred *cr, int users, int ref) { KASSERT(users > 0, ("%s: passed users %d not > 0 ; cred %p", __func__, users, cr)); mtx_lock(&cr->cr_mtx); KASSERT(cr->cr_users >= users, ("%s: users %d not > %d on cred %p", __func__, cr->cr_users, users, cr)); cr->cr_users -= users; cr->cr_ref += ref; cr->cr_ref -= users; if (cr->cr_users > 0) { mtx_unlock(&cr->cr_mtx); return; } KASSERT(cr->cr_ref >= 0, ("%s: ref %ld not >= 0 on cred %p", __func__, cr->cr_ref, cr)); if (cr->cr_ref > 0) { mtx_unlock(&cr->cr_mtx); return; } crfree_final(cr); } void crcowfree(struct thread *td) { struct ucred *cr; cr = crunuse(td); if (cr != NULL) crfree(cr); } struct ucred * crcowsync(void) { struct thread *td; struct proc *p; struct ucred *crnew, *crold; td = curthread; p = td->td_proc; PROC_LOCK_ASSERT(p, MA_OWNED); MPASS(td->td_realucred == td->td_ucred); if (td->td_realucred == p->p_ucred) return (NULL); crnew = crcowget(p->p_ucred); crold = crunuse(td); td->td_realucred = crnew; td->td_ucred = td->td_realucred; return (crold); } /* * Batching. 
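 *
 * Added illustrative sketch (hedged; pick_next_exiting_thread() is a
 * placeholder for whatever iteration the caller already performs): a
 * caller reclaiming many off-CPU threads can accumulate their credential
 * references in a zeroed batch and settle them with one locked update per
 * distinct cred:
 *
 *	struct credbatch crb;
 *	struct thread *td;
 *
 *	memset(&crb, 0, sizeof(crb));
 *	while ((td = pick_next_exiting_thread()) != NULL)
 *		credbatch_add(&crb, td);
 *	credbatch_final(&crb);
 *
 * The threads must already be inactive (see the TDS_INACTIVE assertion in
 * credbatch_add() below) and credbatch_final() expects that at least one
 * thread was added.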
*/ void credbatch_add(struct credbatch *crb, struct thread *td) { struct ucred *cr; MPASS(td->td_realucred != NULL); MPASS(td->td_realucred == td->td_ucred); MPASS(TD_GET_STATE(td) == TDS_INACTIVE); cr = td->td_realucred; KASSERT(cr->cr_users > 0, ("%s: users %d not > 0 on cred %p", __func__, cr->cr_users, cr)); if (crb->cred != cr) { if (crb->users > 0) { MPASS(crb->cred != NULL); crunusebatch(crb->cred, crb->users, crb->ref); crb->users = 0; crb->ref = 0; } } crb->cred = cr; crb->users++; crb->ref += td->td_ucredref; td->td_ucredref = 0; td->td_realucred = NULL; } void credbatch_final(struct credbatch *crb) { MPASS(crb->cred != NULL); MPASS(crb->users > 0); crunusebatch(crb->cred, crb->users, crb->ref); } /* * Allocate a zeroed cred structure. */ struct ucred * crget(void) { struct ucred *cr; cr = malloc(sizeof(*cr), M_CRED, M_WAITOK | M_ZERO); mtx_init(&cr->cr_mtx, "cred", NULL, MTX_DEF); cr->cr_ref = 1; #ifdef AUDIT audit_cred_init(cr); #endif #ifdef MAC mac_cred_init(cr); #endif cr->cr_groups = cr->cr_smallgroups; cr->cr_agroups = sizeof(cr->cr_smallgroups) / sizeof(cr->cr_smallgroups[0]); return (cr); } /* * Claim another reference to a ucred structure. */ struct ucred * crhold(struct ucred *cr) { struct thread *td; td = curthread; if (__predict_true(td->td_realucred == cr)) { KASSERT(cr->cr_users > 0, ("%s: users %d not > 0 on cred %p", __func__, cr->cr_users, cr)); td->td_ucredref++; return (cr); } mtx_lock(&cr->cr_mtx); cr->cr_ref++; mtx_unlock(&cr->cr_mtx); return (cr); } /* * Free a cred structure. Throws away space when ref count gets to 0. */ void crfree(struct ucred *cr) { struct thread *td; td = curthread; if (__predict_true(td->td_realucred == cr)) { KASSERT(cr->cr_users > 0, ("%s: users %d not > 0 on cred %p", __func__, cr->cr_users, cr)); td->td_ucredref--; return; } mtx_lock(&cr->cr_mtx); KASSERT(cr->cr_users >= 0, ("%s: users %d not >= 0 on cred %p", __func__, cr->cr_users, cr)); cr->cr_ref--; if (cr->cr_users > 0) { mtx_unlock(&cr->cr_mtx); return; } KASSERT(cr->cr_ref >= 0, ("%s: ref %ld not >= 0 on cred %p", __func__, cr->cr_ref, cr)); if (cr->cr_ref > 0) { mtx_unlock(&cr->cr_mtx); return; } crfree_final(cr); } static void crfree_final(struct ucred *cr) { KASSERT(cr->cr_users == 0, ("%s: users %d not == 0 on cred %p", __func__, cr->cr_users, cr)); KASSERT(cr->cr_ref == 0, ("%s: ref %ld not == 0 on cred %p", __func__, cr->cr_ref, cr)); /* * Some callers of crget(), such as nfs_statfs(), allocate a temporary * credential, but don't allocate a uidinfo structure. */ if (cr->cr_uidinfo != NULL) uifree(cr->cr_uidinfo); if (cr->cr_ruidinfo != NULL) uifree(cr->cr_ruidinfo); if (cr->cr_prison != NULL) prison_free(cr->cr_prison); if (cr->cr_loginclass != NULL) loginclass_free(cr->cr_loginclass); #ifdef AUDIT audit_cred_destroy(cr); #endif #ifdef MAC mac_cred_destroy(cr); #endif mtx_destroy(&cr->cr_mtx); if (cr->cr_groups != cr->cr_smallgroups) free(cr->cr_groups, M_CRED); free(cr, M_CRED); } /* * Copy a ucred's contents from a template. Does not block. 
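 *
 * Added illustrative note: the destination is expected to be a fresh,
 * unshared credential (the KASSERT below insists on a reference count of
 * one), so the usual pattern is
 *
 *	newcred = crget();
 *	crcopy(newcred, oldcred);
 *
 * which is exactly what crdup() further below wraps up.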
*/ void crcopy(struct ucred *dest, struct ucred *src) { KASSERT(dest->cr_ref == 1, ("crcopy of shared ucred")); bcopy(&src->cr_startcopy, &dest->cr_startcopy, (unsigned)((caddr_t)&src->cr_endcopy - (caddr_t)&src->cr_startcopy)); dest->cr_flags = src->cr_flags; crsetgroups(dest, src->cr_ngroups, src->cr_groups); uihold(dest->cr_uidinfo); uihold(dest->cr_ruidinfo); prison_hold(dest->cr_prison); loginclass_hold(dest->cr_loginclass); #ifdef AUDIT audit_cred_copy(src, dest); #endif #ifdef MAC mac_cred_copy(src, dest); #endif } /* * Dup cred struct to a new held one. */ struct ucred * crdup(struct ucred *cr) { struct ucred *newcr; newcr = crget(); crcopy(newcr, cr); return (newcr); } /* * Fill in a struct xucred based on a struct ucred. */ void cru2x(struct ucred *cr, struct xucred *xcr) { int ngroups; bzero(xcr, sizeof(*xcr)); xcr->cr_version = XUCRED_VERSION; xcr->cr_uid = cr->cr_uid; ngroups = MIN(cr->cr_ngroups, XU_NGROUPS); xcr->cr_ngroups = ngroups; bcopy(cr->cr_groups, xcr->cr_groups, ngroups * sizeof(*cr->cr_groups)); } void cru2xt(struct thread *td, struct xucred *xcr) { cru2x(td->td_ucred, xcr); xcr->cr_pid = td->td_proc->p_pid; } /* * Set initial process credentials. * Callers are responsible for providing the reference for provided credentials. */ void proc_set_cred_init(struct proc *p, struct ucred *newcred) { p->p_ucred = crcowget(newcred); } /* * Change process credentials. * Callers are responsible for providing the reference for passed credentials * and for freeing old ones. * * Process has to be locked except when it does not have credentials (as it * should not be visible just yet) or when newcred is NULL (as this can be * only used when the process is about to be freed, at which point it should * not be visible anymore). */ void proc_set_cred(struct proc *p, struct ucred *newcred) { struct ucred *cr; cr = p->p_ucred; MPASS(cr != NULL); PROC_LOCK_ASSERT(p, MA_OWNED); KASSERT(newcred->cr_users == 0, ("%s: users %d not 0 on cred %p", __func__, newcred->cr_users, newcred)); mtx_lock(&cr->cr_mtx); KASSERT(cr->cr_users > 0, ("%s: users %d not > 0 on cred %p", __func__, cr->cr_users, cr)); cr->cr_users--; mtx_unlock(&cr->cr_mtx); p->p_ucred = newcred; newcred->cr_users = 1; PROC_UPDATE_COW(p); } void proc_unset_cred(struct proc *p) { struct ucred *cr; MPASS(p->p_state == PRS_ZOMBIE || p->p_state == PRS_NEW); cr = p->p_ucred; p->p_ucred = NULL; KASSERT(cr->cr_users > 0, ("%s: users %d not > 0 on cred %p", __func__, cr->cr_users, cr)); mtx_lock(&cr->cr_mtx); cr->cr_users--; if (cr->cr_users == 0) KASSERT(cr->cr_ref > 0, ("%s: ref %ld not > 0 on cred %p", __func__, cr->cr_ref, cr)); mtx_unlock(&cr->cr_mtx); crfree(cr); } struct ucred * crcopysafe(struct proc *p, struct ucred *cr) { struct ucred *oldcred; int groups; PROC_LOCK_ASSERT(p, MA_OWNED); oldcred = p->p_ucred; while (cr->cr_agroups < oldcred->cr_agroups) { groups = oldcred->cr_agroups; PROC_UNLOCK(p); crextend(cr, groups); PROC_LOCK(p); oldcred = p->p_ucred; } crcopy(cr, oldcred); return (oldcred); } /* * Extend the passed in credential to hold n items. */ void crextend(struct ucred *cr, int n) { int cnt; /* Truncate? */ if (n <= cr->cr_agroups) return; /* * We extend by 2 each time since we're using a power of two * allocator until we need enough groups to fill a page. * Once we're allocating multiple pages, only allocate as many * as we actually need. The case of processes needing a * non-power of two number of pages seems more likely than * a real world process that adds thousands of groups one at a * time. 
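 *
 * Added worked example (assuming 4 KB pages and a 4-byte gid_t, i.e. 1024
 * gids per page): growing a credential with capacity 16 to hold 50 groups
 * doubles 16 -> 32 -> 64, while a request for 1500 groups skips the
 * doubling and is rounded up to 2048, i.e. two pages' worth of gids.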
*/ if ( n < PAGE_SIZE / sizeof(gid_t) ) { if (cr->cr_agroups == 0) cnt = MAX(1, MINALLOCSIZE / sizeof(gid_t)); else cnt = cr->cr_agroups * 2; while (cnt < n) cnt *= 2; } else cnt = roundup2(n, PAGE_SIZE / sizeof(gid_t)); /* Free the old array. */ if (cr->cr_groups != cr->cr_smallgroups) free(cr->cr_groups, M_CRED); cr->cr_groups = malloc(cnt * sizeof(gid_t), M_CRED, M_WAITOK | M_ZERO); cr->cr_agroups = cnt; } /* * Copy groups in to a credential, preserving any necessary invariants. * Currently this includes the sorting of all supplemental gids. * crextend() must have been called before hand to ensure sufficient * space is available. */ static void crsetgroups_locked(struct ucred *cr, int ngrp, gid_t *groups) { int i; int j; gid_t g; KASSERT(cr->cr_agroups >= ngrp, ("cr_ngroups is too small")); bcopy(groups, cr->cr_groups, ngrp * sizeof(gid_t)); cr->cr_ngroups = ngrp; /* * Sort all groups except cr_groups[0] to allow groupmember to * perform a binary search. * * XXX: If large numbers of groups become common this should * be replaced with shell sort like linux uses or possibly * heap sort. */ for (i = 2; i < ngrp; i++) { g = cr->cr_groups[i]; for (j = i-1; j >= 1 && g < cr->cr_groups[j]; j--) cr->cr_groups[j + 1] = cr->cr_groups[j]; cr->cr_groups[j + 1] = g; } } /* * Copy groups in to a credential after expanding it if required. * Truncate the list to (ngroups_max + 1) if it is too large. */ void crsetgroups(struct ucred *cr, int ngrp, gid_t *groups) { if (ngrp > ngroups_max + 1) ngrp = ngroups_max + 1; crextend(cr, ngrp); crsetgroups_locked(cr, ngrp, groups); } /* * Get login name, if available. */ #ifndef _SYS_SYSPROTO_H_ struct getlogin_args { char *namebuf; u_int namelen; }; #endif /* ARGSUSED */ int sys_getlogin(struct thread *td, struct getlogin_args *uap) { char login[MAXLOGNAME]; struct proc *p = td->td_proc; size_t len; if (uap->namelen > MAXLOGNAME) uap->namelen = MAXLOGNAME; PROC_LOCK(p); SESS_LOCK(p->p_session); len = strlcpy(login, p->p_session->s_login, uap->namelen) + 1; SESS_UNLOCK(p->p_session); PROC_UNLOCK(p); if (len > uap->namelen) return (ERANGE); return (copyout(login, uap->namebuf, len)); } /* * Set login name. */ #ifndef _SYS_SYSPROTO_H_ struct setlogin_args { char *namebuf; }; #endif /* ARGSUSED */ int sys_setlogin(struct thread *td, struct setlogin_args *uap) { struct proc *p = td->td_proc; int error; char logintmp[MAXLOGNAME]; CTASSERT(sizeof(p->p_session->s_login) >= sizeof(logintmp)); error = priv_check(td, PRIV_PROC_SETLOGIN); if (error) return (error); error = copyinstr(uap->namebuf, logintmp, sizeof(logintmp), NULL); if (error != 0) { if (error == ENAMETOOLONG) error = EINVAL; return (error); } AUDIT_ARG_LOGIN(logintmp); PROC_LOCK(p); SESS_LOCK(p->p_session); strcpy(p->p_session->s_login, logintmp); SESS_UNLOCK(p->p_session); PROC_UNLOCK(p); return (0); } void setsugid(struct proc *p) { PROC_LOCK_ASSERT(p, MA_OWNED); p->p_flag |= P_SUGID; } /*- * Change a process's effective uid. * Side effects: newcred->cr_uid and newcred->cr_uidinfo will be modified. * References: newcred must be an exclusive credential reference for the * duration of the call. */ void change_euid(struct ucred *newcred, struct uidinfo *euip) { newcred->cr_uid = euip->ui_uid; uihold(euip); uifree(newcred->cr_uidinfo); newcred->cr_uidinfo = euip; } /*- * Change a process's effective gid. * Side effects: newcred->cr_gid will be modified. * References: newcred must be an exclusive credential reference for the * duration of the call. 
*/ void change_egid(struct ucred *newcred, gid_t egid) { newcred->cr_groups[0] = egid; } /*- * Change a process's real uid. * Side effects: newcred->cr_ruid will be updated, newcred->cr_ruidinfo * will be updated, and the old and new cr_ruidinfo proc * counts will be updated. * References: newcred must be an exclusive credential reference for the * duration of the call. */ void change_ruid(struct ucred *newcred, struct uidinfo *ruip) { (void)chgproccnt(newcred->cr_ruidinfo, -1, 0); newcred->cr_ruid = ruip->ui_uid; uihold(ruip); uifree(newcred->cr_ruidinfo); newcred->cr_ruidinfo = ruip; (void)chgproccnt(newcred->cr_ruidinfo, 1, 0); } /*- * Change a process's real gid. * Side effects: newcred->cr_rgid will be updated. * References: newcred must be an exclusive credential reference for the * duration of the call. */ void change_rgid(struct ucred *newcred, gid_t rgid) { newcred->cr_rgid = rgid; } /*- * Change a process's saved uid. * Side effects: newcred->cr_svuid will be updated. * References: newcred must be an exclusive credential reference for the * duration of the call. */ void change_svuid(struct ucred *newcred, uid_t svuid) { newcred->cr_svuid = svuid; } /*- * Change a process's saved gid. * Side effects: newcred->cr_svgid will be updated. * References: newcred must be an exclusive credential reference for the * duration of the call. */ void change_svgid(struct ucred *newcred, gid_t svgid) { newcred->cr_svgid = svgid; } bool allow_ptrace = true; SYSCTL_BOOL(_security_bsd, OID_AUTO, allow_ptrace, CTLFLAG_RWTUN, &allow_ptrace, 0, "Deny ptrace(2) use by returning ENOSYS"); diff --git a/sys/security/mac_biba/mac_biba.c b/sys/security/mac_biba/mac_biba.c index 6948548503e1..5d66e2fd4b9b 100644 --- a/sys/security/mac_biba/mac_biba.c +++ b/sys/security/mac_biba/mac_biba.c @@ -1,3797 +1,3798 @@ /*- * Copyright (c) 1999-2002, 2007-2011 Robert N. M. Watson * Copyright (c) 2001-2005 McAfee, Inc. * Copyright (c) 2006 SPARTA, Inc. * All rights reserved. * * This software was developed by Robert Watson for the TrustedBSD Project. * * This software was developed for the FreeBSD Project in part by McAfee * Research, the Security Research Division of McAfee, Inc. under * DARPA/SPAWAR contract N66001-01-C-8035 ("CBOSS"), as part of the DARPA * CHATS research program. * * This software was enhanced by SPARTA ISSO under SPAWAR contract * N66001-04-C-6019 ("SEFOS"). * * This software was developed at the University of Cambridge Computer * Laboratory with support from a grant from Google, Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ /* * Developed by the TrustedBSD Project. * * Biba fixed label mandatory integrity policy. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include SYSCTL_DECL(_security_mac); static SYSCTL_NODE(_security_mac, OID_AUTO, biba, CTLFLAG_RW | CTLFLAG_MPSAFE, 0, "TrustedBSD mac_biba policy controls"); static int biba_label_size = sizeof(struct mac_biba); SYSCTL_INT(_security_mac_biba, OID_AUTO, label_size, CTLFLAG_RD, &biba_label_size, 0, "Size of struct mac_biba"); static int biba_enabled = 1; SYSCTL_INT(_security_mac_biba, OID_AUTO, enabled, CTLFLAG_RWTUN, &biba_enabled, 0, "Enforce MAC/Biba policy"); static int destroyed_not_inited; SYSCTL_INT(_security_mac_biba, OID_AUTO, destroyed_not_inited, CTLFLAG_RD, &destroyed_not_inited, 0, "Count of labels destroyed but not inited"); static int trust_all_interfaces = 0; SYSCTL_INT(_security_mac_biba, OID_AUTO, trust_all_interfaces, CTLFLAG_RDTUN, &trust_all_interfaces, 0, "Consider all interfaces 'trusted' by MAC/Biba"); static char trusted_interfaces[128]; SYSCTL_STRING(_security_mac_biba, OID_AUTO, trusted_interfaces, CTLFLAG_RDTUN, trusted_interfaces, 0, "Interfaces considered 'trusted' by MAC/Biba"); static int max_compartments = MAC_BIBA_MAX_COMPARTMENTS; SYSCTL_INT(_security_mac_biba, OID_AUTO, max_compartments, CTLFLAG_RD, &max_compartments, 0, "Maximum supported compartments"); static int ptys_equal = 0; SYSCTL_INT(_security_mac_biba, OID_AUTO, ptys_equal, CTLFLAG_RWTUN, &ptys_equal, 0, "Label pty devices as biba/equal on create"); static int interfaces_equal = 1; SYSCTL_INT(_security_mac_biba, OID_AUTO, interfaces_equal, CTLFLAG_RWTUN, &interfaces_equal, 0, "Label network interfaces as biba/equal on create"); static int revocation_enabled = 0; SYSCTL_INT(_security_mac_biba, OID_AUTO, revocation_enabled, CTLFLAG_RWTUN, &revocation_enabled, 0, "Revoke access to objects on relabel"); static int biba_slot; #define SLOT(l) ((struct mac_biba *)mac_label_get((l), biba_slot)) #define SLOT_SET(l, val) mac_label_set((l), biba_slot, (uintptr_t)(val)) static uma_zone_t zone_biba; static __inline int biba_bit_set_empty(u_char *set) { int i; for (i = 0; i < MAC_BIBA_MAX_COMPARTMENTS >> 3; i++) if (set[i] != 0) return (0); return (1); } static struct mac_biba * biba_alloc(int flag) { return (uma_zalloc(zone_biba, flag | M_ZERO)); } static void biba_free(struct mac_biba *mb) { if (mb != NULL) uma_zfree(zone_biba, mb); else atomic_add_int(&destroyed_not_inited, 1); } static int biba_atmostflags(struct mac_biba *mb, int flags) { if ((mb->mb_flags & flags) != mb->mb_flags) return (EINVAL); return (0); } static int biba_dominate_element(struct mac_biba_element *a, struct mac_biba_element *b) { int bit; switch (a->mbe_type) { case MAC_BIBA_TYPE_EQUAL: case MAC_BIBA_TYPE_HIGH: 
return (1); case MAC_BIBA_TYPE_LOW: switch (b->mbe_type) { case MAC_BIBA_TYPE_GRADE: case MAC_BIBA_TYPE_HIGH: return (0); case MAC_BIBA_TYPE_EQUAL: case MAC_BIBA_TYPE_LOW: return (1); default: panic("biba_dominate_element: b->mbe_type invalid"); } case MAC_BIBA_TYPE_GRADE: switch (b->mbe_type) { case MAC_BIBA_TYPE_EQUAL: case MAC_BIBA_TYPE_LOW: return (1); case MAC_BIBA_TYPE_HIGH: return (0); case MAC_BIBA_TYPE_GRADE: for (bit = 1; bit <= MAC_BIBA_MAX_COMPARTMENTS; bit++) if (!MAC_BIBA_BIT_TEST(bit, a->mbe_compartments) && MAC_BIBA_BIT_TEST(bit, b->mbe_compartments)) return (0); return (a->mbe_grade >= b->mbe_grade); default: panic("biba_dominate_element: b->mbe_type invalid"); } default: panic("biba_dominate_element: a->mbe_type invalid"); } return (0); } static int biba_subject_dominate_high(struct mac_biba *mb) { struct mac_biba_element *element; KASSERT((mb->mb_flags & MAC_BIBA_FLAG_EFFECTIVE) != 0, ("biba_effective_in_range: mb not effective")); element = &mb->mb_effective; return (element->mbe_type == MAC_BIBA_TYPE_EQUAL || element->mbe_type == MAC_BIBA_TYPE_HIGH); } static int biba_range_in_range(struct mac_biba *rangea, struct mac_biba *rangeb) { return (biba_dominate_element(&rangeb->mb_rangehigh, &rangea->mb_rangehigh) && biba_dominate_element(&rangea->mb_rangelow, &rangeb->mb_rangelow)); } static int biba_effective_in_range(struct mac_biba *effective, struct mac_biba *range) { KASSERT((effective->mb_flags & MAC_BIBA_FLAG_EFFECTIVE) != 0, ("biba_effective_in_range: a not effective")); KASSERT((range->mb_flags & MAC_BIBA_FLAG_RANGE) != 0, ("biba_effective_in_range: b not range")); return (biba_dominate_element(&range->mb_rangehigh, &effective->mb_effective) && biba_dominate_element(&effective->mb_effective, &range->mb_rangelow)); return (1); } static int biba_dominate_effective(struct mac_biba *a, struct mac_biba *b) { KASSERT((a->mb_flags & MAC_BIBA_FLAG_EFFECTIVE) != 0, ("biba_dominate_effective: a not effective")); KASSERT((b->mb_flags & MAC_BIBA_FLAG_EFFECTIVE) != 0, ("biba_dominate_effective: b not effective")); return (biba_dominate_element(&a->mb_effective, &b->mb_effective)); } static int biba_equal_element(struct mac_biba_element *a, struct mac_biba_element *b) { if (a->mbe_type == MAC_BIBA_TYPE_EQUAL || b->mbe_type == MAC_BIBA_TYPE_EQUAL) return (1); return (a->mbe_type == b->mbe_type && a->mbe_grade == b->mbe_grade); } static int biba_equal_effective(struct mac_biba *a, struct mac_biba *b) { KASSERT((a->mb_flags & MAC_BIBA_FLAG_EFFECTIVE) != 0, ("biba_equal_effective: a not effective")); KASSERT((b->mb_flags & MAC_BIBA_FLAG_EFFECTIVE) != 0, ("biba_equal_effective: b not effective")); return (biba_equal_element(&a->mb_effective, &b->mb_effective)); } static int biba_contains_equal(struct mac_biba *mb) { if (mb->mb_flags & MAC_BIBA_FLAG_EFFECTIVE) { if (mb->mb_effective.mbe_type == MAC_BIBA_TYPE_EQUAL) return (1); } if (mb->mb_flags & MAC_BIBA_FLAG_RANGE) { if (mb->mb_rangelow.mbe_type == MAC_BIBA_TYPE_EQUAL) return (1); if (mb->mb_rangehigh.mbe_type == MAC_BIBA_TYPE_EQUAL) return (1); } return (0); } static int biba_subject_privileged(struct mac_biba *mb) { KASSERT((mb->mb_flags & MAC_BIBA_FLAGS_BOTH) == MAC_BIBA_FLAGS_BOTH, ("biba_subject_privileged: subject doesn't have both labels")); /* If the effective is EQUAL, it's ok. */ if (mb->mb_effective.mbe_type == MAC_BIBA_TYPE_EQUAL) return (0); /* If either range endpoint is EQUAL, it's ok. 
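For the MAC_BIBA_TYPE_GRADE/GRADE branch above, element A dominates element B when A's compartment set is a superset of B's and A's grade is at least B's. The following standalone sketch shows just that arithmetic; the sketch_* names, the 256-compartment size, and the bit layout are assumptions made for illustration rather than copies of the kernel's MAC_BIBA_BIT_TEST macros:

#include <stdbool.h>
#include <stdio.h>

#define	SKETCH_MAX_COMPARTMENTS	256	/* assumed size, for illustration */

struct sketch_element {
	unsigned short	grade;
	unsigned char	compartments[SKETCH_MAX_COMPARTMENTS / 8];
};

static bool
sketch_bit_test(int bit, const unsigned char *set)
{
	return ((set[(bit - 1) / 8] & (1 << ((bit - 1) % 8))) != 0);
}

/*
 * a dominates b iff every compartment of b is also a compartment of a and
 * a's grade is >= b's grade -- the GRADE/GRADE case of
 * biba_dominate_element() above.
 */
static bool
sketch_dominate(const struct sketch_element *a, const struct sketch_element *b)
{
	int bit;

	for (bit = 1; bit <= SKETCH_MAX_COMPARTMENTS; bit++)
		if (!sketch_bit_test(bit, a->compartments) &&
		    sketch_bit_test(bit, b->compartments))
			return (false);
	return (a->grade >= b->grade);
}

int
main(void)
{
	struct sketch_element hi = { .grade = 10 }, lo = { .grade = 5 };

	hi.compartments[0] = 0x03;	/* compartments 1 and 2 */
	lo.compartments[0] = 0x01;	/* compartment 1 */
	printf("hi dominates lo: %d\n", sketch_dominate(&hi, &lo));
	printf("lo dominates hi: %d\n", sketch_dominate(&lo, &hi));
	return (0);
}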
*/ if (mb->mb_rangelow.mbe_type == MAC_BIBA_TYPE_EQUAL || mb->mb_rangehigh.mbe_type == MAC_BIBA_TYPE_EQUAL) return (0); /* If the range is low-high, it's ok. */ if (mb->mb_rangelow.mbe_type == MAC_BIBA_TYPE_LOW && mb->mb_rangehigh.mbe_type == MAC_BIBA_TYPE_HIGH) return (0); /* It's not ok. */ return (EPERM); } static int biba_high_effective(struct mac_biba *mb) { KASSERT((mb->mb_flags & MAC_BIBA_FLAG_EFFECTIVE) != 0, ("biba_equal_effective: mb not effective")); return (mb->mb_effective.mbe_type == MAC_BIBA_TYPE_HIGH); } static int biba_valid(struct mac_biba *mb) { if (mb->mb_flags & MAC_BIBA_FLAG_EFFECTIVE) { switch (mb->mb_effective.mbe_type) { case MAC_BIBA_TYPE_GRADE: break; case MAC_BIBA_TYPE_EQUAL: case MAC_BIBA_TYPE_HIGH: case MAC_BIBA_TYPE_LOW: if (mb->mb_effective.mbe_grade != 0 || !MAC_BIBA_BIT_SET_EMPTY( mb->mb_effective.mbe_compartments)) return (EINVAL); break; default: return (EINVAL); } } else { if (mb->mb_effective.mbe_type != MAC_BIBA_TYPE_UNDEF) return (EINVAL); } if (mb->mb_flags & MAC_BIBA_FLAG_RANGE) { switch (mb->mb_rangelow.mbe_type) { case MAC_BIBA_TYPE_GRADE: break; case MAC_BIBA_TYPE_EQUAL: case MAC_BIBA_TYPE_HIGH: case MAC_BIBA_TYPE_LOW: if (mb->mb_rangelow.mbe_grade != 0 || !MAC_BIBA_BIT_SET_EMPTY( mb->mb_rangelow.mbe_compartments)) return (EINVAL); break; default: return (EINVAL); } switch (mb->mb_rangehigh.mbe_type) { case MAC_BIBA_TYPE_GRADE: break; case MAC_BIBA_TYPE_EQUAL: case MAC_BIBA_TYPE_HIGH: case MAC_BIBA_TYPE_LOW: if (mb->mb_rangehigh.mbe_grade != 0 || !MAC_BIBA_BIT_SET_EMPTY( mb->mb_rangehigh.mbe_compartments)) return (EINVAL); break; default: return (EINVAL); } if (!biba_dominate_element(&mb->mb_rangehigh, &mb->mb_rangelow)) return (EINVAL); } else { if (mb->mb_rangelow.mbe_type != MAC_BIBA_TYPE_UNDEF || mb->mb_rangehigh.mbe_type != MAC_BIBA_TYPE_UNDEF) return (EINVAL); } return (0); } static void biba_set_range(struct mac_biba *mb, u_short typelow, u_short gradelow, u_char *compartmentslow, u_short typehigh, u_short gradehigh, u_char *compartmentshigh) { mb->mb_rangelow.mbe_type = typelow; mb->mb_rangelow.mbe_grade = gradelow; if (compartmentslow != NULL) memcpy(mb->mb_rangelow.mbe_compartments, compartmentslow, sizeof(mb->mb_rangelow.mbe_compartments)); mb->mb_rangehigh.mbe_type = typehigh; mb->mb_rangehigh.mbe_grade = gradehigh; if (compartmentshigh != NULL) memcpy(mb->mb_rangehigh.mbe_compartments, compartmentshigh, sizeof(mb->mb_rangehigh.mbe_compartments)); mb->mb_flags |= MAC_BIBA_FLAG_RANGE; } static void biba_set_effective(struct mac_biba *mb, u_short type, u_short grade, u_char *compartments) { mb->mb_effective.mbe_type = type; mb->mb_effective.mbe_grade = grade; if (compartments != NULL) memcpy(mb->mb_effective.mbe_compartments, compartments, sizeof(mb->mb_effective.mbe_compartments)); mb->mb_flags |= MAC_BIBA_FLAG_EFFECTIVE; } static void biba_copy_range(struct mac_biba *labelfrom, struct mac_biba *labelto) { KASSERT((labelfrom->mb_flags & MAC_BIBA_FLAG_RANGE) != 0, ("biba_copy_range: labelfrom not range")); labelto->mb_rangelow = labelfrom->mb_rangelow; labelto->mb_rangehigh = labelfrom->mb_rangehigh; labelto->mb_flags |= MAC_BIBA_FLAG_RANGE; } static void biba_copy_effective(struct mac_biba *labelfrom, struct mac_biba *labelto) { KASSERT((labelfrom->mb_flags & MAC_BIBA_FLAG_EFFECTIVE) != 0, ("biba_copy_effective: labelfrom not effective")); labelto->mb_effective = labelfrom->mb_effective; labelto->mb_flags |= MAC_BIBA_FLAG_EFFECTIVE; } static void biba_copy(struct mac_biba *source, struct mac_biba *dest) { if (source->mb_flags & 
MAC_BIBA_FLAG_EFFECTIVE) biba_copy_effective(source, dest); if (source->mb_flags & MAC_BIBA_FLAG_RANGE) biba_copy_range(source, dest); } /* * Policy module operations. */ static void biba_init(struct mac_policy_conf *conf) { zone_biba = uma_zcreate("mac_biba", sizeof(struct mac_biba), NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0); } /* * Label operations. */ static void biba_init_label(struct label *label) { SLOT_SET(label, biba_alloc(M_WAITOK)); } static int biba_init_label_waitcheck(struct label *label, int flag) { SLOT_SET(label, biba_alloc(flag)); if (SLOT(label) == NULL) return (ENOMEM); return (0); } static void biba_destroy_label(struct label *label) { biba_free(SLOT(label)); SLOT_SET(label, NULL); } /* * biba_element_to_string() accepts an sbuf and Biba element. It converts * the Biba element to a string and stores the result in the sbuf; if there * isn't space in the sbuf, -1 is returned. */ static int biba_element_to_string(struct sbuf *sb, struct mac_biba_element *element) { int i, first; switch (element->mbe_type) { case MAC_BIBA_TYPE_HIGH: return (sbuf_printf(sb, "high")); case MAC_BIBA_TYPE_LOW: return (sbuf_printf(sb, "low")); case MAC_BIBA_TYPE_EQUAL: return (sbuf_printf(sb, "equal")); case MAC_BIBA_TYPE_GRADE: if (sbuf_printf(sb, "%d", element->mbe_grade) == -1) return (-1); first = 1; for (i = 1; i <= MAC_BIBA_MAX_COMPARTMENTS; i++) { if (MAC_BIBA_BIT_TEST(i, element->mbe_compartments)) { if (first) { if (sbuf_putc(sb, ':') == -1) return (-1); if (sbuf_printf(sb, "%d", i) == -1) return (-1); first = 0; } else { if (sbuf_printf(sb, "+%d", i) == -1) return (-1); } } } return (0); default: panic("biba_element_to_string: invalid type (%d)", element->mbe_type); } } /* * biba_to_string() converts a Biba label to a string, and places the results * in the passed sbuf. It returns 0 on success, or EINVAL if there isn't * room in the sbuf. Note: the sbuf will be modified even in a failure case, * so the caller may need to revert the sbuf by restoring the offset if * that's undesired. */ static int biba_to_string(struct sbuf *sb, struct mac_biba *mb) { if (mb->mb_flags & MAC_BIBA_FLAG_EFFECTIVE) { if (biba_element_to_string(sb, &mb->mb_effective) == -1) return (EINVAL); } if (mb->mb_flags & MAC_BIBA_FLAG_RANGE) { if (sbuf_putc(sb, '(') == -1) return (EINVAL); if (biba_element_to_string(sb, &mb->mb_rangelow) == -1) return (EINVAL); if (sbuf_putc(sb, '-') == -1) return (EINVAL); if (biba_element_to_string(sb, &mb->mb_rangehigh) == -1) return (EINVAL); if (sbuf_putc(sb, ')') == -1) return (EINVAL); } return (0); } static int biba_externalize_label(struct label *label, char *element_name, struct sbuf *sb, int *claimed) { struct mac_biba *mb; if (strcmp(MAC_BIBA_LABEL_NAME, element_name) != 0) return (0); (*claimed)++; mb = SLOT(label); return (biba_to_string(sb, mb)); } static int biba_parse_element(struct mac_biba_element *element, char *string) { char *compartment, *end, *grade; int value; if (strcmp(string, "high") == 0 || strcmp(string, "hi") == 0) { element->mbe_type = MAC_BIBA_TYPE_HIGH; element->mbe_grade = MAC_BIBA_TYPE_UNDEF; } else if (strcmp(string, "low") == 0 || strcmp(string, "lo") == 0) { element->mbe_type = MAC_BIBA_TYPE_LOW; element->mbe_grade = MAC_BIBA_TYPE_UNDEF; } else if (strcmp(string, "equal") == 0 || strcmp(string, "eq") == 0) { element->mbe_type = MAC_BIBA_TYPE_EQUAL; element->mbe_grade = MAC_BIBA_TYPE_UNDEF; } else { element->mbe_type = MAC_BIBA_TYPE_GRADE; /* * Numeric grade piece of the element. 
*/ grade = strsep(&string, ":"); value = strtol(grade, &end, 10); if (end == grade || *end != '\0') return (EINVAL); if (value < 0 || value > 65535) return (EINVAL); element->mbe_grade = value; /* * Optional compartment piece of the element. If none are * included, we assume that the label has no compartments. */ if (string == NULL) return (0); if (*string == '\0') return (0); while ((compartment = strsep(&string, "+")) != NULL) { value = strtol(compartment, &end, 10); if (compartment == end || *end != '\0') return (EINVAL); if (value < 1 || value > MAC_BIBA_MAX_COMPARTMENTS) return (EINVAL); MAC_BIBA_BIT_SET(value, element->mbe_compartments); } } return (0); } /* * Note: destructively consumes the string, make a local copy before calling * if that's a problem. */ static int biba_parse(struct mac_biba *mb, char *string) { char *rangehigh, *rangelow, *effective; int error; effective = strsep(&string, "("); if (*effective == '\0') effective = NULL; if (string != NULL) { rangelow = strsep(&string, "-"); if (string == NULL) return (EINVAL); rangehigh = strsep(&string, ")"); if (string == NULL) return (EINVAL); if (*string != '\0') return (EINVAL); } else { rangelow = NULL; rangehigh = NULL; } KASSERT((rangelow != NULL && rangehigh != NULL) || (rangelow == NULL && rangehigh == NULL), ("biba_parse: range mismatch")); bzero(mb, sizeof(*mb)); if (effective != NULL) { error = biba_parse_element(&mb->mb_effective, effective); if (error) return (error); mb->mb_flags |= MAC_BIBA_FLAG_EFFECTIVE; } if (rangelow != NULL) { error = biba_parse_element(&mb->mb_rangelow, rangelow); if (error) return (error); error = biba_parse_element(&mb->mb_rangehigh, rangehigh); if (error) return (error); mb->mb_flags |= MAC_BIBA_FLAG_RANGE; } error = biba_valid(mb); if (error) return (error); return (0); } static int biba_internalize_label(struct label *label, char *element_name, char *element_data, int *claimed) { struct mac_biba *mb, mb_temp; int error; if (strcmp(MAC_BIBA_LABEL_NAME, element_name) != 0) return (0); (*claimed)++; error = biba_parse(&mb_temp, element_data); if (error) return (error); mb = SLOT(label); *mb = mb_temp; return (0); } static void biba_copy_label(struct label *src, struct label *dest) { *SLOT(dest) = *SLOT(src); } /* * Object-specific entry point implementations are sorted alphabetically by * object type name and then by operation. 
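biba_parse_element() above accepts "grade[:comp+comp+...]" with the grade bounded to 0..65535 and compartments to 1..MAC_BIBA_MAX_COMPARTMENTS. A userspace sketch of the same syntax follows, using strsep() and strtol() the same way; the sketch_parse_element() name, the 256-compartment constant, and the bitmap layout are illustrative assumptions, and the input string is consumed destructively just as the kernel routine's comment warns:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define	SKETCH_MAX_COMPARTMENTS	256	/* assumed, for illustration only */

/* Returns 0 on success, -1 on a malformed element. */
static int
sketch_parse_element(char *string, long *gradep,
    unsigned char comps[SKETCH_MAX_COMPARTMENTS / 8])
{
	char *compartment, *end, *grade;
	long value;

	memset(comps, 0, SKETCH_MAX_COMPARTMENTS / 8);
	grade = strsep(&string, ":");
	value = strtol(grade, &end, 10);
	if (end == grade || *end != '\0' || value < 0 || value > 65535)
		return (-1);
	*gradep = value;
	if (string == NULL || *string == '\0')
		return (0);
	while ((compartment = strsep(&string, "+")) != NULL) {
		value = strtol(compartment, &end, 10);
		if (end == compartment || *end != '\0' ||
		    value < 1 || value > SKETCH_MAX_COMPARTMENTS)
			return (-1);
		comps[(value - 1) / 8] |= 1 << ((value - 1) % 8);
	}
	return (0);
}

int
main(void)
{
	char buf[] = "15:1+3+200";
	unsigned char comps[SKETCH_MAX_COMPARTMENTS / 8];
	long grade;

	printf("%d grade=%ld\n", sketch_parse_element(buf, &grade, comps),
	    grade);
	return (0);
}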
*/ static int biba_bpfdesc_check_receive(struct bpf_d *d, struct label *dlabel, struct ifnet *ifp, struct label *ifplabel) { struct mac_biba *a, *b; if (!biba_enabled) return (0); a = SLOT(dlabel); b = SLOT(ifplabel); if (biba_equal_effective(a, b)) return (0); return (EACCES); } static void biba_bpfdesc_create(struct ucred *cred, struct bpf_d *d, struct label *dlabel) { struct mac_biba *source, *dest; source = SLOT(cred->cr_label); dest = SLOT(dlabel); biba_copy_effective(source, dest); } static void biba_bpfdesc_create_mbuf(struct bpf_d *d, struct label *dlabel, struct mbuf *m, struct label *mlabel) { struct mac_biba *source, *dest; source = SLOT(dlabel); dest = SLOT(mlabel); biba_copy_effective(source, dest); } static void biba_cred_associate_nfsd(struct ucred *cred) { struct mac_biba *label; label = SLOT(cred->cr_label); biba_set_effective(label, MAC_BIBA_TYPE_LOW, 0, NULL); biba_set_range(label, MAC_BIBA_TYPE_LOW, 0, NULL, MAC_BIBA_TYPE_HIGH, 0, NULL); } static int biba_cred_check_relabel(struct ucred *cred, struct label *newlabel) { struct mac_biba *subj, *new; int error; subj = SLOT(cred->cr_label); new = SLOT(newlabel); /* * If there is a Biba label update for the credential, it may * be an update of the effective, range, or both. */ error = biba_atmostflags(new, MAC_BIBA_FLAGS_BOTH); if (error) return (error); /* * If the Biba label is to be changed, authorize as appropriate. */ if (new->mb_flags & MAC_BIBA_FLAGS_BOTH) { /* * If the change request modifies both the Biba label * effective and range, check that the new effective will be * in the new range. */ if ((new->mb_flags & MAC_BIBA_FLAGS_BOTH) == MAC_BIBA_FLAGS_BOTH && !biba_effective_in_range(new, new)) return (EINVAL); /* * To change the Biba effective label on a credential, the * new effective label must be in the current range. */ if (new->mb_flags & MAC_BIBA_FLAG_EFFECTIVE && !biba_effective_in_range(new, subj)) return (EPERM); /* * To change the Biba range on a credential, the new range * label must be in the current range. */ if (new->mb_flags & MAC_BIBA_FLAG_RANGE && !biba_range_in_range(new, subj)) return (EPERM); /* * To have EQUAL in any component of the new credential Biba * label, the subject must already have EQUAL in their label. 
*/ if (biba_contains_equal(new)) { error = biba_subject_privileged(subj); if (error) return (error); } } return (0); } static int biba_cred_check_visible(struct ucred *u1, struct ucred *u2) { struct mac_biba *subj, *obj; if (!biba_enabled) return (0); subj = SLOT(u1->cr_label); obj = SLOT(u2->cr_label); /* XXX: range */ if (!biba_dominate_effective(obj, subj)) return (ESRCH); return (0); } static void biba_cred_create_init(struct ucred *cred) { struct mac_biba *dest; dest = SLOT(cred->cr_label); biba_set_effective(dest, MAC_BIBA_TYPE_HIGH, 0, NULL); biba_set_range(dest, MAC_BIBA_TYPE_LOW, 0, NULL, MAC_BIBA_TYPE_HIGH, 0, NULL); } static void biba_cred_create_swapper(struct ucred *cred) { struct mac_biba *dest; dest = SLOT(cred->cr_label); biba_set_effective(dest, MAC_BIBA_TYPE_EQUAL, 0, NULL); biba_set_range(dest, MAC_BIBA_TYPE_LOW, 0, NULL, MAC_BIBA_TYPE_HIGH, 0, NULL); } static void biba_cred_relabel(struct ucred *cred, struct label *newlabel) { struct mac_biba *source, *dest; source = SLOT(newlabel); dest = SLOT(cred->cr_label); biba_copy(source, dest); } static void biba_devfs_create_device(struct ucred *cred, struct mount *mp, struct cdev *dev, struct devfs_dirent *de, struct label *delabel) { struct mac_biba *mb; const char *dn; int biba_type; mb = SLOT(delabel); dn = devtoname(dev); if (strcmp(dn, "null") == 0 || strcmp(dn, "zero") == 0 || strcmp(dn, "random") == 0 || strncmp(dn, "fd/", strlen("fd/")) == 0) biba_type = MAC_BIBA_TYPE_EQUAL; else if (ptys_equal && (strncmp(dn, "ttyp", strlen("ttyp")) == 0 || strncmp(dn, "pts/", strlen("pts/")) == 0 || strncmp(dn, "ptyp", strlen("ptyp")) == 0)) biba_type = MAC_BIBA_TYPE_EQUAL; else biba_type = MAC_BIBA_TYPE_HIGH; biba_set_effective(mb, biba_type, 0, NULL); } static void biba_devfs_create_directory(struct mount *mp, char *dirname, int dirnamelen, struct devfs_dirent *de, struct label *delabel) { struct mac_biba *mb; mb = SLOT(delabel); biba_set_effective(mb, MAC_BIBA_TYPE_HIGH, 0, NULL); } static void biba_devfs_create_symlink(struct ucred *cred, struct mount *mp, struct devfs_dirent *dd, struct label *ddlabel, struct devfs_dirent *de, struct label *delabel) { struct mac_biba *source, *dest; source = SLOT(cred->cr_label); dest = SLOT(delabel); biba_copy_effective(source, dest); } static void biba_devfs_update(struct mount *mp, struct devfs_dirent *de, struct label *delabel, struct vnode *vp, struct label *vplabel) { struct mac_biba *source, *dest; source = SLOT(vplabel); dest = SLOT(delabel); biba_copy(source, dest); } static void biba_devfs_vnode_associate(struct mount *mp, struct label *mntlabel, struct devfs_dirent *de, struct label *delabel, struct vnode *vp, struct label *vplabel) { struct mac_biba *source, *dest; source = SLOT(delabel); dest = SLOT(vplabel); biba_copy_effective(source, dest); } static int biba_ifnet_check_relabel(struct ucred *cred, struct ifnet *ifp, struct label *ifplabel, struct label *newlabel) { struct mac_biba *subj, *new; int error; subj = SLOT(cred->cr_label); new = SLOT(newlabel); /* * If there is a Biba label update for the interface, it may be an * update of the effective, range, or both. */ error = biba_atmostflags(new, MAC_BIBA_FLAGS_BOTH); if (error) return (error); /* * Relabling network interfaces requires Biba privilege. 
*/ error = biba_subject_privileged(subj); if (error) return (error); return (0); } static int biba_ifnet_check_transmit(struct ifnet *ifp, struct label *ifplabel, struct mbuf *m, struct label *mlabel) { struct mac_biba *p, *i; if (!biba_enabled) return (0); p = SLOT(mlabel); i = SLOT(ifplabel); return (biba_effective_in_range(p, i) ? 0 : EACCES); } static void biba_ifnet_create(struct ifnet *ifp, struct label *ifplabel) { char tifname[IFNAMSIZ], *p, *q; char tiflist[sizeof(trusted_interfaces)]; struct mac_biba *dest; int len, type; dest = SLOT(ifplabel); if (if_gettype(ifp) == IFT_LOOP || interfaces_equal != 0) { type = MAC_BIBA_TYPE_EQUAL; goto set; } if (trust_all_interfaces) { type = MAC_BIBA_TYPE_HIGH; goto set; } type = MAC_BIBA_TYPE_LOW; if (trusted_interfaces[0] == '\0' || !strvalid(trusted_interfaces, sizeof(trusted_interfaces))) goto set; bzero(tiflist, sizeof(tiflist)); for (p = trusted_interfaces, q = tiflist; *p != '\0'; p++, q++) if(*p != ' ' && *p != '\t') *q = *p; for (p = q = tiflist;; p++) { if (*p == ',' || *p == '\0') { len = p - q; if (len < IFNAMSIZ) { bzero(tifname, sizeof(tifname)); bcopy(q, tifname, len); if (strcmp(tifname, if_name(ifp)) == 0) { type = MAC_BIBA_TYPE_HIGH; break; } } else { *p = '\0'; printf("mac_biba warning: interface name " "\"%s\" is too long (must be < %d)\n", q, IFNAMSIZ); } if (*p == '\0') break; q = p + 1; } } set: biba_set_effective(dest, type, 0, NULL); biba_set_range(dest, type, 0, NULL, type, 0, NULL); } static void biba_ifnet_create_mbuf(struct ifnet *ifp, struct label *ifplabel, struct mbuf *m, struct label *mlabel) { struct mac_biba *source, *dest; source = SLOT(ifplabel); dest = SLOT(mlabel); biba_copy_effective(source, dest); } static void biba_ifnet_relabel(struct ucred *cred, struct ifnet *ifp, struct label *ifplabel, struct label *newlabel) { struct mac_biba *source, *dest; source = SLOT(newlabel); dest = SLOT(ifplabel); biba_copy(source, dest); } static int biba_inpcb_check_deliver(struct inpcb *inp, struct label *inplabel, struct mbuf *m, struct label *mlabel) { struct mac_biba *p, *i; if (!biba_enabled) return (0); p = SLOT(mlabel); i = SLOT(inplabel); return (biba_equal_effective(p, i) ? 
0 : EACCES); } static int biba_inpcb_check_visible(struct ucred *cred, struct inpcb *inp, struct label *inplabel) { struct mac_biba *subj, *obj; if (!biba_enabled) return (0); subj = SLOT(cred->cr_label); obj = SLOT(inplabel); if (!biba_dominate_effective(obj, subj)) return (ENOENT); return (0); } static void biba_inpcb_create(struct socket *so, struct label *solabel, struct inpcb *inp, struct label *inplabel) { struct mac_biba *source, *dest; source = SLOT(solabel); dest = SLOT(inplabel); SOCK_LOCK(so); biba_copy_effective(source, dest); SOCK_UNLOCK(so); } static void biba_inpcb_create_mbuf(struct inpcb *inp, struct label *inplabel, struct mbuf *m, struct label *mlabel) { struct mac_biba *source, *dest; source = SLOT(inplabel); dest = SLOT(mlabel); biba_copy_effective(source, dest); } static void biba_inpcb_sosetlabel(struct socket *so, struct label *solabel, struct inpcb *inp, struct label *inplabel) { struct mac_biba *source, *dest; SOCK_LOCK_ASSERT(so); source = SLOT(solabel); dest = SLOT(inplabel); biba_copy(source, dest); } static void biba_ip6q_create(struct mbuf *m, struct label *mlabel, struct ip6q *q6, struct label *q6label) { struct mac_biba *source, *dest; source = SLOT(mlabel); dest = SLOT(q6label); biba_copy_effective(source, dest); } static int biba_ip6q_match(struct mbuf *m, struct label *mlabel, struct ip6q *q6, struct label *q6label) { struct mac_biba *a, *b; a = SLOT(q6label); b = SLOT(mlabel); return (biba_equal_effective(a, b)); } static void biba_ip6q_reassemble(struct ip6q *q6, struct label *q6label, struct mbuf *m, struct label *mlabel) { struct mac_biba *source, *dest; source = SLOT(q6label); dest = SLOT(mlabel); /* Just use the head, since we require them all to match. */ biba_copy_effective(source, dest); } static void biba_ip6q_update(struct mbuf *m, struct label *mlabel, struct ip6q *q6, struct label *q6label) { /* NOOP: we only accept matching labels, so no need to update */ } static void biba_ipq_create(struct mbuf *m, struct label *mlabel, struct ipq *q, struct label *qlabel) { struct mac_biba *source, *dest; source = SLOT(mlabel); dest = SLOT(qlabel); biba_copy_effective(source, dest); } static int biba_ipq_match(struct mbuf *m, struct label *mlabel, struct ipq *q, struct label *qlabel) { struct mac_biba *a, *b; a = SLOT(qlabel); b = SLOT(mlabel); return (biba_equal_effective(a, b)); } static void biba_ipq_reassemble(struct ipq *q, struct label *qlabel, struct mbuf *m, struct label *mlabel) { struct mac_biba *source, *dest; source = SLOT(qlabel); dest = SLOT(mlabel); /* Just use the head, since we require them all to match. 
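The ipq/ip6q hooks above rely on one invariant: a fragment is accepted into a reassembly queue only if its label equals the queue's, so reassembly can simply propagate the head's label and the update hook is a no-op. A compressed sketch of that flow, with labels reduced to a single integer and the sketch_* types invented for the example:

#include <stdbool.h>
#include <stdio.h>

struct sketch_frag  { int label; };
struct sketch_queue { int label; };

/* Only equal labels match, as in biba_ipq_match() above. */
static bool
sketch_ipq_match(const struct sketch_queue *q, const struct sketch_frag *f)
{
	return (q->label == f->label);
}

/* The reassembled packet simply takes the head's label. */
static void
sketch_ipq_reassemble(const struct sketch_queue *q, struct sketch_frag *pkt)
{
	pkt->label = q->label;
}

int
main(void)
{
	struct sketch_queue q = { .label = 7 };
	struct sketch_frag ok = { .label = 7 }, bad = { .label = 3 }, out;

	printf("match ok=%d bad=%d\n", sketch_ipq_match(&q, &ok),
	    sketch_ipq_match(&q, &bad));
	sketch_ipq_reassemble(&q, &out);
	printf("reassembled label=%d\n", out.label);
	return (0);
}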
*/ biba_copy_effective(source, dest); } static void biba_ipq_update(struct mbuf *m, struct label *mlabel, struct ipq *q, struct label *qlabel) { /* NOOP: we only accept matching labels, so no need to update */ } static int biba_kld_check_load(struct ucred *cred, struct vnode *vp, struct label *vplabel) { struct mac_biba *subj, *obj; int error; if (!biba_enabled) return (0); subj = SLOT(cred->cr_label); error = biba_subject_privileged(subj); if (error) return (error); obj = SLOT(vplabel); if (!biba_high_effective(obj)) return (EACCES); return (0); } static int biba_mount_check_stat(struct ucred *cred, struct mount *mp, struct label *mplabel) { struct mac_biba *subj, *obj; if (!biba_enabled) return (0); subj = SLOT(cred->cr_label); obj = SLOT(mplabel); if (!biba_dominate_effective(obj, subj)) return (EACCES); return (0); } static void biba_mount_create(struct ucred *cred, struct mount *mp, struct label *mplabel) { struct mac_biba *source, *dest; source = SLOT(cred->cr_label); dest = SLOT(mplabel); biba_copy_effective(source, dest); } static void biba_netinet_arp_send(struct ifnet *ifp, struct label *ifplabel, struct mbuf *m, struct label *mlabel) { struct mac_biba *dest; dest = SLOT(mlabel); biba_set_effective(dest, MAC_BIBA_TYPE_EQUAL, 0, NULL); } static void biba_netinet_firewall_reply(struct mbuf *mrecv, struct label *mrecvlabel, struct mbuf *msend, struct label *msendlabel) { struct mac_biba *source, *dest; source = SLOT(mrecvlabel); dest = SLOT(msendlabel); biba_copy_effective(source, dest); } static void biba_netinet_firewall_send(struct mbuf *m, struct label *mlabel) { struct mac_biba *dest; dest = SLOT(mlabel); /* XXX: where is the label for the firewall really coming from? */ biba_set_effective(dest, MAC_BIBA_TYPE_EQUAL, 0, NULL); } static void biba_netinet_fragment(struct mbuf *m, struct label *mlabel, struct mbuf *frag, struct label *fraglabel) { struct mac_biba *source, *dest; source = SLOT(mlabel); dest = SLOT(fraglabel); biba_copy_effective(source, dest); } static void biba_netinet_icmp_reply(struct mbuf *mrecv, struct label *mrecvlabel, struct mbuf *msend, struct label *msendlabel) { struct mac_biba *source, *dest; source = SLOT(mrecvlabel); dest = SLOT(msendlabel); biba_copy_effective(source, dest); } static void biba_netinet_igmp_send(struct ifnet *ifp, struct label *ifplabel, struct mbuf *m, struct label *mlabel) { struct mac_biba *dest; dest = SLOT(mlabel); biba_set_effective(dest, MAC_BIBA_TYPE_EQUAL, 0, NULL); } static void biba_netinet6_nd6_send(struct ifnet *ifp, struct label *ifplabel, struct mbuf *m, struct label *mlabel) { struct mac_biba *dest; dest = SLOT(mlabel); biba_set_effective(dest, MAC_BIBA_TYPE_EQUAL, 0, NULL); } static int biba_pipe_check_ioctl(struct ucred *cred, struct pipepair *pp, struct label *pplabel, unsigned long cmd, void /* caddr_t */ *data) { if(!biba_enabled) return (0); /* XXX: This will be implemented soon... 
*/ return (0); } static int biba_pipe_check_poll(struct ucred *cred, struct pipepair *pp, struct label *pplabel) { struct mac_biba *subj, *obj; if (!biba_enabled) return (0); subj = SLOT(cred->cr_label); obj = SLOT(pplabel); if (!biba_dominate_effective(obj, subj)) return (EACCES); return (0); } static int biba_pipe_check_read(struct ucred *cred, struct pipepair *pp, struct label *pplabel) { struct mac_biba *subj, *obj; if (!biba_enabled) return (0); subj = SLOT(cred->cr_label); obj = SLOT(pplabel); if (!biba_dominate_effective(obj, subj)) return (EACCES); return (0); } static int biba_pipe_check_relabel(struct ucred *cred, struct pipepair *pp, struct label *pplabel, struct label *newlabel) { struct mac_biba *subj, *obj, *new; int error; new = SLOT(newlabel); subj = SLOT(cred->cr_label); obj = SLOT(pplabel); /* * If there is a Biba label update for a pipe, it must be a effective * update. */ error = biba_atmostflags(new, MAC_BIBA_FLAG_EFFECTIVE); if (error) return (error); /* * To perform a relabel of a pipe (Biba label or not), Biba must * authorize the relabel. */ if (!biba_effective_in_range(obj, subj)) return (EPERM); /* * If the Biba label is to be changed, authorize as appropriate. */ if (new->mb_flags & MAC_BIBA_FLAG_EFFECTIVE) { /* * To change the Biba label on a pipe, the new pipe label * must be in the subject range. */ if (!biba_effective_in_range(new, subj)) return (EPERM); /* * To change the Biba label on a pipe to be EQUAL, the * subject must have appropriate privilege. */ if (biba_contains_equal(new)) { error = biba_subject_privileged(subj); if (error) return (error); } } return (0); } static int biba_pipe_check_stat(struct ucred *cred, struct pipepair *pp, struct label *pplabel) { struct mac_biba *subj, *obj; if (!biba_enabled) return (0); subj = SLOT(cred->cr_label); obj = SLOT(pplabel); if (!biba_dominate_effective(obj, subj)) return (EACCES); return (0); } static int biba_pipe_check_write(struct ucred *cred, struct pipepair *pp, struct label *pplabel) { struct mac_biba *subj, *obj; if (!biba_enabled) return (0); subj = SLOT(cred->cr_label); obj = SLOT(pplabel); if (!biba_dominate_effective(subj, obj)) return (EACCES); return (0); } static void biba_pipe_create(struct ucred *cred, struct pipepair *pp, struct label *pplabel) { struct mac_biba *source, *dest; source = SLOT(cred->cr_label); dest = SLOT(pplabel); biba_copy_effective(source, dest); } static void biba_pipe_relabel(struct ucred *cred, struct pipepair *pp, struct label *pplabel, struct label *newlabel) { struct mac_biba *source, *dest; source = SLOT(newlabel); dest = SLOT(pplabel); biba_copy(source, dest); } static int biba_posixsem_check_openunlink(struct ucred *cred, struct ksem *ks, struct label *kslabel) { struct mac_biba *subj, *obj; if (!biba_enabled) return (0); subj = SLOT(cred->cr_label); obj = SLOT(kslabel); if (!biba_dominate_effective(subj, obj)) return (EACCES); return (0); } static int biba_posixsem_check_setmode(struct ucred *cred, struct ksem *ks, struct label *kslabel, mode_t mode) { struct mac_biba *subj, *obj; if (!biba_enabled) return (0); subj = SLOT(cred->cr_label); obj = SLOT(kslabel); if (!biba_dominate_effective(subj, obj)) return (EACCES); return (0); } static int biba_posixsem_check_setowner(struct ucred *cred, struct ksem *ks, struct label *kslabel, uid_t uid, gid_t gid) { struct mac_biba *subj, *obj; if (!biba_enabled) return (0); subj = SLOT(cred->cr_label); obj = SLOT(kslabel); if (!biba_dominate_effective(subj, obj)) return (EACCES); return (0); } static int 
biba_posixsem_check_write(struct ucred *active_cred, struct ucred *file_cred, struct ksem *ks, struct label *kslabel) { struct mac_biba *subj, *obj; if (!biba_enabled) return (0); subj = SLOT(active_cred->cr_label); obj = SLOT(kslabel); if (!biba_dominate_effective(subj, obj)) return (EACCES); return (0); } static int biba_posixsem_check_rdonly(struct ucred *active_cred, struct ucred *file_cred, struct ksem *ks, struct label *kslabel) { struct mac_biba *subj, *obj; if (!biba_enabled) return (0); subj = SLOT(active_cred->cr_label); obj = SLOT(kslabel); if (!biba_dominate_effective(obj, subj)) return (EACCES); return (0); } static void biba_posixsem_create(struct ucred *cred, struct ksem *ks, struct label *kslabel) { struct mac_biba *source, *dest; source = SLOT(cred->cr_label); dest = SLOT(kslabel); biba_copy_effective(source, dest); } static int biba_posixshm_check_mmap(struct ucred *cred, struct shmfd *shmfd, struct label *shmlabel, int prot, int flags) { struct mac_biba *subj, *obj; if (!biba_enabled || !revocation_enabled) return (0); subj = SLOT(cred->cr_label); obj = SLOT(shmlabel); if (prot & (VM_PROT_READ | VM_PROT_EXECUTE)) { if (!biba_dominate_effective(obj, subj)) return (EACCES); } if (((prot & VM_PROT_WRITE) != 0) && ((flags & MAP_SHARED) != 0)) { if (!biba_dominate_effective(subj, obj)) return (EACCES); } return (0); } static int biba_posixshm_check_open(struct ucred *cred, struct shmfd *shmfd, struct label *shmlabel, accmode_t accmode) { struct mac_biba *subj, *obj; if (!biba_enabled) return (0); subj = SLOT(cred->cr_label); obj = SLOT(shmlabel); if (accmode & (VREAD | VEXEC | VSTAT_PERMS)) { if (!biba_dominate_effective(obj, subj)) return (EACCES); } if (accmode & VMODIFY_PERMS) { if (!biba_dominate_effective(subj, obj)) return (EACCES); } return (0); } static int biba_posixshm_check_read(struct ucred *active_cred, struct ucred *file_cred, struct shmfd *vp, struct label *shmlabel) { struct mac_biba *subj, *obj; if (!biba_enabled || !revocation_enabled) return (0); subj = SLOT(active_cred->cr_label); obj = SLOT(shmlabel); if (!biba_dominate_effective(obj, subj)) return (EACCES); return (0); } static int biba_posixshm_check_setmode(struct ucred *cred, struct shmfd *shmfd, struct label *shmlabel, mode_t mode) { struct mac_biba *subj, *obj; if (!biba_enabled) return (0); subj = SLOT(cred->cr_label); obj = SLOT(shmlabel); if (!biba_dominate_effective(subj, obj)) return (EACCES); return (0); } static int biba_posixshm_check_setowner(struct ucred *cred, struct shmfd *shmfd, struct label *shmlabel, uid_t uid, gid_t gid) { struct mac_biba *subj, *obj; if (!biba_enabled) return (0); subj = SLOT(cred->cr_label); obj = SLOT(shmlabel); if (!biba_dominate_effective(subj, obj)) return (EACCES); return (0); } static int biba_posixshm_check_stat(struct ucred *active_cred, struct ucred *file_cred, struct shmfd *shmfd, struct label *shmlabel) { struct mac_biba *subj, *obj; if (!biba_enabled) return (0); subj = SLOT(active_cred->cr_label); obj = SLOT(shmlabel); if (!biba_dominate_effective(obj, subj)) return (EACCES); return (0); } static int biba_posixshm_check_truncate(struct ucred *active_cred, struct ucred *file_cred, struct shmfd *shmfd, struct label *shmlabel) { struct mac_biba *subj, *obj; if (!biba_enabled) return (0); subj = SLOT(active_cred->cr_label); obj = SLOT(shmlabel); if (!biba_dominate_effective(subj, obj)) return (EACCES); return (0); } static int biba_posixshm_check_unlink(struct ucred *cred, struct shmfd *shmfd, struct label *shmlabel) { struct mac_biba *subj, *obj; if 
(!biba_enabled) return (0); subj = SLOT(cred->cr_label); obj = SLOT(shmlabel); if (!biba_dominate_effective(subj, obj)) return (EACCES); return (0); } static int biba_posixshm_check_write(struct ucred *active_cred, struct ucred *file_cred, struct shmfd *vp, struct label *shmlabel) { struct mac_biba *subj, *obj; if (!biba_enabled || !revocation_enabled) return (0); subj = SLOT(active_cred->cr_label); obj = SLOT(shmlabel); if (!biba_dominate_effective(obj, subj)) return (EACCES); return (0); } static void biba_posixshm_create(struct ucred *cred, struct shmfd *shmfd, struct label *shmlabel) { struct mac_biba *source, *dest; source = SLOT(cred->cr_label); dest = SLOT(shmlabel); biba_copy_effective(source, dest); } /* * Some system privileges are allowed regardless of integrity grade; others * are allowed only when running with privilege with respect to the Biba * policy as they might otherwise allow bypassing of the integrity policy. */ static int biba_priv_check(struct ucred *cred, int priv) { struct mac_biba *subj; int error; if (!biba_enabled) return (0); /* * Exempt only specific privileges from the Biba integrity policy. */ switch (priv) { case PRIV_KTRACE: case PRIV_MSGBUF: /* * Allow processes to manipulate basic process audit properties, and * to submit audit records. */ case PRIV_AUDIT_GETAUDIT: case PRIV_AUDIT_SETAUDIT: case PRIV_AUDIT_SUBMIT: /* * Allow processes to manipulate their regular UNIX credentials. */ case PRIV_CRED_SETUID: case PRIV_CRED_SETEUID: case PRIV_CRED_SETGID: case PRIV_CRED_SETEGID: case PRIV_CRED_SETGROUPS: case PRIV_CRED_SETREUID: case PRIV_CRED_SETREGID: case PRIV_CRED_SETRESUID: case PRIV_CRED_SETRESGID: /* * Allow processes to perform system monitoring. */ case PRIV_SEEOTHERGIDS: case PRIV_SEEOTHERUIDS: + case PRIV_SEEJAILPROC: break; /* * Allow access to general process debugging facilities. We * separately control debugging based on MAC label. */ case PRIV_DEBUG_DIFFCRED: case PRIV_DEBUG_SUGID: case PRIV_DEBUG_UNPRIV: /* * Allow manipulating jails. */ case PRIV_JAIL_ATTACH: /* * Allow privilege with respect to the Partition policy, but not the * Privs policy. */ case PRIV_MAC_PARTITION: /* * Allow privilege with respect to process resource limits and login * context. */ case PRIV_PROC_LIMIT: case PRIV_PROC_SETLOGIN: case PRIV_PROC_SETRLIMIT: /* * Allow System V and POSIX IPC privileges. */ case PRIV_IPC_READ: case PRIV_IPC_WRITE: case PRIV_IPC_ADMIN: case PRIV_IPC_MSGSIZE: case PRIV_MQ_ADMIN: /* * Allow certain scheduler manipulations -- possibly this should be * controlled by more fine-grained policy, as potentially low * integrity processes can deny CPU to higher integrity ones. */ case PRIV_SCHED_DIFFCRED: case PRIV_SCHED_SETPRIORITY: case PRIV_SCHED_RTPRIO: case PRIV_SCHED_SETPOLICY: case PRIV_SCHED_SET: case PRIV_SCHED_SETPARAM: case PRIV_SCHED_IDPRIO: /* * More IPC privileges. */ case PRIV_SEM_WRITE: /* * Allow signaling privileges subject to integrity policy. */ case PRIV_SIGNAL_DIFFCRED: case PRIV_SIGNAL_SUGID: /* * Allow access to only limited sysctls from lower integrity levels; * piggy-back on the Jail definition. */ case PRIV_SYSCTL_WRITEJAIL: /* * Allow TTY-based privileges, subject to general device access using * labels on TTY device nodes, but not console privilege. */ case PRIV_TTY_DRAINWAIT: case PRIV_TTY_DTRWAIT: case PRIV_TTY_EXCLUSIVE: case PRIV_TTY_STI: case PRIV_TTY_SETA: /* * Grant most VFS privileges, as almost all are in practice bounded * by more specific checks using labels. 
*/ case PRIV_VFS_READ: case PRIV_VFS_WRITE: case PRIV_VFS_ADMIN: case PRIV_VFS_EXEC: case PRIV_VFS_LOOKUP: case PRIV_VFS_CHFLAGS_DEV: case PRIV_VFS_CHOWN: case PRIV_VFS_CHROOT: case PRIV_VFS_RETAINSUGID: case PRIV_VFS_EXCEEDQUOTA: case PRIV_VFS_FCHROOT: case PRIV_VFS_FHOPEN: case PRIV_VFS_FHSTATFS: case PRIV_VFS_GENERATION: case PRIV_VFS_GETFH: case PRIV_VFS_GETQUOTA: case PRIV_VFS_LINK: case PRIV_VFS_MOUNT: case PRIV_VFS_MOUNT_OWNER: case PRIV_VFS_MOUNT_PERM: case PRIV_VFS_MOUNT_SUIDDIR: case PRIV_VFS_MOUNT_NONUSER: case PRIV_VFS_SETGID: case PRIV_VFS_STICKYFILE: case PRIV_VFS_SYSFLAGS: case PRIV_VFS_UNMOUNT: /* * Allow VM privileges; it would be nice if these were subject to * resource limits. */ case PRIV_VM_MADV_PROTECT: case PRIV_VM_MLOCK: case PRIV_VM_MUNLOCK: case PRIV_VM_SWAP_NOQUOTA: case PRIV_VM_SWAP_NORLIMIT: /* * Allow some but not all network privileges. In general, dont allow * reconfiguring the network stack, just normal use. */ case PRIV_NETINET_RESERVEDPORT: case PRIV_NETINET_RAW: case PRIV_NETINET_REUSEPORT: break; /* * All remaining system privileges are allow only if the process * holds privilege with respect to the Biba policy. */ default: subj = SLOT(cred->cr_label); error = biba_subject_privileged(subj); if (error) return (error); } return (0); } static int biba_proc_check_debug(struct ucred *cred, struct proc *p) { struct mac_biba *subj, *obj; if (!biba_enabled) return (0); subj = SLOT(cred->cr_label); obj = SLOT(p->p_ucred->cr_label); /* XXX: range checks */ if (!biba_dominate_effective(obj, subj)) return (ESRCH); if (!biba_dominate_effective(subj, obj)) return (EACCES); return (0); } static int biba_proc_check_sched(struct ucred *cred, struct proc *p) { struct mac_biba *subj, *obj; if (!biba_enabled) return (0); subj = SLOT(cred->cr_label); obj = SLOT(p->p_ucred->cr_label); /* XXX: range checks */ if (!biba_dominate_effective(obj, subj)) return (ESRCH); if (!biba_dominate_effective(subj, obj)) return (EACCES); return (0); } static int biba_proc_check_signal(struct ucred *cred, struct proc *p, int signum) { struct mac_biba *subj, *obj; if (!biba_enabled) return (0); subj = SLOT(cred->cr_label); obj = SLOT(p->p_ucred->cr_label); /* XXX: range checks */ if (!biba_dominate_effective(obj, subj)) return (ESRCH); if (!biba_dominate_effective(subj, obj)) return (EACCES); return (0); } static int biba_socket_check_deliver(struct socket *so, struct label *solabel, struct mbuf *m, struct label *mlabel) { struct mac_biba *p, *s; int error; if (!biba_enabled) return (0); p = SLOT(mlabel); s = SLOT(solabel); SOCK_LOCK(so); error = biba_equal_effective(p, s) ? 0 : EACCES; SOCK_UNLOCK(so); return (error); } static int biba_socket_check_relabel(struct ucred *cred, struct socket *so, struct label *solabel, struct label *newlabel) { struct mac_biba *subj, *obj, *new; int error; SOCK_LOCK_ASSERT(so); new = SLOT(newlabel); subj = SLOT(cred->cr_label); obj = SLOT(solabel); /* * If there is a Biba label update for the socket, it may be an * update of effective. */ error = biba_atmostflags(new, MAC_BIBA_FLAG_EFFECTIVE); if (error) return (error); /* * To relabel a socket, the old socket effective must be in the * subject range. */ if (!biba_effective_in_range(obj, subj)) return (EPERM); /* * If the Biba label is to be changed, authorize as appropriate. */ if (new->mb_flags & MAC_BIBA_FLAG_EFFECTIVE) { /* * To relabel a socket, the new socket effective must be in * the subject range. 
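The switch in biba_priv_check() above, which this change extends with PRIV_SEEJAILPROC alongside the other "see other processes/credentials" monitoring privileges, follows a common shape: exempted privileges fall through to a bare break, and anything not listed requires the subject to hold privilege with respect to Biba. A reduced sketch of that control flow; the SKETCH_PRIV_* constants and the subject_is_biba_privileged flag are stand-ins invented here, not the kernel's PRIV_* values or biba_subject_privileged():

#include <errno.h>
#include <stdbool.h>
#include <stdio.h>

/* Stand-ins for a few PRIV_* values; the real ones live in sys/priv.h. */
enum {
	SKETCH_PRIV_SEEOTHERUIDS = 1,
	SKETCH_PRIV_SEEJAILPROC,
	SKETCH_PRIV_VFS_READ,
	SKETCH_PRIV_NET_SETIFADDR,	/* not exempted below */
};

/* Placeholder for the biba_subject_privileged() test on the subject label. */
static bool subject_is_biba_privileged = false;

static int
sketch_priv_check(int priv)
{
	switch (priv) {
	/* Monitoring and ordinary-use privileges are exempt from the policy. */
	case SKETCH_PRIV_SEEOTHERUIDS:
	case SKETCH_PRIV_SEEJAILPROC:
	case SKETCH_PRIV_VFS_READ:
		break;
	/* Everything else needs privilege with respect to Biba. */
	default:
		if (!subject_is_biba_privileged)
			return (EPERM);
	}
	return (0);
}

int
main(void)
{
	printf("seejailproc: %d\n",
	    sketch_priv_check(SKETCH_PRIV_SEEJAILPROC));
	printf("setifaddr:   %d\n",
	    sketch_priv_check(SKETCH_PRIV_NET_SETIFADDR));
	return (0);
}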
*/ if (!biba_effective_in_range(new, subj)) return (EPERM); /* * To change the Biba label on the socket to contain EQUAL, * the subject must have appropriate privilege. */ if (biba_contains_equal(new)) { error = biba_subject_privileged(subj); if (error) return (error); } } return (0); } static int biba_socket_check_visible(struct ucred *cred, struct socket *so, struct label *solabel) { struct mac_biba *subj, *obj; if (!biba_enabled) return (0); subj = SLOT(cred->cr_label); obj = SLOT(solabel); SOCK_LOCK(so); if (!biba_dominate_effective(obj, subj)) { SOCK_UNLOCK(so); return (ENOENT); } SOCK_UNLOCK(so); return (0); } static void biba_socket_create(struct ucred *cred, struct socket *so, struct label *solabel) { struct mac_biba *source, *dest; source = SLOT(cred->cr_label); dest = SLOT(solabel); biba_copy_effective(source, dest); } static void biba_socket_create_mbuf(struct socket *so, struct label *solabel, struct mbuf *m, struct label *mlabel) { struct mac_biba *source, *dest; source = SLOT(solabel); dest = SLOT(mlabel); SOCK_LOCK(so); biba_copy_effective(source, dest); SOCK_UNLOCK(so); } static void biba_socket_newconn(struct socket *oldso, struct label *oldsolabel, struct socket *newso, struct label *newsolabel) { struct mac_biba source, *dest; SOCK_LOCK(oldso); source = *SLOT(oldsolabel); SOCK_UNLOCK(oldso); dest = SLOT(newsolabel); SOCK_LOCK(newso); biba_copy_effective(&source, dest); SOCK_UNLOCK(newso); } static void biba_socket_relabel(struct ucred *cred, struct socket *so, struct label *solabel, struct label *newlabel) { struct mac_biba *source, *dest; SOCK_LOCK_ASSERT(so); source = SLOT(newlabel); dest = SLOT(solabel); biba_copy(source, dest); } static void biba_socketpeer_set_from_mbuf(struct mbuf *m, struct label *mlabel, struct socket *so, struct label *sopeerlabel) { struct mac_biba *source, *dest; source = SLOT(mlabel); dest = SLOT(sopeerlabel); SOCK_LOCK(so); biba_copy_effective(source, dest); SOCK_UNLOCK(so); } static void biba_socketpeer_set_from_socket(struct socket *oldso, struct label *oldsolabel, struct socket *newso, struct label *newsopeerlabel) { struct mac_biba source, *dest; SOCK_LOCK(oldso); source = *SLOT(oldsolabel); SOCK_UNLOCK(oldso); dest = SLOT(newsopeerlabel); SOCK_LOCK(newso); biba_copy_effective(&source, dest); SOCK_UNLOCK(newso); } static void biba_syncache_create(struct label *label, struct inpcb *inp) { struct mac_biba *source, *dest; source = SLOT(inp->inp_label); dest = SLOT(label); biba_copy_effective(source, dest); } static void biba_syncache_create_mbuf(struct label *sc_label, struct mbuf *m, struct label *mlabel) { struct mac_biba *source, *dest; source = SLOT(sc_label); dest = SLOT(mlabel); biba_copy_effective(source, dest); } static int biba_system_check_acct(struct ucred *cred, struct vnode *vp, struct label *vplabel) { struct mac_biba *subj, *obj; int error; if (!biba_enabled) return (0); subj = SLOT(cred->cr_label); error = biba_subject_privileged(subj); if (error) return (error); if (vplabel == NULL) return (0); obj = SLOT(vplabel); if (!biba_high_effective(obj)) return (EACCES); return (0); } static int biba_system_check_auditctl(struct ucred *cred, struct vnode *vp, struct label *vplabel) { struct mac_biba *subj, *obj; int error; if (!biba_enabled) return (0); subj = SLOT(cred->cr_label); error = biba_subject_privileged(subj); if (error) return (error); if (vplabel == NULL) return (0); obj = SLOT(vplabel); if (!biba_high_effective(obj)) return (EACCES); return (0); } static int biba_system_check_auditon(struct ucred *cred, int cmd) { 
struct mac_biba *subj; int error; if (!biba_enabled) return (0); subj = SLOT(cred->cr_label); error = biba_subject_privileged(subj); if (error) return (error); return (0); } static int biba_system_check_swapoff(struct ucred *cred, struct vnode *vp, struct label *label) { struct mac_biba *subj; int error; if (!biba_enabled) return (0); subj = SLOT(cred->cr_label); error = biba_subject_privileged(subj); if (error) return (error); return (0); } static int biba_system_check_swapon(struct ucred *cred, struct vnode *vp, struct label *vplabel) { struct mac_biba *subj, *obj; int error; if (!biba_enabled) return (0); subj = SLOT(cred->cr_label); obj = SLOT(vplabel); error = biba_subject_privileged(subj); if (error) return (error); if (!biba_high_effective(obj)) return (EACCES); return (0); } static int biba_system_check_sysctl(struct ucred *cred, struct sysctl_oid *oidp, void *arg1, int arg2, struct sysctl_req *req) { struct mac_biba *subj; int error; if (!biba_enabled) return (0); subj = SLOT(cred->cr_label); /* * Treat sysctl variables without CTLFLAG_ANYBODY flag as biba/high, * but also require privilege to change them. */ if (req->newptr != NULL && (oidp->oid_kind & CTLFLAG_ANYBODY) == 0) { if (!biba_subject_dominate_high(subj)) return (EACCES); error = biba_subject_privileged(subj); if (error) return (error); } return (0); } static void biba_sysvmsg_cleanup(struct label *msglabel) { bzero(SLOT(msglabel), sizeof(struct mac_biba)); } static void biba_sysvmsg_create(struct ucred *cred, struct msqid_kernel *msqkptr, struct label *msqlabel, struct msg *msgptr, struct label *msglabel) { struct mac_biba *source, *dest; /* Ignore the msgq label */ source = SLOT(cred->cr_label); dest = SLOT(msglabel); biba_copy_effective(source, dest); } static int biba_sysvmsq_check_msgrcv(struct ucred *cred, struct msg *msgptr, struct label *msglabel) { struct mac_biba *subj, *obj; if (!biba_enabled) return (0); subj = SLOT(cred->cr_label); obj = SLOT(msglabel); if (!biba_dominate_effective(obj, subj)) return (EACCES); return (0); } static int biba_sysvmsq_check_msgrmid(struct ucred *cred, struct msg *msgptr, struct label *msglabel) { struct mac_biba *subj, *obj; if (!biba_enabled) return (0); subj = SLOT(cred->cr_label); obj = SLOT(msglabel); if (!biba_dominate_effective(subj, obj)) return (EACCES); return (0); } static int biba_sysvmsq_check_msqget(struct ucred *cred, struct msqid_kernel *msqkptr, struct label *msqklabel) { struct mac_biba *subj, *obj; if (!biba_enabled) return (0); subj = SLOT(cred->cr_label); obj = SLOT(msqklabel); if (!biba_dominate_effective(obj, subj)) return (EACCES); return (0); } static int biba_sysvmsq_check_msqsnd(struct ucred *cred, struct msqid_kernel *msqkptr, struct label *msqklabel) { struct mac_biba *subj, *obj; if (!biba_enabled) return (0); subj = SLOT(cred->cr_label); obj = SLOT(msqklabel); if (!biba_dominate_effective(subj, obj)) return (EACCES); return (0); } static int biba_sysvmsq_check_msqrcv(struct ucred *cred, struct msqid_kernel *msqkptr, struct label *msqklabel) { struct mac_biba *subj, *obj; if (!biba_enabled) return (0); subj = SLOT(cred->cr_label); obj = SLOT(msqklabel); if (!biba_dominate_effective(obj, subj)) return (EACCES); return (0); } static int biba_sysvmsq_check_msqctl(struct ucred *cred, struct msqid_kernel *msqkptr, struct label *msqklabel, int cmd) { struct mac_biba *subj, *obj; if (!biba_enabled) return (0); subj = SLOT(cred->cr_label); obj = SLOT(msqklabel); switch(cmd) { case IPC_RMID: case IPC_SET: if (!biba_dominate_effective(subj, obj)) return 
(EACCES); break; case IPC_STAT: if (!biba_dominate_effective(obj, subj)) return (EACCES); break; default: return (EACCES); } return (0); } static void biba_sysvmsq_cleanup(struct label *msqlabel) { bzero(SLOT(msqlabel), sizeof(struct mac_biba)); } static void biba_sysvmsq_create(struct ucred *cred, struct msqid_kernel *msqkptr, struct label *msqlabel) { struct mac_biba *source, *dest; source = SLOT(cred->cr_label); dest = SLOT(msqlabel); biba_copy_effective(source, dest); } static int biba_sysvsem_check_semctl(struct ucred *cred, struct semid_kernel *semakptr, struct label *semaklabel, int cmd) { struct mac_biba *subj, *obj; if (!biba_enabled) return (0); subj = SLOT(cred->cr_label); obj = SLOT(semaklabel); switch(cmd) { case IPC_RMID: case IPC_SET: case SETVAL: case SETALL: if (!biba_dominate_effective(subj, obj)) return (EACCES); break; case IPC_STAT: case GETVAL: case GETPID: case GETNCNT: case GETZCNT: case GETALL: if (!biba_dominate_effective(obj, subj)) return (EACCES); break; default: return (EACCES); } return (0); } static int biba_sysvsem_check_semget(struct ucred *cred, struct semid_kernel *semakptr, struct label *semaklabel) { struct mac_biba *subj, *obj; if (!biba_enabled) return (0); subj = SLOT(cred->cr_label); obj = SLOT(semaklabel); if (!biba_dominate_effective(obj, subj)) return (EACCES); return (0); } static int biba_sysvsem_check_semop(struct ucred *cred, struct semid_kernel *semakptr, struct label *semaklabel, size_t accesstype) { struct mac_biba *subj, *obj; if (!biba_enabled) return (0); subj = SLOT(cred->cr_label); obj = SLOT(semaklabel); if (accesstype & SEM_R) if (!biba_dominate_effective(obj, subj)) return (EACCES); if (accesstype & SEM_A) if (!biba_dominate_effective(subj, obj)) return (EACCES); return (0); } static void biba_sysvsem_cleanup(struct label *semalabel) { bzero(SLOT(semalabel), sizeof(struct mac_biba)); } static void biba_sysvsem_create(struct ucred *cred, struct semid_kernel *semakptr, struct label *semalabel) { struct mac_biba *source, *dest; source = SLOT(cred->cr_label); dest = SLOT(semalabel); biba_copy_effective(source, dest); } static int biba_sysvshm_check_shmat(struct ucred *cred, struct shmid_kernel *shmsegptr, struct label *shmseglabel, int shmflg) { struct mac_biba *subj, *obj; if (!biba_enabled) return (0); subj = SLOT(cred->cr_label); obj = SLOT(shmseglabel); if (!biba_dominate_effective(obj, subj)) return (EACCES); if ((shmflg & SHM_RDONLY) == 0) { if (!biba_dominate_effective(subj, obj)) return (EACCES); } return (0); } static int biba_sysvshm_check_shmctl(struct ucred *cred, struct shmid_kernel *shmsegptr, struct label *shmseglabel, int cmd) { struct mac_biba *subj, *obj; if (!biba_enabled) return (0); subj = SLOT(cred->cr_label); obj = SLOT(shmseglabel); switch(cmd) { case IPC_RMID: case IPC_SET: if (!biba_dominate_effective(subj, obj)) return (EACCES); break; case IPC_STAT: case SHM_STAT: if (!biba_dominate_effective(obj, subj)) return (EACCES); break; default: return (EACCES); } return (0); } static int biba_sysvshm_check_shmget(struct ucred *cred, struct shmid_kernel *shmsegptr, struct label *shmseglabel, int shmflg) { struct mac_biba *subj, *obj; if (!biba_enabled) return (0); subj = SLOT(cred->cr_label); obj = SLOT(shmseglabel); if (!biba_dominate_effective(obj, subj)) return (EACCES); return (0); } static void biba_sysvshm_cleanup(struct label *shmlabel) { bzero(SLOT(shmlabel), sizeof(struct mac_biba)); } static void biba_sysvshm_create(struct ucred *cred, struct shmid_kernel *shmsegptr, struct label *shmlabel) { struct 
mac_biba *source, *dest; source = SLOT(cred->cr_label); dest = SLOT(shmlabel); biba_copy_effective(source, dest); } static int biba_vnode_associate_extattr(struct mount *mp, struct label *mplabel, struct vnode *vp, struct label *vplabel) { struct mac_biba mb_temp, *source, *dest; int buflen, error; source = SLOT(mplabel); dest = SLOT(vplabel); buflen = sizeof(mb_temp); bzero(&mb_temp, buflen); error = vn_extattr_get(vp, IO_NODELOCKED, MAC_BIBA_EXTATTR_NAMESPACE, MAC_BIBA_EXTATTR_NAME, &buflen, (char *) &mb_temp, curthread); if (error == ENOATTR || error == EOPNOTSUPP) { /* Fall back to the mntlabel. */ biba_copy_effective(source, dest); return (0); } else if (error) return (error); if (buflen != sizeof(mb_temp)) { printf("biba_vnode_associate_extattr: bad size %d\n", buflen); return (EPERM); } if (biba_valid(&mb_temp) != 0) { printf("biba_vnode_associate_extattr: invalid\n"); return (EPERM); } if ((mb_temp.mb_flags & MAC_BIBA_FLAGS_BOTH) != MAC_BIBA_FLAG_EFFECTIVE) { printf("biba_vnode_associate_extattr: not effective\n"); return (EPERM); } biba_copy_effective(&mb_temp, dest); return (0); } static void biba_vnode_associate_singlelabel(struct mount *mp, struct label *mplabel, struct vnode *vp, struct label *vplabel) { struct mac_biba *source, *dest; source = SLOT(mplabel); dest = SLOT(vplabel); biba_copy_effective(source, dest); } static int biba_vnode_check_chdir(struct ucred *cred, struct vnode *dvp, struct label *dvplabel) { struct mac_biba *subj, *obj; if (!biba_enabled) return (0); subj = SLOT(cred->cr_label); obj = SLOT(dvplabel); if (!biba_dominate_effective(obj, subj)) return (EACCES); return (0); } static int biba_vnode_check_chroot(struct ucred *cred, struct vnode *dvp, struct label *dvplabel) { struct mac_biba *subj, *obj; if (!biba_enabled) return (0); subj = SLOT(cred->cr_label); obj = SLOT(dvplabel); if (!biba_dominate_effective(obj, subj)) return (EACCES); return (0); } static int biba_vnode_check_create(struct ucred *cred, struct vnode *dvp, struct label *dvplabel, struct componentname *cnp, struct vattr *vap) { struct mac_biba *subj, *obj; if (!biba_enabled) return (0); subj = SLOT(cred->cr_label); obj = SLOT(dvplabel); if (!biba_dominate_effective(subj, obj)) return (EACCES); return (0); } static int biba_vnode_check_deleteacl(struct ucred *cred, struct vnode *vp, struct label *vplabel, acl_type_t type) { struct mac_biba *subj, *obj; if (!biba_enabled) return (0); subj = SLOT(cred->cr_label); obj = SLOT(vplabel); if (!biba_dominate_effective(subj, obj)) return (EACCES); return (0); } static int biba_vnode_check_deleteextattr(struct ucred *cred, struct vnode *vp, struct label *vplabel, int attrnamespace, const char *name) { struct mac_biba *subj, *obj; if (!biba_enabled) return (0); subj = SLOT(cred->cr_label); obj = SLOT(vplabel); if (!biba_dominate_effective(subj, obj)) return (EACCES); return (0); } static int biba_vnode_check_exec(struct ucred *cred, struct vnode *vp, struct label *vplabel, struct image_params *imgp, struct label *execlabel) { struct mac_biba *subj, *obj, *exec; int error; if (execlabel != NULL) { /* * We currently don't permit labels to be changed at * exec-time as part of Biba, so disallow non-NULL Biba label * elements in the execlabel. 
*/ exec = SLOT(execlabel); error = biba_atmostflags(exec, 0); if (error) return (error); } if (!biba_enabled) return (0); subj = SLOT(cred->cr_label); obj = SLOT(vplabel); if (!biba_dominate_effective(obj, subj)) return (EACCES); return (0); } static int biba_vnode_check_getacl(struct ucred *cred, struct vnode *vp, struct label *vplabel, acl_type_t type) { struct mac_biba *subj, *obj; if (!biba_enabled) return (0); subj = SLOT(cred->cr_label); obj = SLOT(vplabel); if (!biba_dominate_effective(obj, subj)) return (EACCES); return (0); } static int biba_vnode_check_getextattr(struct ucred *cred, struct vnode *vp, struct label *vplabel, int attrnamespace, const char *name) { struct mac_biba *subj, *obj; if (!biba_enabled) return (0); subj = SLOT(cred->cr_label); obj = SLOT(vplabel); if (!biba_dominate_effective(obj, subj)) return (EACCES); return (0); } static int biba_vnode_check_link(struct ucred *cred, struct vnode *dvp, struct label *dvplabel, struct vnode *vp, struct label *vplabel, struct componentname *cnp) { struct mac_biba *subj, *obj; if (!biba_enabled) return (0); subj = SLOT(cred->cr_label); obj = SLOT(dvplabel); if (!biba_dominate_effective(subj, obj)) return (EACCES); obj = SLOT(vplabel); if (!biba_dominate_effective(subj, obj)) return (EACCES); return (0); } static int biba_vnode_check_listextattr(struct ucred *cred, struct vnode *vp, struct label *vplabel, int attrnamespace) { struct mac_biba *subj, *obj; if (!biba_enabled) return (0); subj = SLOT(cred->cr_label); obj = SLOT(vplabel); if (!biba_dominate_effective(obj, subj)) return (EACCES); return (0); } static int biba_vnode_check_lookup(struct ucred *cred, struct vnode *dvp, struct label *dvplabel, struct componentname *cnp) { struct mac_biba *subj, *obj; if (!biba_enabled) return (0); subj = SLOT(cred->cr_label); obj = SLOT(dvplabel); if (!biba_dominate_effective(obj, subj)) return (EACCES); return (0); } static int biba_vnode_check_mmap(struct ucred *cred, struct vnode *vp, struct label *vplabel, int prot, int flags) { struct mac_biba *subj, *obj; /* * Rely on the use of open()-time protections to handle * non-revocation cases. */ if (!biba_enabled || !revocation_enabled) return (0); subj = SLOT(cred->cr_label); obj = SLOT(vplabel); if (prot & (VM_PROT_READ | VM_PROT_EXECUTE)) { if (!biba_dominate_effective(obj, subj)) return (EACCES); } if (((prot & VM_PROT_WRITE) != 0) && ((flags & MAP_SHARED) != 0)) { if (!biba_dominate_effective(subj, obj)) return (EACCES); } return (0); } static int biba_vnode_check_open(struct ucred *cred, struct vnode *vp, struct label *vplabel, accmode_t accmode) { struct mac_biba *subj, *obj; if (!biba_enabled) return (0); subj = SLOT(cred->cr_label); obj = SLOT(vplabel); /* XXX privilege override for admin? 
*/ if (accmode & (VREAD | VEXEC | VSTAT_PERMS)) { if (!biba_dominate_effective(obj, subj)) return (EACCES); } if (accmode & VMODIFY_PERMS) { if (!biba_dominate_effective(subj, obj)) return (EACCES); } return (0); } static int biba_vnode_check_poll(struct ucred *active_cred, struct ucred *file_cred, struct vnode *vp, struct label *vplabel) { struct mac_biba *subj, *obj; if (!biba_enabled || !revocation_enabled) return (0); subj = SLOT(active_cred->cr_label); obj = SLOT(vplabel); if (!biba_dominate_effective(obj, subj)) return (EACCES); return (0); } static int biba_vnode_check_read(struct ucred *active_cred, struct ucred *file_cred, struct vnode *vp, struct label *vplabel) { struct mac_biba *subj, *obj; if (!biba_enabled || !revocation_enabled) return (0); subj = SLOT(active_cred->cr_label); obj = SLOT(vplabel); if (!biba_dominate_effective(obj, subj)) return (EACCES); return (0); } static int biba_vnode_check_readdir(struct ucred *cred, struct vnode *dvp, struct label *dvplabel) { struct mac_biba *subj, *obj; if (!biba_enabled) return (0); subj = SLOT(cred->cr_label); obj = SLOT(dvplabel); if (!biba_dominate_effective(obj, subj)) return (EACCES); return (0); } static int biba_vnode_check_readlink(struct ucred *cred, struct vnode *vp, struct label *vplabel) { struct mac_biba *subj, *obj; if (!biba_enabled) return (0); subj = SLOT(cred->cr_label); obj = SLOT(vplabel); if (!biba_dominate_effective(obj, subj)) return (EACCES); return (0); } static int biba_vnode_check_relabel(struct ucred *cred, struct vnode *vp, struct label *vplabel, struct label *newlabel) { struct mac_biba *old, *new, *subj; int error; old = SLOT(vplabel); new = SLOT(newlabel); subj = SLOT(cred->cr_label); /* * If there is a Biba label update for the vnode, it must be a * effective label. */ error = biba_atmostflags(new, MAC_BIBA_FLAG_EFFECTIVE); if (error) return (error); /* * To perform a relabel of the vnode (Biba label or not), Biba must * authorize the relabel. */ if (!biba_effective_in_range(old, subj)) return (EPERM); /* * If the Biba label is to be changed, authorize as appropriate. */ if (new->mb_flags & MAC_BIBA_FLAG_EFFECTIVE) { /* * To change the Biba label on a vnode, the new vnode label * must be in the subject range. */ if (!biba_effective_in_range(new, subj)) return (EPERM); /* * To change the Biba label on the vnode to be EQUAL, the * subject must have appropriate privilege. 
*/ if (biba_contains_equal(new)) { error = biba_subject_privileged(subj); if (error) return (error); } } return (0); } static int biba_vnode_check_rename_from(struct ucred *cred, struct vnode *dvp, struct label *dvplabel, struct vnode *vp, struct label *vplabel, struct componentname *cnp) { struct mac_biba *subj, *obj; if (!biba_enabled) return (0); subj = SLOT(cred->cr_label); obj = SLOT(dvplabel); if (!biba_dominate_effective(subj, obj)) return (EACCES); obj = SLOT(vplabel); if (!biba_dominate_effective(subj, obj)) return (EACCES); return (0); } static int biba_vnode_check_rename_to(struct ucred *cred, struct vnode *dvp, struct label *dvplabel, struct vnode *vp, struct label *vplabel, int samedir, struct componentname *cnp) { struct mac_biba *subj, *obj; if (!biba_enabled) return (0); subj = SLOT(cred->cr_label); obj = SLOT(dvplabel); if (!biba_dominate_effective(subj, obj)) return (EACCES); if (vp != NULL) { obj = SLOT(vplabel); if (!biba_dominate_effective(subj, obj)) return (EACCES); } return (0); } static int biba_vnode_check_revoke(struct ucred *cred, struct vnode *vp, struct label *vplabel) { struct mac_biba *subj, *obj; if (!biba_enabled) return (0); subj = SLOT(cred->cr_label); obj = SLOT(vplabel); if (!biba_dominate_effective(subj, obj)) return (EACCES); return (0); } static int biba_vnode_check_setacl(struct ucred *cred, struct vnode *vp, struct label *vplabel, acl_type_t type, struct acl *acl) { struct mac_biba *subj, *obj; if (!biba_enabled) return (0); subj = SLOT(cred->cr_label); obj = SLOT(vplabel); if (!biba_dominate_effective(subj, obj)) return (EACCES); return (0); } static int biba_vnode_check_setextattr(struct ucred *cred, struct vnode *vp, struct label *vplabel, int attrnamespace, const char *name) { struct mac_biba *subj, *obj; if (!biba_enabled) return (0); subj = SLOT(cred->cr_label); obj = SLOT(vplabel); if (!biba_dominate_effective(subj, obj)) return (EACCES); /* XXX: protect the MAC EA in a special way? 
*/ return (0); } static int biba_vnode_check_setflags(struct ucred *cred, struct vnode *vp, struct label *vplabel, u_long flags) { struct mac_biba *subj, *obj; if (!biba_enabled) return (0); subj = SLOT(cred->cr_label); obj = SLOT(vplabel); if (!biba_dominate_effective(subj, obj)) return (EACCES); return (0); } static int biba_vnode_check_setmode(struct ucred *cred, struct vnode *vp, struct label *vplabel, mode_t mode) { struct mac_biba *subj, *obj; if (!biba_enabled) return (0); subj = SLOT(cred->cr_label); obj = SLOT(vplabel); if (!biba_dominate_effective(subj, obj)) return (EACCES); return (0); } static int biba_vnode_check_setowner(struct ucred *cred, struct vnode *vp, struct label *vplabel, uid_t uid, gid_t gid) { struct mac_biba *subj, *obj; if (!biba_enabled) return (0); subj = SLOT(cred->cr_label); obj = SLOT(vplabel); if (!biba_dominate_effective(subj, obj)) return (EACCES); return (0); } static int biba_vnode_check_setutimes(struct ucred *cred, struct vnode *vp, struct label *vplabel, struct timespec atime, struct timespec mtime) { struct mac_biba *subj, *obj; if (!biba_enabled) return (0); subj = SLOT(cred->cr_label); obj = SLOT(vplabel); if (!biba_dominate_effective(subj, obj)) return (EACCES); return (0); } static int biba_vnode_check_stat(struct ucred *active_cred, struct ucred *file_cred, struct vnode *vp, struct label *vplabel) { struct mac_biba *subj, *obj; if (!biba_enabled) return (0); subj = SLOT(active_cred->cr_label); obj = SLOT(vplabel); if (!biba_dominate_effective(obj, subj)) return (EACCES); return (0); } static int biba_vnode_check_unlink(struct ucred *cred, struct vnode *dvp, struct label *dvplabel, struct vnode *vp, struct label *vplabel, struct componentname *cnp) { struct mac_biba *subj, *obj; if (!biba_enabled) return (0); subj = SLOT(cred->cr_label); obj = SLOT(dvplabel); if (!biba_dominate_effective(subj, obj)) return (EACCES); obj = SLOT(vplabel); if (!biba_dominate_effective(subj, obj)) return (EACCES); return (0); } static int biba_vnode_check_write(struct ucred *active_cred, struct ucred *file_cred, struct vnode *vp, struct label *vplabel) { struct mac_biba *subj, *obj; if (!biba_enabled || !revocation_enabled) return (0); subj = SLOT(active_cred->cr_label); obj = SLOT(vplabel); if (!biba_dominate_effective(subj, obj)) return (EACCES); return (0); } static int biba_vnode_create_extattr(struct ucred *cred, struct mount *mp, struct label *mplabel, struct vnode *dvp, struct label *dvplabel, struct vnode *vp, struct label *vplabel, struct componentname *cnp) { struct mac_biba *source, *dest, mb_temp; size_t buflen; int error; buflen = sizeof(mb_temp); bzero(&mb_temp, buflen); source = SLOT(cred->cr_label); dest = SLOT(vplabel); biba_copy_effective(source, &mb_temp); error = vn_extattr_set(vp, IO_NODELOCKED, MAC_BIBA_EXTATTR_NAMESPACE, MAC_BIBA_EXTATTR_NAME, buflen, (char *) &mb_temp, curthread); if (error == 0) biba_copy_effective(source, dest); return (error); } static void biba_vnode_relabel(struct ucred *cred, struct vnode *vp, struct label *vplabel, struct label *newlabel) { struct mac_biba *source, *dest; source = SLOT(newlabel); dest = SLOT(vplabel); biba_copy(source, dest); } static int biba_vnode_setlabel_extattr(struct ucred *cred, struct vnode *vp, struct label *vplabel, struct label *intlabel) { struct mac_biba *source, mb_temp; size_t buflen; int error; buflen = sizeof(mb_temp); bzero(&mb_temp, buflen); source = SLOT(intlabel); if ((source->mb_flags & MAC_BIBA_FLAG_EFFECTIVE) == 0) return (0); biba_copy_effective(source, &mb_temp); error = 
vn_extattr_set(vp, IO_NODELOCKED, MAC_BIBA_EXTATTR_NAMESPACE, MAC_BIBA_EXTATTR_NAME, buflen, (char *) &mb_temp, curthread); return (error); } static struct mac_policy_ops mac_biba_ops = { .mpo_init = biba_init, .mpo_bpfdesc_check_receive = biba_bpfdesc_check_receive, .mpo_bpfdesc_create = biba_bpfdesc_create, .mpo_bpfdesc_create_mbuf = biba_bpfdesc_create_mbuf, .mpo_bpfdesc_destroy_label = biba_destroy_label, .mpo_bpfdesc_init_label = biba_init_label, .mpo_cred_associate_nfsd = biba_cred_associate_nfsd, .mpo_cred_check_relabel = biba_cred_check_relabel, .mpo_cred_check_visible = biba_cred_check_visible, .mpo_cred_copy_label = biba_copy_label, .mpo_cred_create_init = biba_cred_create_init, .mpo_cred_create_swapper = biba_cred_create_swapper, .mpo_cred_destroy_label = biba_destroy_label, .mpo_cred_externalize_label = biba_externalize_label, .mpo_cred_init_label = biba_init_label, .mpo_cred_internalize_label = biba_internalize_label, .mpo_cred_relabel = biba_cred_relabel, .mpo_devfs_create_device = biba_devfs_create_device, .mpo_devfs_create_directory = biba_devfs_create_directory, .mpo_devfs_create_symlink = biba_devfs_create_symlink, .mpo_devfs_destroy_label = biba_destroy_label, .mpo_devfs_init_label = biba_init_label, .mpo_devfs_update = biba_devfs_update, .mpo_devfs_vnode_associate = biba_devfs_vnode_associate, .mpo_ifnet_check_relabel = biba_ifnet_check_relabel, .mpo_ifnet_check_transmit = biba_ifnet_check_transmit, .mpo_ifnet_copy_label = biba_copy_label, .mpo_ifnet_create = biba_ifnet_create, .mpo_ifnet_create_mbuf = biba_ifnet_create_mbuf, .mpo_ifnet_destroy_label = biba_destroy_label, .mpo_ifnet_externalize_label = biba_externalize_label, .mpo_ifnet_init_label = biba_init_label, .mpo_ifnet_internalize_label = biba_internalize_label, .mpo_ifnet_relabel = biba_ifnet_relabel, .mpo_inpcb_check_deliver = biba_inpcb_check_deliver, .mpo_inpcb_check_visible = biba_inpcb_check_visible, .mpo_inpcb_create = biba_inpcb_create, .mpo_inpcb_create_mbuf = biba_inpcb_create_mbuf, .mpo_inpcb_destroy_label = biba_destroy_label, .mpo_inpcb_init_label = biba_init_label_waitcheck, .mpo_inpcb_sosetlabel = biba_inpcb_sosetlabel, .mpo_ip6q_create = biba_ip6q_create, .mpo_ip6q_destroy_label = biba_destroy_label, .mpo_ip6q_init_label = biba_init_label_waitcheck, .mpo_ip6q_match = biba_ip6q_match, .mpo_ip6q_reassemble = biba_ip6q_reassemble, .mpo_ip6q_update = biba_ip6q_update, .mpo_ipq_create = biba_ipq_create, .mpo_ipq_destroy_label = biba_destroy_label, .mpo_ipq_init_label = biba_init_label_waitcheck, .mpo_ipq_match = biba_ipq_match, .mpo_ipq_reassemble = biba_ipq_reassemble, .mpo_ipq_update = biba_ipq_update, .mpo_kld_check_load = biba_kld_check_load, .mpo_mbuf_copy_label = biba_copy_label, .mpo_mbuf_destroy_label = biba_destroy_label, .mpo_mbuf_init_label = biba_init_label_waitcheck, .mpo_mount_check_stat = biba_mount_check_stat, .mpo_mount_create = biba_mount_create, .mpo_mount_destroy_label = biba_destroy_label, .mpo_mount_init_label = biba_init_label, .mpo_netinet_arp_send = biba_netinet_arp_send, .mpo_netinet_firewall_reply = biba_netinet_firewall_reply, .mpo_netinet_firewall_send = biba_netinet_firewall_send, .mpo_netinet_fragment = biba_netinet_fragment, .mpo_netinet_icmp_reply = biba_netinet_icmp_reply, .mpo_netinet_igmp_send = biba_netinet_igmp_send, .mpo_netinet6_nd6_send = biba_netinet6_nd6_send, .mpo_pipe_check_ioctl = biba_pipe_check_ioctl, .mpo_pipe_check_poll = biba_pipe_check_poll, .mpo_pipe_check_read = biba_pipe_check_read, .mpo_pipe_check_relabel = biba_pipe_check_relabel, 
.mpo_pipe_check_stat = biba_pipe_check_stat, .mpo_pipe_check_write = biba_pipe_check_write, .mpo_pipe_copy_label = biba_copy_label, .mpo_pipe_create = biba_pipe_create, .mpo_pipe_destroy_label = biba_destroy_label, .mpo_pipe_externalize_label = biba_externalize_label, .mpo_pipe_init_label = biba_init_label, .mpo_pipe_internalize_label = biba_internalize_label, .mpo_pipe_relabel = biba_pipe_relabel, .mpo_posixsem_check_getvalue = biba_posixsem_check_rdonly, .mpo_posixsem_check_open = biba_posixsem_check_openunlink, .mpo_posixsem_check_post = biba_posixsem_check_write, .mpo_posixsem_check_setmode = biba_posixsem_check_setmode, .mpo_posixsem_check_setowner = biba_posixsem_check_setowner, .mpo_posixsem_check_stat = biba_posixsem_check_rdonly, .mpo_posixsem_check_unlink = biba_posixsem_check_openunlink, .mpo_posixsem_check_wait = biba_posixsem_check_write, .mpo_posixsem_create = biba_posixsem_create, .mpo_posixsem_destroy_label = biba_destroy_label, .mpo_posixsem_init_label = biba_init_label, .mpo_posixshm_check_mmap = biba_posixshm_check_mmap, .mpo_posixshm_check_open = biba_posixshm_check_open, .mpo_posixshm_check_read = biba_posixshm_check_read, .mpo_posixshm_check_setmode = biba_posixshm_check_setmode, .mpo_posixshm_check_setowner = biba_posixshm_check_setowner, .mpo_posixshm_check_stat = biba_posixshm_check_stat, .mpo_posixshm_check_truncate = biba_posixshm_check_truncate, .mpo_posixshm_check_unlink = biba_posixshm_check_unlink, .mpo_posixshm_check_write = biba_posixshm_check_write, .mpo_posixshm_create = biba_posixshm_create, .mpo_posixshm_destroy_label = biba_destroy_label, .mpo_posixshm_init_label = biba_init_label, .mpo_priv_check = biba_priv_check, .mpo_proc_check_debug = biba_proc_check_debug, .mpo_proc_check_sched = biba_proc_check_sched, .mpo_proc_check_signal = biba_proc_check_signal, .mpo_socket_check_deliver = biba_socket_check_deliver, .mpo_socket_check_relabel = biba_socket_check_relabel, .mpo_socket_check_visible = biba_socket_check_visible, .mpo_socket_copy_label = biba_copy_label, .mpo_socket_create = biba_socket_create, .mpo_socket_create_mbuf = biba_socket_create_mbuf, .mpo_socket_destroy_label = biba_destroy_label, .mpo_socket_externalize_label = biba_externalize_label, .mpo_socket_init_label = biba_init_label_waitcheck, .mpo_socket_internalize_label = biba_internalize_label, .mpo_socket_newconn = biba_socket_newconn, .mpo_socket_relabel = biba_socket_relabel, .mpo_socketpeer_destroy_label = biba_destroy_label, .mpo_socketpeer_externalize_label = biba_externalize_label, .mpo_socketpeer_init_label = biba_init_label_waitcheck, .mpo_socketpeer_set_from_mbuf = biba_socketpeer_set_from_mbuf, .mpo_socketpeer_set_from_socket = biba_socketpeer_set_from_socket, .mpo_syncache_create = biba_syncache_create, .mpo_syncache_create_mbuf = biba_syncache_create_mbuf, .mpo_syncache_destroy_label = biba_destroy_label, .mpo_syncache_init_label = biba_init_label_waitcheck, .mpo_system_check_acct = biba_system_check_acct, .mpo_system_check_auditctl = biba_system_check_auditctl, .mpo_system_check_auditon = biba_system_check_auditon, .mpo_system_check_swapoff = biba_system_check_swapoff, .mpo_system_check_swapon = biba_system_check_swapon, .mpo_system_check_sysctl = biba_system_check_sysctl, .mpo_sysvmsg_cleanup = biba_sysvmsg_cleanup, .mpo_sysvmsg_create = biba_sysvmsg_create, .mpo_sysvmsg_destroy_label = biba_destroy_label, .mpo_sysvmsg_init_label = biba_init_label, .mpo_sysvmsq_check_msgrcv = biba_sysvmsq_check_msgrcv, .mpo_sysvmsq_check_msgrmid = biba_sysvmsq_check_msgrmid, 
.mpo_sysvmsq_check_msqget = biba_sysvmsq_check_msqget, .mpo_sysvmsq_check_msqsnd = biba_sysvmsq_check_msqsnd, .mpo_sysvmsq_check_msqrcv = biba_sysvmsq_check_msqrcv, .mpo_sysvmsq_check_msqctl = biba_sysvmsq_check_msqctl, .mpo_sysvmsq_cleanup = biba_sysvmsq_cleanup, .mpo_sysvmsq_create = biba_sysvmsq_create, .mpo_sysvmsq_destroy_label = biba_destroy_label, .mpo_sysvmsq_init_label = biba_init_label, .mpo_sysvsem_check_semctl = biba_sysvsem_check_semctl, .mpo_sysvsem_check_semget = biba_sysvsem_check_semget, .mpo_sysvsem_check_semop = biba_sysvsem_check_semop, .mpo_sysvsem_cleanup = biba_sysvsem_cleanup, .mpo_sysvsem_create = biba_sysvsem_create, .mpo_sysvsem_destroy_label = biba_destroy_label, .mpo_sysvsem_init_label = biba_init_label, .mpo_sysvshm_check_shmat = biba_sysvshm_check_shmat, .mpo_sysvshm_check_shmctl = biba_sysvshm_check_shmctl, .mpo_sysvshm_check_shmget = biba_sysvshm_check_shmget, .mpo_sysvshm_cleanup = biba_sysvshm_cleanup, .mpo_sysvshm_create = biba_sysvshm_create, .mpo_sysvshm_destroy_label = biba_destroy_label, .mpo_sysvshm_init_label = biba_init_label, .mpo_vnode_associate_extattr = biba_vnode_associate_extattr, .mpo_vnode_associate_singlelabel = biba_vnode_associate_singlelabel, .mpo_vnode_check_access = biba_vnode_check_open, .mpo_vnode_check_chdir = biba_vnode_check_chdir, .mpo_vnode_check_chroot = biba_vnode_check_chroot, .mpo_vnode_check_create = biba_vnode_check_create, .mpo_vnode_check_deleteacl = biba_vnode_check_deleteacl, .mpo_vnode_check_deleteextattr = biba_vnode_check_deleteextattr, .mpo_vnode_check_exec = biba_vnode_check_exec, .mpo_vnode_check_getacl = biba_vnode_check_getacl, .mpo_vnode_check_getextattr = biba_vnode_check_getextattr, .mpo_vnode_check_link = biba_vnode_check_link, .mpo_vnode_check_listextattr = biba_vnode_check_listextattr, .mpo_vnode_check_lookup = biba_vnode_check_lookup, .mpo_vnode_check_mmap = biba_vnode_check_mmap, .mpo_vnode_check_open = biba_vnode_check_open, .mpo_vnode_check_poll = biba_vnode_check_poll, .mpo_vnode_check_read = biba_vnode_check_read, .mpo_vnode_check_readdir = biba_vnode_check_readdir, .mpo_vnode_check_readlink = biba_vnode_check_readlink, .mpo_vnode_check_relabel = biba_vnode_check_relabel, .mpo_vnode_check_rename_from = biba_vnode_check_rename_from, .mpo_vnode_check_rename_to = biba_vnode_check_rename_to, .mpo_vnode_check_revoke = biba_vnode_check_revoke, .mpo_vnode_check_setacl = biba_vnode_check_setacl, .mpo_vnode_check_setextattr = biba_vnode_check_setextattr, .mpo_vnode_check_setflags = biba_vnode_check_setflags, .mpo_vnode_check_setmode = biba_vnode_check_setmode, .mpo_vnode_check_setowner = biba_vnode_check_setowner, .mpo_vnode_check_setutimes = biba_vnode_check_setutimes, .mpo_vnode_check_stat = biba_vnode_check_stat, .mpo_vnode_check_unlink = biba_vnode_check_unlink, .mpo_vnode_check_write = biba_vnode_check_write, .mpo_vnode_create_extattr = biba_vnode_create_extattr, .mpo_vnode_copy_label = biba_copy_label, .mpo_vnode_destroy_label = biba_destroy_label, .mpo_vnode_externalize_label = biba_externalize_label, .mpo_vnode_init_label = biba_init_label, .mpo_vnode_internalize_label = biba_internalize_label, .mpo_vnode_relabel = biba_vnode_relabel, .mpo_vnode_setlabel_extattr = biba_vnode_setlabel_extattr, }; MAC_POLICY_SET(&mac_biba_ops, mac_biba, "TrustedBSD MAC/Biba", MPC_LOADTIME_FLAG_NOTLATE, &biba_slot); diff --git a/sys/security/mac_lomac/mac_lomac.c b/sys/security/mac_lomac/mac_lomac.c index 05bd0da06960..aa9abf458721 100644 --- a/sys/security/mac_lomac/mac_lomac.c +++ 
b/sys/security/mac_lomac/mac_lomac.c @@ -1,3069 +1,3070 @@ /*- * Copyright (c) 1999-2002, 2007-2009 Robert N. M. Watson * Copyright (c) 2001-2005 Networks Associates Technology, Inc. * Copyright (c) 2006 SPARTA, Inc. * All rights reserved. * * This software was developed by Robert Watson for the TrustedBSD Project. * * This software was developed for the FreeBSD Project in part by NAI Labs, * the Security Research Division of Network Associates, Inc. under * DARPA/SPAWAR contract N66001-01-C-8035 ("CBOSS"), as part of the DARPA * CHATS research program. * * This software was enhanced by SPARTA ISSO under SPAWAR contract * N66001-04-C-6019 ("SEFOS"). * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ /* * Developed by the TrustedBSD Project. * * Low-watermark floating label mandatory integrity policy. 
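*
* In the low-watermark model a subject is not denied read access to
* lower-integrity data; instead its label is demoted ("floats" down) to
* the level of what it observes (see maybe_demote()), while writes to
* objects of higher integrity than the subject are refused outright.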
*/ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include struct mac_lomac_proc { struct mac_lomac mac_lomac; struct mtx mtx; }; SYSCTL_DECL(_security_mac); static SYSCTL_NODE(_security_mac, OID_AUTO, lomac, CTLFLAG_RW | CTLFLAG_MPSAFE, 0, "TrustedBSD mac_lomac policy controls"); static int lomac_label_size = sizeof(struct mac_lomac); SYSCTL_INT(_security_mac_lomac, OID_AUTO, label_size, CTLFLAG_RD, &lomac_label_size, 0, "Size of struct mac_lomac"); static int lomac_enabled = 1; SYSCTL_INT(_security_mac_lomac, OID_AUTO, enabled, CTLFLAG_RWTUN, &lomac_enabled, 0, "Enforce MAC/LOMAC policy"); static int destroyed_not_inited; SYSCTL_INT(_security_mac_lomac, OID_AUTO, destroyed_not_inited, CTLFLAG_RD, &destroyed_not_inited, 0, "Count of labels destroyed but not inited"); static int trust_all_interfaces = 0; SYSCTL_INT(_security_mac_lomac, OID_AUTO, trust_all_interfaces, CTLFLAG_RDTUN, &trust_all_interfaces, 0, "Consider all interfaces 'trusted' by MAC/LOMAC"); static char trusted_interfaces[128]; SYSCTL_STRING(_security_mac_lomac, OID_AUTO, trusted_interfaces, CTLFLAG_RDTUN, trusted_interfaces, 0, "Interfaces considered 'trusted' by MAC/LOMAC"); static int ptys_equal = 0; SYSCTL_INT(_security_mac_lomac, OID_AUTO, ptys_equal, CTLFLAG_RWTUN, &ptys_equal, 0, "Label pty devices as lomac/equal on create"); static int revocation_enabled = 1; SYSCTL_INT(_security_mac_lomac, OID_AUTO, revocation_enabled, CTLFLAG_RWTUN, &revocation_enabled, 0, "Revoke access to objects on relabel"); static int lomac_slot; #define SLOT(l) ((struct mac_lomac *)mac_label_get((l), lomac_slot)) #define SLOT_SET(l, val) mac_label_set((l), lomac_slot, (uintptr_t)(val)) #define PSLOT(l) ((struct mac_lomac_proc *) \ mac_label_get((l), lomac_slot)) #define PSLOT_SET(l, val) mac_label_set((l), lomac_slot, (uintptr_t)(val)) static MALLOC_DEFINE(M_LOMAC, "mac_lomac_label", "MAC/LOMAC labels"); static struct mac_lomac * lomac_alloc(int flag) { struct mac_lomac *ml; ml = malloc(sizeof(*ml), M_LOMAC, M_ZERO | flag); return (ml); } static void lomac_free(struct mac_lomac *ml) { if (ml != NULL) free(ml, M_LOMAC); else atomic_add_int(&destroyed_not_inited, 1); } static int lomac_atmostflags(struct mac_lomac *ml, int flags) { if ((ml->ml_flags & flags) != ml->ml_flags) return (EINVAL); return (0); } static int lomac_dominate_element(struct mac_lomac_element *a, struct mac_lomac_element *b) { switch (a->mle_type) { case MAC_LOMAC_TYPE_EQUAL: case MAC_LOMAC_TYPE_HIGH: return (1); case MAC_LOMAC_TYPE_LOW: switch (b->mle_type) { case MAC_LOMAC_TYPE_GRADE: case MAC_LOMAC_TYPE_HIGH: return (0); case MAC_LOMAC_TYPE_EQUAL: case MAC_LOMAC_TYPE_LOW: return (1); default: panic("lomac_dominate_element: b->mle_type invalid"); } case MAC_LOMAC_TYPE_GRADE: switch (b->mle_type) { case MAC_LOMAC_TYPE_EQUAL: case MAC_LOMAC_TYPE_LOW: return (1); case MAC_LOMAC_TYPE_HIGH: return (0); case MAC_LOMAC_TYPE_GRADE: return (a->mle_grade >= b->mle_grade); default: panic("lomac_dominate_element: b->mle_type invalid"); } default: panic("lomac_dominate_element: a->mle_type invalid"); } } static int lomac_range_in_range(struct mac_lomac *rangea, struct mac_lomac *rangeb) { return (lomac_dominate_element(&rangeb->ml_rangehigh, &rangea->ml_rangehigh) && lomac_dominate_element(&rangea->ml_rangelow, 
&rangeb->ml_rangelow)); } static int lomac_single_in_range(struct mac_lomac *single, struct mac_lomac *range) { KASSERT((single->ml_flags & MAC_LOMAC_FLAG_SINGLE) != 0, ("lomac_single_in_range: a not single")); KASSERT((range->ml_flags & MAC_LOMAC_FLAG_RANGE) != 0, ("lomac_single_in_range: b not range")); return (lomac_dominate_element(&range->ml_rangehigh, &single->ml_single) && lomac_dominate_element(&single->ml_single, &range->ml_rangelow)); } static int lomac_auxsingle_in_range(struct mac_lomac *single, struct mac_lomac *range) { KASSERT((single->ml_flags & MAC_LOMAC_FLAG_AUX) != 0, ("lomac_single_in_range: a not auxsingle")); KASSERT((range->ml_flags & MAC_LOMAC_FLAG_RANGE) != 0, ("lomac_single_in_range: b not range")); return (lomac_dominate_element(&range->ml_rangehigh, &single->ml_auxsingle) && lomac_dominate_element(&single->ml_auxsingle, &range->ml_rangelow)); } static int lomac_dominate_single(struct mac_lomac *a, struct mac_lomac *b) { KASSERT((a->ml_flags & MAC_LOMAC_FLAG_SINGLE) != 0, ("lomac_dominate_single: a not single")); KASSERT((b->ml_flags & MAC_LOMAC_FLAG_SINGLE) != 0, ("lomac_dominate_single: b not single")); return (lomac_dominate_element(&a->ml_single, &b->ml_single)); } static int lomac_subject_dominate(struct mac_lomac *a, struct mac_lomac *b) { KASSERT((~a->ml_flags & (MAC_LOMAC_FLAG_SINGLE | MAC_LOMAC_FLAG_RANGE)) == 0, ("lomac_dominate_single: a not subject")); KASSERT((b->ml_flags & MAC_LOMAC_FLAG_SINGLE) != 0, ("lomac_dominate_single: b not single")); return (lomac_dominate_element(&a->ml_rangehigh, &b->ml_single)); } static int lomac_equal_element(struct mac_lomac_element *a, struct mac_lomac_element *b) { if (a->mle_type == MAC_LOMAC_TYPE_EQUAL || b->mle_type == MAC_LOMAC_TYPE_EQUAL) return (1); return (a->mle_type == b->mle_type && a->mle_grade == b->mle_grade); } static int lomac_equal_single(struct mac_lomac *a, struct mac_lomac *b) { KASSERT((a->ml_flags & MAC_LOMAC_FLAG_SINGLE) != 0, ("lomac_equal_single: a not single")); KASSERT((b->ml_flags & MAC_LOMAC_FLAG_SINGLE) != 0, ("lomac_equal_single: b not single")); return (lomac_equal_element(&a->ml_single, &b->ml_single)); } static int lomac_contains_equal(struct mac_lomac *ml) { if (ml->ml_flags & MAC_LOMAC_FLAG_SINGLE) if (ml->ml_single.mle_type == MAC_LOMAC_TYPE_EQUAL) return (1); if (ml->ml_flags & MAC_LOMAC_FLAG_AUX) if (ml->ml_auxsingle.mle_type == MAC_LOMAC_TYPE_EQUAL) return (1); if (ml->ml_flags & MAC_LOMAC_FLAG_RANGE) { if (ml->ml_rangelow.mle_type == MAC_LOMAC_TYPE_EQUAL) return (1); if (ml->ml_rangehigh.mle_type == MAC_LOMAC_TYPE_EQUAL) return (1); } return (0); } static int lomac_subject_privileged(struct mac_lomac *ml) { KASSERT((ml->ml_flags & MAC_LOMAC_FLAGS_BOTH) == MAC_LOMAC_FLAGS_BOTH, ("lomac_subject_privileged: subject doesn't have both labels")); /* If the single is EQUAL, it's ok. */ if (ml->ml_single.mle_type == MAC_LOMAC_TYPE_EQUAL) return (0); /* If either range endpoint is EQUAL, it's ok. */ if (ml->ml_rangelow.mle_type == MAC_LOMAC_TYPE_EQUAL || ml->ml_rangehigh.mle_type == MAC_LOMAC_TYPE_EQUAL) return (0); /* If the range is low-high, it's ok. */ if (ml->ml_rangelow.mle_type == MAC_LOMAC_TYPE_LOW && ml->ml_rangehigh.mle_type == MAC_LOMAC_TYPE_HIGH) return (0); /* It's not ok. 
*/ return (EPERM); } static int lomac_high_single(struct mac_lomac *ml) { KASSERT((ml->ml_flags & MAC_LOMAC_FLAG_SINGLE) != 0, ("lomac_high_single: mac_lomac not single")); return (ml->ml_single.mle_type == MAC_LOMAC_TYPE_HIGH); } static int lomac_valid(struct mac_lomac *ml) { if (ml->ml_flags & MAC_LOMAC_FLAG_SINGLE) { switch (ml->ml_single.mle_type) { case MAC_LOMAC_TYPE_GRADE: case MAC_LOMAC_TYPE_EQUAL: case MAC_LOMAC_TYPE_HIGH: case MAC_LOMAC_TYPE_LOW: break; default: return (EINVAL); } } else { if (ml->ml_single.mle_type != MAC_LOMAC_TYPE_UNDEF) return (EINVAL); } if (ml->ml_flags & MAC_LOMAC_FLAG_AUX) { switch (ml->ml_auxsingle.mle_type) { case MAC_LOMAC_TYPE_GRADE: case MAC_LOMAC_TYPE_EQUAL: case MAC_LOMAC_TYPE_HIGH: case MAC_LOMAC_TYPE_LOW: break; default: return (EINVAL); } } else { if (ml->ml_auxsingle.mle_type != MAC_LOMAC_TYPE_UNDEF) return (EINVAL); } if (ml->ml_flags & MAC_LOMAC_FLAG_RANGE) { switch (ml->ml_rangelow.mle_type) { case MAC_LOMAC_TYPE_GRADE: case MAC_LOMAC_TYPE_EQUAL: case MAC_LOMAC_TYPE_HIGH: case MAC_LOMAC_TYPE_LOW: break; default: return (EINVAL); } switch (ml->ml_rangehigh.mle_type) { case MAC_LOMAC_TYPE_GRADE: case MAC_LOMAC_TYPE_EQUAL: case MAC_LOMAC_TYPE_HIGH: case MAC_LOMAC_TYPE_LOW: break; default: return (EINVAL); } if (!lomac_dominate_element(&ml->ml_rangehigh, &ml->ml_rangelow)) return (EINVAL); } else { if (ml->ml_rangelow.mle_type != MAC_LOMAC_TYPE_UNDEF || ml->ml_rangehigh.mle_type != MAC_LOMAC_TYPE_UNDEF) return (EINVAL); } return (0); } static void lomac_set_range(struct mac_lomac *ml, u_short typelow, u_short gradelow, u_short typehigh, u_short gradehigh) { ml->ml_rangelow.mle_type = typelow; ml->ml_rangelow.mle_grade = gradelow; ml->ml_rangehigh.mle_type = typehigh; ml->ml_rangehigh.mle_grade = gradehigh; ml->ml_flags |= MAC_LOMAC_FLAG_RANGE; } static void lomac_set_single(struct mac_lomac *ml, u_short type, u_short grade) { ml->ml_single.mle_type = type; ml->ml_single.mle_grade = grade; ml->ml_flags |= MAC_LOMAC_FLAG_SINGLE; } static void lomac_copy_range(struct mac_lomac *labelfrom, struct mac_lomac *labelto) { KASSERT((labelfrom->ml_flags & MAC_LOMAC_FLAG_RANGE) != 0, ("lomac_copy_range: labelfrom not range")); labelto->ml_rangelow = labelfrom->ml_rangelow; labelto->ml_rangehigh = labelfrom->ml_rangehigh; labelto->ml_flags |= MAC_LOMAC_FLAG_RANGE; } static void lomac_copy_single(struct mac_lomac *labelfrom, struct mac_lomac *labelto) { KASSERT((labelfrom->ml_flags & MAC_LOMAC_FLAG_SINGLE) != 0, ("lomac_copy_single: labelfrom not single")); labelto->ml_single = labelfrom->ml_single; labelto->ml_flags |= MAC_LOMAC_FLAG_SINGLE; } static void lomac_copy_auxsingle(struct mac_lomac *labelfrom, struct mac_lomac *labelto) { KASSERT((labelfrom->ml_flags & MAC_LOMAC_FLAG_AUX) != 0, ("lomac_copy_auxsingle: labelfrom not auxsingle")); labelto->ml_auxsingle = labelfrom->ml_auxsingle; labelto->ml_flags |= MAC_LOMAC_FLAG_AUX; } static void lomac_copy(struct mac_lomac *source, struct mac_lomac *dest) { if (source->ml_flags & MAC_LOMAC_FLAG_SINGLE) lomac_copy_single(source, dest); if (source->ml_flags & MAC_LOMAC_FLAG_AUX) lomac_copy_auxsingle(source, dest); if (source->ml_flags & MAC_LOMAC_FLAG_RANGE) lomac_copy_range(source, dest); } static int lomac_to_string(struct sbuf *sb, struct mac_lomac *ml); static int maybe_demote(struct mac_lomac *subjlabel, struct mac_lomac *objlabel, const char *actionname, const char *objname, struct vnode *vp) { struct sbuf subjlabel_sb, subjtext_sb, objlabel_sb; char *subjlabeltext, *objlabeltext, *subjtext; struct mac_lomac 
cached_subjlabel; struct mac_lomac_proc *subj; struct vattr va; struct proc *p; pid_t pgid; subj = PSLOT(curthread->td_proc->p_label); p = curthread->td_proc; mtx_lock(&subj->mtx); if (subj->mac_lomac.ml_flags & MAC_LOMAC_FLAG_UPDATE) { /* * Check to see if the pending demotion would be more or less * severe than this one, and keep the more severe. This can * only happen for a multi-threaded application. */ if (lomac_dominate_single(objlabel, &subj->mac_lomac)) { mtx_unlock(&subj->mtx); return (0); } } bzero(&subj->mac_lomac, sizeof(subj->mac_lomac)); /* * Always demote the single label. */ lomac_copy_single(objlabel, &subj->mac_lomac); /* * Start with the original range, then minimize each side of the * range to the point of not dominating the object. The high side * will always be demoted, of course. */ lomac_copy_range(subjlabel, &subj->mac_lomac); if (!lomac_dominate_element(&objlabel->ml_single, &subj->mac_lomac.ml_rangelow)) subj->mac_lomac.ml_rangelow = objlabel->ml_single; subj->mac_lomac.ml_rangehigh = objlabel->ml_single; subj->mac_lomac.ml_flags |= MAC_LOMAC_FLAG_UPDATE; ast_sched(curthread, TDA_MAC); /* * Avoid memory allocation while holding a mutex; cache the label. */ lomac_copy_single(&subj->mac_lomac, &cached_subjlabel); mtx_unlock(&subj->mtx); sbuf_new(&subjlabel_sb, NULL, 0, SBUF_AUTOEXTEND); lomac_to_string(&subjlabel_sb, subjlabel); sbuf_finish(&subjlabel_sb); subjlabeltext = sbuf_data(&subjlabel_sb); sbuf_new(&subjtext_sb, NULL, 0, SBUF_AUTOEXTEND); lomac_to_string(&subjtext_sb, &subj->mac_lomac); sbuf_finish(&subjtext_sb); subjtext = sbuf_data(&subjtext_sb); sbuf_new(&objlabel_sb, NULL, 0, SBUF_AUTOEXTEND); lomac_to_string(&objlabel_sb, objlabel); sbuf_finish(&objlabel_sb); objlabeltext = sbuf_data(&objlabel_sb); pgid = p->p_pgrp->pg_id; /* XXX could be stale? */ if (vp != NULL && VOP_GETATTR(vp, &va, curthread->td_ucred) == 0) { log(LOG_INFO, "LOMAC: level-%s subject p%dg%du%d:%s demoted to" " level %s after %s a level-%s %s (inode=%ju, " "mountpount=%s)\n", subjlabeltext, p->p_pid, pgid, curthread->td_ucred->cr_uid, p->p_comm, subjtext, actionname, objlabeltext, objname, (uintmax_t)va.va_fileid, vp->v_mount->mnt_stat.f_mntonname); } else { log(LOG_INFO, "LOMAC: level-%s subject p%dg%du%d:%s demoted to" " level %s after %s a level-%s %s\n", subjlabeltext, p->p_pid, pgid, curthread->td_ucred->cr_uid, p->p_comm, subjtext, actionname, objlabeltext, objname); } sbuf_delete(&subjlabel_sb); sbuf_delete(&subjtext_sb); sbuf_delete(&objlabel_sb); return (0); } /* * Relabel "to" to "from" only if "from" is a valid label (contains at least * a single), as for a relabel operation which may or may not involve a * relevant label. */ static void try_relabel(struct mac_lomac *from, struct mac_lomac *to) { if (from->ml_flags & MAC_LOMAC_FLAG_SINGLE) { bzero(to, sizeof(*to)); lomac_copy(from, to); } } static void ast_mac(struct thread *td, int tda __unused) { mac_thread_userret(td); } /* * Policy module operations. */ static void lomac_init(struct mac_policy_conf *conf __unused) { ast_register(TDA_MAC, ASTR_ASTF_REQUIRED, 0, ast_mac); } static void lomac_fini(struct mac_policy_conf *conf __unused) { ast_deregister(TDA_MAC); } /* * Label operations. 
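*
* Textual labels handled by lomac_parse() and lomac_to_string() consist of
* a single element ("high", "low", "equal" or a numeric grade), optionally
* followed by an auxiliary single in brackets or a range in parentheses,
* e.g. "10(low-high)" or "10[equal]".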
*/ static void lomac_init_label(struct label *label) { SLOT_SET(label, lomac_alloc(M_WAITOK)); } static int lomac_init_label_waitcheck(struct label *label, int flag) { SLOT_SET(label, lomac_alloc(flag)); if (SLOT(label) == NULL) return (ENOMEM); return (0); } static void lomac_destroy_label(struct label *label) { lomac_free(SLOT(label)); SLOT_SET(label, NULL); } static int lomac_element_to_string(struct sbuf *sb, struct mac_lomac_element *element) { switch (element->mle_type) { case MAC_LOMAC_TYPE_HIGH: return (sbuf_printf(sb, "high")); case MAC_LOMAC_TYPE_LOW: return (sbuf_printf(sb, "low")); case MAC_LOMAC_TYPE_EQUAL: return (sbuf_printf(sb, "equal")); case MAC_LOMAC_TYPE_GRADE: return (sbuf_printf(sb, "%d", element->mle_grade)); default: panic("lomac_element_to_string: invalid type (%d)", element->mle_type); } } static int lomac_to_string(struct sbuf *sb, struct mac_lomac *ml) { if (ml->ml_flags & MAC_LOMAC_FLAG_SINGLE) { if (lomac_element_to_string(sb, &ml->ml_single) == -1) return (EINVAL); } if (ml->ml_flags & MAC_LOMAC_FLAG_AUX) { if (sbuf_putc(sb, '[') == -1) return (EINVAL); if (lomac_element_to_string(sb, &ml->ml_auxsingle) == -1) return (EINVAL); if (sbuf_putc(sb, ']') == -1) return (EINVAL); } if (ml->ml_flags & MAC_LOMAC_FLAG_RANGE) { if (sbuf_putc(sb, '(') == -1) return (EINVAL); if (lomac_element_to_string(sb, &ml->ml_rangelow) == -1) return (EINVAL); if (sbuf_putc(sb, '-') == -1) return (EINVAL); if (lomac_element_to_string(sb, &ml->ml_rangehigh) == -1) return (EINVAL); if (sbuf_putc(sb, ')') == -1) return (EINVAL); } return (0); } static int lomac_externalize_label(struct label *label, char *element_name, struct sbuf *sb, int *claimed) { struct mac_lomac *ml; if (strcmp(MAC_LOMAC_LABEL_NAME, element_name) != 0) return (0); (*claimed)++; ml = SLOT(label); return (lomac_to_string(sb, ml)); } static int lomac_parse_element(struct mac_lomac_element *element, char *string) { if (strcmp(string, "high") == 0 || strcmp(string, "hi") == 0) { element->mle_type = MAC_LOMAC_TYPE_HIGH; element->mle_grade = MAC_LOMAC_TYPE_UNDEF; } else if (strcmp(string, "low") == 0 || strcmp(string, "lo") == 0) { element->mle_type = MAC_LOMAC_TYPE_LOW; element->mle_grade = MAC_LOMAC_TYPE_UNDEF; } else if (strcmp(string, "equal") == 0 || strcmp(string, "eq") == 0) { element->mle_type = MAC_LOMAC_TYPE_EQUAL; element->mle_grade = MAC_LOMAC_TYPE_UNDEF; } else { char *p0, *p1; int d; p0 = string; d = strtol(p0, &p1, 10); if (d < 0 || d > 65535) return (EINVAL); element->mle_type = MAC_LOMAC_TYPE_GRADE; element->mle_grade = d; if (p1 == p0 || *p1 != '\0') return (EINVAL); } return (0); } /* * Note: destructively consumes the string, make a local copy before calling * if that's a problem. */ static int lomac_parse(struct mac_lomac *ml, char *string) { char *range, *rangeend, *rangehigh, *rangelow, *single, *auxsingle, *auxsingleend; int error; /* Do we have a range? */ single = string; range = strchr(string, '('); if (range == single) single = NULL; auxsingle = strchr(string, '['); if (auxsingle == single) single = NULL; if (range != NULL && auxsingle != NULL) return (EINVAL); rangelow = rangehigh = NULL; if (range != NULL) { /* Nul terminate the end of the single string. 
*/ *range = '\0'; range++; rangelow = range; rangehigh = strchr(rangelow, '-'); if (rangehigh == NULL) return (EINVAL); rangehigh++; if (*rangelow == '\0' || *rangehigh == '\0') return (EINVAL); rangeend = strchr(rangehigh, ')'); if (rangeend == NULL) return (EINVAL); if (*(rangeend + 1) != '\0') return (EINVAL); /* Nul terminate the ends of the ranges. */ *(rangehigh - 1) = '\0'; *rangeend = '\0'; } KASSERT((rangelow != NULL && rangehigh != NULL) || (rangelow == NULL && rangehigh == NULL), ("lomac_internalize_label: range mismatch")); if (auxsingle != NULL) { /* Nul terminate the end of the single string. */ *auxsingle = '\0'; auxsingle++; auxsingleend = strchr(auxsingle, ']'); if (auxsingleend == NULL) return (EINVAL); if (*(auxsingleend + 1) != '\0') return (EINVAL); /* Nul terminate the end of the auxsingle. */ *auxsingleend = '\0'; } bzero(ml, sizeof(*ml)); if (single != NULL) { error = lomac_parse_element(&ml->ml_single, single); if (error) return (error); ml->ml_flags |= MAC_LOMAC_FLAG_SINGLE; } if (auxsingle != NULL) { error = lomac_parse_element(&ml->ml_auxsingle, auxsingle); if (error) return (error); ml->ml_flags |= MAC_LOMAC_FLAG_AUX; } if (rangelow != NULL) { error = lomac_parse_element(&ml->ml_rangelow, rangelow); if (error) return (error); error = lomac_parse_element(&ml->ml_rangehigh, rangehigh); if (error) return (error); ml->ml_flags |= MAC_LOMAC_FLAG_RANGE; } error = lomac_valid(ml); if (error) return (error); return (0); } static int lomac_internalize_label(struct label *label, char *element_name, char *element_data, int *claimed) { struct mac_lomac *ml, ml_temp; int error; if (strcmp(MAC_LOMAC_LABEL_NAME, element_name) != 0) return (0); (*claimed)++; error = lomac_parse(&ml_temp, element_data); if (error) return (error); ml = SLOT(label); *ml = ml_temp; return (0); } static void lomac_copy_label(struct label *src, struct label *dest) { *SLOT(dest) = *SLOT(src); } /* * Object-specific entry point implementations are sorted alphabetically by * object type name and then by operation. */ static int lomac_bpfdesc_check_receive(struct bpf_d *d, struct label *dlabel, struct ifnet *ifp, struct label *ifplabel) { struct mac_lomac *a, *b; if (!lomac_enabled) return (0); a = SLOT(dlabel); b = SLOT(ifplabel); if (lomac_equal_single(a, b)) return (0); return (EACCES); } static void lomac_bpfdesc_create(struct ucred *cred, struct bpf_d *d, struct label *dlabel) { struct mac_lomac *source, *dest; source = SLOT(cred->cr_label); dest = SLOT(dlabel); lomac_copy_single(source, dest); } static void lomac_bpfdesc_create_mbuf(struct bpf_d *d, struct label *dlabel, struct mbuf *m, struct label *mlabel) { struct mac_lomac *source, *dest; source = SLOT(dlabel); dest = SLOT(mlabel); lomac_copy_single(source, dest); } static int lomac_cred_check_relabel(struct ucred *cred, struct label *newlabel) { struct mac_lomac *subj, *new; int error; subj = SLOT(cred->cr_label); new = SLOT(newlabel); /* * If there is a LOMAC label update for the credential, it may be an * update of the single, range, or both. */ error = lomac_atmostflags(new, MAC_LOMAC_FLAGS_BOTH); if (error) return (error); /* * If the LOMAC label is to be changed, authorize as appropriate. */ if (new->ml_flags & MAC_LOMAC_FLAGS_BOTH) { /* * Fill in the missing parts from the previous label. 
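* That way the range and single checks below always see a complete label.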
*/ if ((new->ml_flags & MAC_LOMAC_FLAG_SINGLE) == 0) lomac_copy_single(subj, new); if ((new->ml_flags & MAC_LOMAC_FLAG_RANGE) == 0) lomac_copy_range(subj, new); /* * To change the LOMAC range on a credential, the new range * label must be in the current range. */ if (!lomac_range_in_range(new, subj)) return (EPERM); /* * To change the LOMAC single label on a credential, the new * single label must be in the new range. Implicitly from * the previous check, the new single is in the old range. */ if (!lomac_single_in_range(new, new)) return (EPERM); /* * To have EQUAL in any component of the new credential LOMAC * label, the subject must already have EQUAL in their label. */ if (lomac_contains_equal(new)) { error = lomac_subject_privileged(subj); if (error) return (error); } /* * XXXMAC: Additional consistency tests regarding the single * and range of the new label might be performed here. */ } return (0); } static int lomac_cred_check_visible(struct ucred *cr1, struct ucred *cr2) { struct mac_lomac *subj, *obj; if (!lomac_enabled) return (0); subj = SLOT(cr1->cr_label); obj = SLOT(cr2->cr_label); /* XXX: range */ if (!lomac_dominate_single(obj, subj)) return (ESRCH); return (0); } static void lomac_cred_create_init(struct ucred *cred) { struct mac_lomac *dest; dest = SLOT(cred->cr_label); lomac_set_single(dest, MAC_LOMAC_TYPE_HIGH, 0); lomac_set_range(dest, MAC_LOMAC_TYPE_LOW, 0, MAC_LOMAC_TYPE_HIGH, 0); } static void lomac_cred_create_swapper(struct ucred *cred) { struct mac_lomac *dest; dest = SLOT(cred->cr_label); lomac_set_single(dest, MAC_LOMAC_TYPE_EQUAL, 0); lomac_set_range(dest, MAC_LOMAC_TYPE_LOW, 0, MAC_LOMAC_TYPE_HIGH, 0); } static void lomac_cred_relabel(struct ucred *cred, struct label *newlabel) { struct mac_lomac *source, *dest; source = SLOT(newlabel); dest = SLOT(cred->cr_label); try_relabel(source, dest); } static void lomac_devfs_create_device(struct ucred *cred, struct mount *mp, struct cdev *dev, struct devfs_dirent *de, struct label *delabel) { struct mac_lomac *ml; const char *dn; int lomac_type; ml = SLOT(delabel); dn = devtoname(dev); if (strcmp(dn, "null") == 0 || strcmp(dn, "zero") == 0 || strcmp(dn, "random") == 0 || strncmp(dn, "fd/", strlen("fd/")) == 0 || strncmp(dn, "ttyv", strlen("ttyv")) == 0) lomac_type = MAC_LOMAC_TYPE_EQUAL; else if (ptys_equal && (strncmp(dn, "ttyp", strlen("ttyp")) == 0 || strncmp(dn, "pts/", strlen("pts/")) == 0 || strncmp(dn, "ptyp", strlen("ptyp")) == 0)) lomac_type = MAC_LOMAC_TYPE_EQUAL; else lomac_type = MAC_LOMAC_TYPE_HIGH; lomac_set_single(ml, lomac_type, 0); } static void lomac_devfs_create_directory(struct mount *mp, char *dirname, int dirnamelen, struct devfs_dirent *de, struct label *delabel) { struct mac_lomac *ml; ml = SLOT(delabel); lomac_set_single(ml, MAC_LOMAC_TYPE_HIGH, 0); } static void lomac_devfs_create_symlink(struct ucred *cred, struct mount *mp, struct devfs_dirent *dd, struct label *ddlabel, struct devfs_dirent *de, struct label *delabel) { struct mac_lomac *source, *dest; source = SLOT(cred->cr_label); dest = SLOT(delabel); lomac_copy_single(source, dest); } static void lomac_devfs_update(struct mount *mp, struct devfs_dirent *de, struct label *delabel, struct vnode *vp, struct label *vplabel) { struct mac_lomac *source, *dest; source = SLOT(vplabel); dest = SLOT(delabel); lomac_copy(source, dest); } static void lomac_devfs_vnode_associate(struct mount *mp, struct label *mplabel, struct devfs_dirent *de, struct label *delabel, struct vnode *vp, struct label *vplabel) { struct mac_lomac *source, *dest; source = 
SLOT(delabel); dest = SLOT(vplabel); lomac_copy_single(source, dest); } static int lomac_ifnet_check_relabel(struct ucred *cred, struct ifnet *ifp, struct label *ifplabel, struct label *newlabel) { struct mac_lomac *subj, *new; int error; subj = SLOT(cred->cr_label); new = SLOT(newlabel); /* * If there is a LOMAC label update for the interface, it may be an * update of the single, range, or both. */ error = lomac_atmostflags(new, MAC_LOMAC_FLAGS_BOTH); if (error) return (error); /* * Relabling network interfaces requires LOMAC privilege. */ error = lomac_subject_privileged(subj); if (error) return (error); /* * If the LOMAC label is to be changed, authorize as appropriate. */ if (new->ml_flags & MAC_LOMAC_FLAGS_BOTH) { /* * Fill in the missing parts from the previous label. */ if ((new->ml_flags & MAC_LOMAC_FLAG_SINGLE) == 0) lomac_copy_single(subj, new); if ((new->ml_flags & MAC_LOMAC_FLAG_RANGE) == 0) lomac_copy_range(subj, new); /* * Rely on the traditional superuser status for the LOMAC * interface relabel requirements. XXXMAC: This will go * away. * * XXXRW: This is also redundant to a higher layer check. */ error = priv_check_cred(cred, PRIV_NET_SETIFMAC); if (error) return (EPERM); /* * XXXMAC: Additional consistency tests regarding the single * and the range of the new label might be performed here. */ } return (0); } static int lomac_ifnet_check_transmit(struct ifnet *ifp, struct label *ifplabel, struct mbuf *m, struct label *mlabel) { struct mac_lomac *p, *i; if (!lomac_enabled) return (0); p = SLOT(mlabel); i = SLOT(ifplabel); return (lomac_single_in_range(p, i) ? 0 : EACCES); } static void lomac_ifnet_create(struct ifnet *ifp, struct label *ifplabel) { char tifname[IFNAMSIZ], *p, *q; char tiflist[sizeof(trusted_interfaces)]; struct mac_lomac *dest; int len, grade; dest = SLOT(ifplabel); if (if_gettype(ifp) == IFT_LOOP) { grade = MAC_LOMAC_TYPE_EQUAL; goto set; } if (trust_all_interfaces) { grade = MAC_LOMAC_TYPE_HIGH; goto set; } grade = MAC_LOMAC_TYPE_LOW; if (trusted_interfaces[0] == '\0' || !strvalid(trusted_interfaces, sizeof(trusted_interfaces))) goto set; bzero(tiflist, sizeof(tiflist)); for (p = trusted_interfaces, q = tiflist; *p != '\0'; p++, q++) if(*p != ' ' && *p != '\t') *q = *p; for (p = q = tiflist;; p++) { if (*p == ',' || *p == '\0') { len = p - q; if (len < IFNAMSIZ) { bzero(tifname, sizeof(tifname)); bcopy(q, tifname, len); if (strcmp(tifname, if_name(ifp)) == 0) { grade = MAC_LOMAC_TYPE_HIGH; break; } } else { *p = '\0'; printf("MAC/LOMAC warning: interface name " "\"%s\" is too long (must be < %d)\n", q, IFNAMSIZ); } if (*p == '\0') break; q = p + 1; } } set: lomac_set_single(dest, grade, 0); lomac_set_range(dest, grade, 0, grade, 0); } static void lomac_ifnet_create_mbuf(struct ifnet *ifp, struct label *ifplabel, struct mbuf *m, struct label *mlabel) { struct mac_lomac *source, *dest; source = SLOT(ifplabel); dest = SLOT(mlabel); lomac_copy_single(source, dest); } static void lomac_ifnet_relabel(struct ucred *cred, struct ifnet *ifp, struct label *ifplabel, struct label *newlabel) { struct mac_lomac *source, *dest; source = SLOT(newlabel); dest = SLOT(ifplabel); try_relabel(source, dest); } static int lomac_inpcb_check_deliver(struct inpcb *inp, struct label *inplabel, struct mbuf *m, struct label *mlabel) { struct mac_lomac *p, *i; if (!lomac_enabled) return (0); p = SLOT(mlabel); i = SLOT(inplabel); return (lomac_equal_single(p, i) ? 
0 : EACCES); } static int lomac_inpcb_check_visible(struct ucred *cred, struct inpcb *inp, struct label *inplabel) { struct mac_lomac *subj, *obj; if (!lomac_enabled) return (0); subj = SLOT(cred->cr_label); obj = SLOT(inplabel); if (!lomac_dominate_single(obj, subj)) return (ENOENT); return (0); } static void lomac_inpcb_create(struct socket *so, struct label *solabel, struct inpcb *inp, struct label *inplabel) { struct mac_lomac *source, *dest; source = SLOT(solabel); dest = SLOT(inplabel); lomac_copy_single(source, dest); } static void lomac_inpcb_create_mbuf(struct inpcb *inp, struct label *inplabel, struct mbuf *m, struct label *mlabel) { struct mac_lomac *source, *dest; source = SLOT(inplabel); dest = SLOT(mlabel); lomac_copy_single(source, dest); } static void lomac_inpcb_sosetlabel(struct socket *so, struct label *solabel, struct inpcb *inp, struct label *inplabel) { struct mac_lomac *source, *dest; SOCK_LOCK_ASSERT(so); source = SLOT(solabel); dest = SLOT(inplabel); lomac_copy_single(source, dest); } static void lomac_ip6q_create(struct mbuf *m, struct label *mlabel, struct ip6q *q6, struct label *q6label) { struct mac_lomac *source, *dest; source = SLOT(mlabel); dest = SLOT(q6label); lomac_copy_single(source, dest); } static int lomac_ip6q_match(struct mbuf *m, struct label *mlabel, struct ip6q *q6, struct label *q6label) { struct mac_lomac *a, *b; a = SLOT(q6label); b = SLOT(mlabel); return (lomac_equal_single(a, b)); } static void lomac_ip6q_reassemble(struct ip6q *q6, struct label *q6label, struct mbuf *m, struct label *mlabel) { struct mac_lomac *source, *dest; source = SLOT(q6label); dest = SLOT(mlabel); /* Just use the head, since we require them all to match. */ lomac_copy_single(source, dest); } static void lomac_ip6q_update(struct mbuf *m, struct label *mlabel, struct ip6q *q6, struct label *q6label) { /* NOOP: we only accept matching labels, so no need to update */ } static void lomac_ipq_create(struct mbuf *m, struct label *mlabel, struct ipq *q, struct label *qlabel) { struct mac_lomac *source, *dest; source = SLOT(mlabel); dest = SLOT(qlabel); lomac_copy_single(source, dest); } static int lomac_ipq_match(struct mbuf *m, struct label *mlabel, struct ipq *q, struct label *qlabel) { struct mac_lomac *a, *b; a = SLOT(qlabel); b = SLOT(mlabel); return (lomac_equal_single(a, b)); } static void lomac_ipq_reassemble(struct ipq *q, struct label *qlabel, struct mbuf *m, struct label *mlabel) { struct mac_lomac *source, *dest; source = SLOT(qlabel); dest = SLOT(mlabel); /* Just use the head, since we require them all to match. 
*/ lomac_copy_single(source, dest); } static void lomac_ipq_update(struct mbuf *m, struct label *mlabel, struct ipq *q, struct label *qlabel) { /* NOOP: we only accept matching labels, so no need to update */ } static int lomac_kld_check_load(struct ucred *cred, struct vnode *vp, struct label *vplabel) { struct mac_lomac *subj, *obj; if (!lomac_enabled) return (0); subj = SLOT(cred->cr_label); obj = SLOT(vplabel); if (lomac_subject_privileged(subj)) return (EPERM); if (!lomac_high_single(obj)) return (EACCES); return (0); } static void lomac_mount_create(struct ucred *cred, struct mount *mp, struct label *mplabel) { struct mac_lomac *source, *dest; source = SLOT(cred->cr_label); dest = SLOT(mplabel); lomac_copy_single(source, dest); } static void lomac_netinet_arp_send(struct ifnet *ifp, struct label *ifplabel, struct mbuf *m, struct label *mlabel) { struct mac_lomac *dest; dest = SLOT(mlabel); lomac_set_single(dest, MAC_LOMAC_TYPE_EQUAL, 0); } static void lomac_netinet_firewall_reply(struct mbuf *mrecv, struct label *mrecvlabel, struct mbuf *msend, struct label *msendlabel) { struct mac_lomac *source, *dest; source = SLOT(mrecvlabel); dest = SLOT(msendlabel); lomac_copy_single(source, dest); } static void lomac_netinet_firewall_send(struct mbuf *m, struct label *mlabel) { struct mac_lomac *dest; dest = SLOT(mlabel); /* XXX: where is the label for the firewall really coming from? */ lomac_set_single(dest, MAC_LOMAC_TYPE_EQUAL, 0); } static void lomac_netinet_fragment(struct mbuf *m, struct label *mlabel, struct mbuf *frag, struct label *fraglabel) { struct mac_lomac *source, *dest; source = SLOT(mlabel); dest = SLOT(fraglabel); lomac_copy_single(source, dest); } static void lomac_netinet_icmp_reply(struct mbuf *mrecv, struct label *mrecvlabel, struct mbuf *msend, struct label *msendlabel) { struct mac_lomac *source, *dest; source = SLOT(mrecvlabel); dest = SLOT(msendlabel); lomac_copy_single(source, dest); } static void lomac_netinet_igmp_send(struct ifnet *ifp, struct label *ifplabel, struct mbuf *m, struct label *mlabel) { struct mac_lomac *dest; dest = SLOT(mlabel); lomac_set_single(dest, MAC_LOMAC_TYPE_EQUAL, 0); } static void lomac_netinet6_nd6_send(struct ifnet *ifp, struct label *ifplabel, struct mbuf *m, struct label *mlabel) { struct mac_lomac *dest; dest = SLOT(mlabel); lomac_set_single(dest, MAC_LOMAC_TYPE_EQUAL, 0); } static int lomac_pipe_check_ioctl(struct ucred *cred, struct pipepair *pp, struct label *pplabel, unsigned long cmd, void /* caddr_t */ *data) { if (!lomac_enabled) return (0); /* XXX: This will be implemented soon... */ return (0); } static int lomac_pipe_check_read(struct ucred *cred, struct pipepair *pp, struct label *pplabel) { struct mac_lomac *subj, *obj; if (!lomac_enabled) return (0); subj = SLOT(cred->cr_label); obj = SLOT(pplabel); if (!lomac_dominate_single(obj, subj)) return (maybe_demote(subj, obj, "reading", "pipe", NULL)); return (0); } static int lomac_pipe_check_relabel(struct ucred *cred, struct pipepair *pp, struct label *pplabel, struct label *newlabel) { struct mac_lomac *subj, *obj, *new; int error; new = SLOT(newlabel); subj = SLOT(cred->cr_label); obj = SLOT(pplabel); /* * If there is a LOMAC label update for a pipe, it must be a single * update. */ error = lomac_atmostflags(new, MAC_LOMAC_FLAG_SINGLE); if (error) return (error); /* * To perform a relabel of a pipe (LOMAC label or not), LOMAC must * authorize the relabel. 
*/ if (!lomac_single_in_range(obj, subj)) return (EPERM); /* * If the LOMAC label is to be changed, authorize as appropriate. */ if (new->ml_flags & MAC_LOMAC_FLAG_SINGLE) { /* * To change the LOMAC label on a pipe, the new pipe label * must be in the subject range. */ if (!lomac_single_in_range(new, subj)) return (EPERM); /* * To change the LOMAC label on a pipe to be EQUAL, the * subject must have appropriate privilege. */ if (lomac_contains_equal(new)) { error = lomac_subject_privileged(subj); if (error) return (error); } } return (0); } static int lomac_pipe_check_write(struct ucred *cred, struct pipepair *pp, struct label *pplabel) { struct mac_lomac *subj, *obj; if (!lomac_enabled) return (0); subj = SLOT(cred->cr_label); obj = SLOT(pplabel); if (!lomac_subject_dominate(subj, obj)) return (EACCES); return (0); } static void lomac_pipe_create(struct ucred *cred, struct pipepair *pp, struct label *pplabel) { struct mac_lomac *source, *dest; source = SLOT(cred->cr_label); dest = SLOT(pplabel); lomac_copy_single(source, dest); } static void lomac_pipe_relabel(struct ucred *cred, struct pipepair *pp, struct label *pplabel, struct label *newlabel) { struct mac_lomac *source, *dest; source = SLOT(newlabel); dest = SLOT(pplabel); try_relabel(source, dest); } /* * Some system privileges are allowed regardless of integrity grade; others * are allowed only when running with privilege with respect to the LOMAC * policy as they might otherwise allow bypassing of the integrity policy. */ static int lomac_priv_check(struct ucred *cred, int priv) { struct mac_lomac *subj; int error; if (!lomac_enabled) return (0); /* * Exempt only specific privileges from the LOMAC integrity policy. */ switch (priv) { case PRIV_KTRACE: case PRIV_MSGBUF: /* * Allow processes to manipulate basic process audit properties, and * to submit audit records. */ case PRIV_AUDIT_GETAUDIT: case PRIV_AUDIT_SETAUDIT: case PRIV_AUDIT_SUBMIT: /* * Allow processes to manipulate their regular UNIX credentials. */ case PRIV_CRED_SETUID: case PRIV_CRED_SETEUID: case PRIV_CRED_SETGID: case PRIV_CRED_SETEGID: case PRIV_CRED_SETGROUPS: case PRIV_CRED_SETREUID: case PRIV_CRED_SETREGID: case PRIV_CRED_SETRESUID: case PRIV_CRED_SETRESGID: /* * Allow processes to perform system monitoring. */ case PRIV_SEEOTHERGIDS: case PRIV_SEEOTHERUIDS: + case PRIV_SEEJAILPROC: break; /* * Allow access to general process debugging facilities. We * separately control debugging based on MAC label. */ case PRIV_DEBUG_DIFFCRED: case PRIV_DEBUG_SUGID: case PRIV_DEBUG_UNPRIV: /* * Allow manipulating jails. */ case PRIV_JAIL_ATTACH: /* * Allow privilege with respect to the Partition policy, but not the * Privs policy. */ case PRIV_MAC_PARTITION: /* * Allow privilege with respect to process resource limits and login * context. */ case PRIV_PROC_LIMIT: case PRIV_PROC_SETLOGIN: case PRIV_PROC_SETRLIMIT: /* * Allow System V and POSIX IPC privileges. */ case PRIV_IPC_READ: case PRIV_IPC_WRITE: case PRIV_IPC_ADMIN: case PRIV_IPC_MSGSIZE: case PRIV_MQ_ADMIN: /* * Allow certain scheduler manipulations -- possibly this should be * controlled by more fine-grained policy, as potentially low * integrity processes can deny CPU to higher integrity ones. */ case PRIV_SCHED_DIFFCRED: case PRIV_SCHED_SETPRIORITY: case PRIV_SCHED_RTPRIO: case PRIV_SCHED_SETPOLICY: case PRIV_SCHED_SET: case PRIV_SCHED_SETPARAM: case PRIV_SCHED_IDPRIO: /* * More IPC privileges. */ case PRIV_SEM_WRITE: /* * Allow signaling privileges subject to integrity policy. 
*/ case PRIV_SIGNAL_DIFFCRED: case PRIV_SIGNAL_SUGID: /* * Allow access to only limited sysctls from lower integrity levels; * piggy-back on the Jail definition. */ case PRIV_SYSCTL_WRITEJAIL: /* * Allow TTY-based privileges, subject to general device access using * labels on TTY device nodes, but not console privilege. */ case PRIV_TTY_DRAINWAIT: case PRIV_TTY_DTRWAIT: case PRIV_TTY_EXCLUSIVE: case PRIV_TTY_STI: case PRIV_TTY_SETA: /* * Grant most VFS privileges, as almost all are in practice bounded * by more specific checks using labels. */ case PRIV_VFS_READ: case PRIV_VFS_WRITE: case PRIV_VFS_ADMIN: case PRIV_VFS_EXEC: case PRIV_VFS_LOOKUP: case PRIV_VFS_CHFLAGS_DEV: case PRIV_VFS_CHOWN: case PRIV_VFS_CHROOT: case PRIV_VFS_RETAINSUGID: case PRIV_VFS_EXCEEDQUOTA: case PRIV_VFS_FCHROOT: case PRIV_VFS_FHOPEN: case PRIV_VFS_FHSTATFS: case PRIV_VFS_GENERATION: case PRIV_VFS_GETFH: case PRIV_VFS_GETQUOTA: case PRIV_VFS_LINK: case PRIV_VFS_MOUNT: case PRIV_VFS_MOUNT_OWNER: case PRIV_VFS_MOUNT_PERM: case PRIV_VFS_MOUNT_SUIDDIR: case PRIV_VFS_MOUNT_NONUSER: case PRIV_VFS_SETGID: case PRIV_VFS_STICKYFILE: case PRIV_VFS_SYSFLAGS: case PRIV_VFS_UNMOUNT: /* * Allow VM privileges; it would be nice if these were subject to * resource limits. */ case PRIV_VM_MADV_PROTECT: case PRIV_VM_MLOCK: case PRIV_VM_MUNLOCK: case PRIV_VM_SWAP_NOQUOTA: case PRIV_VM_SWAP_NORLIMIT: /* * Allow some but not all network privileges. In general, dont allow * reconfiguring the network stack, just normal use. */ case PRIV_NETINET_RESERVEDPORT: case PRIV_NETINET_RAW: case PRIV_NETINET_REUSEPORT: break; /* * All remaining system privileges are allow only if the process * holds privilege with respect to the LOMAC policy. */ default: subj = SLOT(cred->cr_label); error = lomac_subject_privileged(subj); if (error) return (error); } return (0); } static int lomac_proc_check_debug(struct ucred *cred, struct proc *p) { struct mac_lomac *subj, *obj; if (!lomac_enabled) return (0); subj = SLOT(cred->cr_label); obj = SLOT(p->p_ucred->cr_label); /* XXX: range checks */ if (!lomac_dominate_single(obj, subj)) return (ESRCH); if (!lomac_subject_dominate(subj, obj)) return (EACCES); return (0); } static int lomac_proc_check_sched(struct ucred *cred, struct proc *p) { struct mac_lomac *subj, *obj; if (!lomac_enabled) return (0); subj = SLOT(cred->cr_label); obj = SLOT(p->p_ucred->cr_label); /* XXX: range checks */ if (!lomac_dominate_single(obj, subj)) return (ESRCH); if (!lomac_subject_dominate(subj, obj)) return (EACCES); return (0); } static int lomac_proc_check_signal(struct ucred *cred, struct proc *p, int signum) { struct mac_lomac *subj, *obj; if (!lomac_enabled) return (0); subj = SLOT(cred->cr_label); obj = SLOT(p->p_ucred->cr_label); /* XXX: range checks */ if (!lomac_dominate_single(obj, subj)) return (ESRCH); if (!lomac_subject_dominate(subj, obj)) return (EACCES); return (0); } static void lomac_proc_destroy_label(struct label *label) { mtx_destroy(&PSLOT(label)->mtx); free(PSLOT(label), M_LOMAC); PSLOT_SET(label, NULL); } static void lomac_proc_init_label(struct label *label) { PSLOT_SET(label, malloc(sizeof(struct mac_lomac_proc), M_LOMAC, M_ZERO | M_WAITOK)); mtx_init(&PSLOT(label)->mtx, "MAC/Lomac proc lock", NULL, MTX_DEF); } static int lomac_socket_check_deliver(struct socket *so, struct label *solabel, struct mbuf *m, struct label *mlabel) { struct mac_lomac *p, *s; int error; if (!lomac_enabled) return (0); p = SLOT(mlabel); s = SLOT(solabel); SOCK_LOCK(so); error = lomac_equal_single(p, s) ? 
0 : EACCES; SOCK_UNLOCK(so); return (error); } static int lomac_socket_check_relabel(struct ucred *cred, struct socket *so, struct label *solabel, struct label *newlabel) { struct mac_lomac *subj, *obj, *new; int error; SOCK_LOCK_ASSERT(so); new = SLOT(newlabel); subj = SLOT(cred->cr_label); obj = SLOT(solabel); /* * If there is a LOMAC label update for the socket, it may be an * update of single. */ error = lomac_atmostflags(new, MAC_LOMAC_FLAG_SINGLE); if (error) return (error); /* * To relabel a socket, the old socket single must be in the subject * range. */ if (!lomac_single_in_range(obj, subj)) return (EPERM); /* * If the LOMAC label is to be changed, authorize as appropriate. */ if (new->ml_flags & MAC_LOMAC_FLAG_SINGLE) { /* * To relabel a socket, the new socket single must be in the * subject range. */ if (!lomac_single_in_range(new, subj)) return (EPERM); /* * To change the LOMAC label on the socket to contain EQUAL, * the subject must have appropriate privilege. */ if (lomac_contains_equal(new)) { error = lomac_subject_privileged(subj); if (error) return (error); } } return (0); } static int lomac_socket_check_visible(struct ucred *cred, struct socket *so, struct label *solabel) { struct mac_lomac *subj, *obj; if (!lomac_enabled) return (0); subj = SLOT(cred->cr_label); obj = SLOT(solabel); SOCK_LOCK(so); if (!lomac_dominate_single(obj, subj)) { SOCK_UNLOCK(so); return (ENOENT); } SOCK_UNLOCK(so); return (0); } static void lomac_socket_create(struct ucred *cred, struct socket *so, struct label *solabel) { struct mac_lomac *source, *dest; source = SLOT(cred->cr_label); dest = SLOT(solabel); lomac_copy_single(source, dest); } static void lomac_socket_create_mbuf(struct socket *so, struct label *solabel, struct mbuf *m, struct label *mlabel) { struct mac_lomac *source, *dest; source = SLOT(solabel); dest = SLOT(mlabel); SOCK_LOCK(so); lomac_copy_single(source, dest); SOCK_UNLOCK(so); } static void lomac_socket_newconn(struct socket *oldso, struct label *oldsolabel, struct socket *newso, struct label *newsolabel) { struct mac_lomac source, *dest; SOCK_LOCK(oldso); source = *SLOT(oldsolabel); SOCK_UNLOCK(oldso); dest = SLOT(newsolabel); SOCK_LOCK(newso); lomac_copy_single(&source, dest); SOCK_UNLOCK(newso); } static void lomac_socket_relabel(struct ucred *cred, struct socket *so, struct label *solabel, struct label *newlabel) { struct mac_lomac *source, *dest; SOCK_LOCK_ASSERT(so); source = SLOT(newlabel); dest = SLOT(solabel); try_relabel(source, dest); } static void lomac_socketpeer_set_from_mbuf(struct mbuf *m, struct label *mlabel, struct socket *so, struct label *sopeerlabel) { struct mac_lomac *source, *dest; source = SLOT(mlabel); dest = SLOT(sopeerlabel); SOCK_LOCK(so); lomac_copy_single(source, dest); SOCK_UNLOCK(so); } static void lomac_socketpeer_set_from_socket(struct socket *oldso, struct label *oldsolabel, struct socket *newso, struct label *newsopeerlabel) { struct mac_lomac source, *dest; SOCK_LOCK(oldso); source = *SLOT(oldsolabel); SOCK_UNLOCK(oldso); dest = SLOT(newsopeerlabel); SOCK_LOCK(newso); lomac_copy_single(&source, dest); SOCK_UNLOCK(newso); } static void lomac_syncache_create(struct label *label, struct inpcb *inp) { struct mac_lomac *source, *dest; source = SLOT(inp->inp_label); dest = SLOT(label); lomac_copy(source, dest); } static void lomac_syncache_create_mbuf(struct label *sc_label, struct mbuf *m, struct label *mlabel) { struct mac_lomac *source, *dest; source = SLOT(sc_label); dest = SLOT(mlabel); lomac_copy(source, dest); } static int 
lomac_system_check_acct(struct ucred *cred, struct vnode *vp, struct label *vplabel) { struct mac_lomac *subj, *obj; if (!lomac_enabled) return (0); subj = SLOT(cred->cr_label); obj = SLOT(vplabel); if (lomac_subject_privileged(subj)) return (EPERM); if (!lomac_high_single(obj)) return (EACCES); return (0); } static int lomac_system_check_auditctl(struct ucred *cred, struct vnode *vp, struct label *vplabel) { struct mac_lomac *subj, *obj; if (!lomac_enabled) return (0); subj = SLOT(cred->cr_label); obj = SLOT(vplabel); if (lomac_subject_privileged(subj)) return (EPERM); if (!lomac_high_single(obj)) return (EACCES); return (0); } static int lomac_system_check_swapoff(struct ucred *cred, struct vnode *vp, struct label *vplabel) { struct mac_lomac *subj; if (!lomac_enabled) return (0); subj = SLOT(cred->cr_label); if (lomac_subject_privileged(subj)) return (EPERM); return (0); } static int lomac_system_check_swapon(struct ucred *cred, struct vnode *vp, struct label *vplabel) { struct mac_lomac *subj, *obj; if (!lomac_enabled) return (0); subj = SLOT(cred->cr_label); obj = SLOT(vplabel); if (lomac_subject_privileged(subj)) return (EPERM); if (!lomac_high_single(obj)) return (EACCES); return (0); } static int lomac_system_check_sysctl(struct ucred *cred, struct sysctl_oid *oidp, void *arg1, int arg2, struct sysctl_req *req) { struct mac_lomac *subj; if (!lomac_enabled) return (0); subj = SLOT(cred->cr_label); /* * Treat sysctl variables without CTLFLAG_ANYBODY flag as lomac/high, * but also require privilege to change them. */ if (req->newptr != NULL && (oidp->oid_kind & CTLFLAG_ANYBODY) == 0) { #ifdef notdef if (!lomac_subject_dominate_high(subj)) return (EACCES); #endif if (lomac_subject_privileged(subj)) return (EPERM); } return (0); } static void lomac_thread_userret(struct thread *td) { struct proc *p = td->td_proc; struct mac_lomac_proc *subj = PSLOT(p->p_label); struct ucred *newcred, *oldcred; int dodrop; mtx_lock(&subj->mtx); if (subj->mac_lomac.ml_flags & MAC_LOMAC_FLAG_UPDATE) { dodrop = 0; mtx_unlock(&subj->mtx); newcred = crget(); PROC_LOCK(p); mtx_lock(&subj->mtx); /* * Check if we lost the race while allocating the cred. */ if ((subj->mac_lomac.ml_flags & MAC_LOMAC_FLAG_UPDATE) == 0) { crfree(newcred); goto out; } oldcred = p->p_ucred; crcopy(newcred, oldcred); crhold(newcred); lomac_copy(&subj->mac_lomac, SLOT(newcred->cr_label)); proc_set_cred(p, newcred); crfree(oldcred); dodrop = 1; out: mtx_unlock(&subj->mtx); PROC_UNLOCK(p); if (dodrop) mac_proc_vm_revoke(curthread); } else { mtx_unlock(&subj->mtx); } } static int lomac_vnode_associate_extattr(struct mount *mp, struct label *mplabel, struct vnode *vp, struct label *vplabel) { struct mac_lomac ml_temp, *source, *dest; int buflen, error; source = SLOT(mplabel); dest = SLOT(vplabel); buflen = sizeof(ml_temp); bzero(&ml_temp, buflen); error = vn_extattr_get(vp, IO_NODELOCKED, MAC_LOMAC_EXTATTR_NAMESPACE, MAC_LOMAC_EXTATTR_NAME, &buflen, (char *)&ml_temp, curthread); if (error == ENOATTR || error == EOPNOTSUPP) { /* Fall back to the mntlabel. 
*/ lomac_copy_single(source, dest); return (0); } else if (error) return (error); if (buflen != sizeof(ml_temp)) { if (buflen != sizeof(ml_temp) - sizeof(ml_temp.ml_auxsingle)) { printf("lomac_vnode_associate_extattr: bad size %d\n", buflen); return (EPERM); } bzero(&ml_temp.ml_auxsingle, sizeof(ml_temp.ml_auxsingle)); buflen = sizeof(ml_temp); (void)vn_extattr_set(vp, IO_NODELOCKED, MAC_LOMAC_EXTATTR_NAMESPACE, MAC_LOMAC_EXTATTR_NAME, buflen, (char *)&ml_temp, curthread); } if (lomac_valid(&ml_temp) != 0) { printf("lomac_vnode_associate_extattr: invalid\n"); return (EPERM); } if ((ml_temp.ml_flags & MAC_LOMAC_FLAGS_BOTH) != MAC_LOMAC_FLAG_SINGLE) { printf("lomac_vnode_associate_extattr: not single\n"); return (EPERM); } lomac_copy_single(&ml_temp, dest); return (0); } static void lomac_vnode_associate_singlelabel(struct mount *mp, struct label *mplabel, struct vnode *vp, struct label *vplabel) { struct mac_lomac *source, *dest; source = SLOT(mplabel); dest = SLOT(vplabel); lomac_copy_single(source, dest); } static int lomac_vnode_check_create(struct ucred *cred, struct vnode *dvp, struct label *dvplabel, struct componentname *cnp, struct vattr *vap) { struct mac_lomac *subj, *obj; if (!lomac_enabled) return (0); subj = SLOT(cred->cr_label); obj = SLOT(dvplabel); if (!lomac_subject_dominate(subj, obj)) return (EACCES); if (obj->ml_flags & MAC_LOMAC_FLAG_AUX && !lomac_dominate_element(&subj->ml_single, &obj->ml_auxsingle)) return (EACCES); return (0); } static int lomac_vnode_check_deleteacl(struct ucred *cred, struct vnode *vp, struct label *vplabel, acl_type_t type) { struct mac_lomac *subj, *obj; if (!lomac_enabled) return (0); subj = SLOT(cred->cr_label); obj = SLOT(vplabel); if (!lomac_subject_dominate(subj, obj)) return (EACCES); return (0); } static int lomac_vnode_check_link(struct ucred *cred, struct vnode *dvp, struct label *dvplabel, struct vnode *vp, struct label *vplabel, struct componentname *cnp) { struct mac_lomac *subj, *obj; if (!lomac_enabled) return (0); subj = SLOT(cred->cr_label); obj = SLOT(dvplabel); if (!lomac_subject_dominate(subj, obj)) return (EACCES); obj = SLOT(vplabel); if (!lomac_subject_dominate(subj, obj)) return (EACCES); return (0); } static int lomac_vnode_check_mmap(struct ucred *cred, struct vnode *vp, struct label *vplabel, int prot, int flags) { struct mac_lomac *subj, *obj; /* * Rely on the use of open()-time protections to handle * non-revocation cases. */ if (!lomac_enabled) return (0); subj = SLOT(cred->cr_label); obj = SLOT(vplabel); if (((prot & VM_PROT_WRITE) != 0) && ((flags & MAP_SHARED) != 0)) { if (!lomac_subject_dominate(subj, obj)) return (EACCES); } if (prot & (VM_PROT_READ | VM_PROT_EXECUTE)) { if (!lomac_dominate_single(obj, subj)) return (maybe_demote(subj, obj, "mapping", "file", vp)); } return (0); } static void lomac_vnode_check_mmap_downgrade(struct ucred *cred, struct vnode *vp, struct label *vplabel, /* XXX vm_prot_t */ int *prot) { struct mac_lomac *subj, *obj; /* * Rely on the use of open()-time protections to handle * non-revocation cases. */ if (!lomac_enabled || !revocation_enabled) return; subj = SLOT(cred->cr_label); obj = SLOT(vplabel); if (!lomac_subject_dominate(subj, obj)) *prot &= ~VM_PROT_WRITE; } static int lomac_vnode_check_open(struct ucred *cred, struct vnode *vp, struct label *vplabel, accmode_t accmode) { struct mac_lomac *subj, *obj; if (!lomac_enabled) return (0); subj = SLOT(cred->cr_label); obj = SLOT(vplabel); /* XXX privilege override for admin? 
*/ if (accmode & VMODIFY_PERMS) { if (!lomac_subject_dominate(subj, obj)) return (EACCES); } return (0); } static int lomac_vnode_check_read(struct ucred *active_cred, struct ucred *file_cred, struct vnode *vp, struct label *vplabel) { struct mac_lomac *subj, *obj; if (!lomac_enabled || !revocation_enabled) return (0); subj = SLOT(active_cred->cr_label); obj = SLOT(vplabel); if (!lomac_dominate_single(obj, subj)) return (maybe_demote(subj, obj, "reading", "file", vp)); return (0); } static int lomac_vnode_check_relabel(struct ucred *cred, struct vnode *vp, struct label *vplabel, struct label *newlabel) { struct mac_lomac *old, *new, *subj; int error; old = SLOT(vplabel); new = SLOT(newlabel); subj = SLOT(cred->cr_label); /* * If there is a LOMAC label update for the vnode, it must be a * single label, with an optional explicit auxiliary single. */ error = lomac_atmostflags(new, MAC_LOMAC_FLAG_SINGLE | MAC_LOMAC_FLAG_AUX); if (error) return (error); /* * To perform a relabel of the vnode (LOMAC label or not), LOMAC must * authorize the relabel. */ if (!lomac_single_in_range(old, subj)) return (EPERM); /* * If the LOMAC label is to be changed, authorize as appropriate. */ if (new->ml_flags & MAC_LOMAC_FLAG_SINGLE) { /* * To change the LOMAC label on a vnode, the new vnode label * must be in the subject range. */ if (!lomac_single_in_range(new, subj)) return (EPERM); /* * To change the LOMAC label on the vnode to be EQUAL, the * subject must have appropriate privilege. */ if (lomac_contains_equal(new)) { error = lomac_subject_privileged(subj); if (error) return (error); } } if (new->ml_flags & MAC_LOMAC_FLAG_AUX) { /* * Fill in the missing parts from the previous label. */ if ((new->ml_flags & MAC_LOMAC_FLAG_SINGLE) == 0) lomac_copy_single(subj, new); /* * To change the auxiliary LOMAC label on a vnode, the new * vnode label must be in the subject range. */ if (!lomac_auxsingle_in_range(new, subj)) return (EPERM); /* * To change the auxiliary LOMAC label on the vnode to be * EQUAL, the subject must have appropriate privilege. 
*/ if (lomac_contains_equal(new)) { error = lomac_subject_privileged(subj); if (error) return (error); } } return (0); } static int lomac_vnode_check_rename_from(struct ucred *cred, struct vnode *dvp, struct label *dvplabel, struct vnode *vp, struct label *vplabel, struct componentname *cnp) { struct mac_lomac *subj, *obj; if (!lomac_enabled) return (0); subj = SLOT(cred->cr_label); obj = SLOT(dvplabel); if (!lomac_subject_dominate(subj, obj)) return (EACCES); obj = SLOT(vplabel); if (!lomac_subject_dominate(subj, obj)) return (EACCES); return (0); } static int lomac_vnode_check_rename_to(struct ucred *cred, struct vnode *dvp, struct label *dvplabel, struct vnode *vp, struct label *vplabel, int samedir, struct componentname *cnp) { struct mac_lomac *subj, *obj; if (!lomac_enabled) return (0); subj = SLOT(cred->cr_label); obj = SLOT(dvplabel); if (!lomac_subject_dominate(subj, obj)) return (EACCES); if (vp != NULL) { obj = SLOT(vplabel); if (!lomac_subject_dominate(subj, obj)) return (EACCES); } return (0); } static int lomac_vnode_check_revoke(struct ucred *cred, struct vnode *vp, struct label *vplabel) { struct mac_lomac *subj, *obj; if (!lomac_enabled) return (0); subj = SLOT(cred->cr_label); obj = SLOT(vplabel); if (!lomac_subject_dominate(subj, obj)) return (EACCES); return (0); } static int lomac_vnode_check_setacl(struct ucred *cred, struct vnode *vp, struct label *vplabel, acl_type_t type, struct acl *acl) { struct mac_lomac *subj, *obj; if (!lomac_enabled) return (0); subj = SLOT(cred->cr_label); obj = SLOT(vplabel); if (!lomac_subject_dominate(subj, obj)) return (EACCES); return (0); } static int lomac_vnode_check_setextattr(struct ucred *cred, struct vnode *vp, struct label *vplabel, int attrnamespace, const char *name) { struct mac_lomac *subj, *obj; if (!lomac_enabled) return (0); subj = SLOT(cred->cr_label); obj = SLOT(vplabel); if (!lomac_subject_dominate(subj, obj)) return (EACCES); /* XXX: protect the MAC EA in a special way? 
*/ return (0); } static int lomac_vnode_check_setflags(struct ucred *cred, struct vnode *vp, struct label *vplabel, u_long flags) { struct mac_lomac *subj, *obj; if (!lomac_enabled) return (0); subj = SLOT(cred->cr_label); obj = SLOT(vplabel); if (!lomac_subject_dominate(subj, obj)) return (EACCES); return (0); } static int lomac_vnode_check_setmode(struct ucred *cred, struct vnode *vp, struct label *vplabel, mode_t mode) { struct mac_lomac *subj, *obj; if (!lomac_enabled) return (0); subj = SLOT(cred->cr_label); obj = SLOT(vplabel); if (!lomac_subject_dominate(subj, obj)) return (EACCES); return (0); } static int lomac_vnode_check_setowner(struct ucred *cred, struct vnode *vp, struct label *vplabel, uid_t uid, gid_t gid) { struct mac_lomac *subj, *obj; if (!lomac_enabled) return (0); subj = SLOT(cred->cr_label); obj = SLOT(vplabel); if (!lomac_subject_dominate(subj, obj)) return (EACCES); return (0); } static int lomac_vnode_check_setutimes(struct ucred *cred, struct vnode *vp, struct label *vplabel, struct timespec atime, struct timespec mtime) { struct mac_lomac *subj, *obj; if (!lomac_enabled) return (0); subj = SLOT(cred->cr_label); obj = SLOT(vplabel); if (!lomac_subject_dominate(subj, obj)) return (EACCES); return (0); } static int lomac_vnode_check_unlink(struct ucred *cred, struct vnode *dvp, struct label *dvplabel, struct vnode *vp, struct label *vplabel, struct componentname *cnp) { struct mac_lomac *subj, *obj; if (!lomac_enabled) return (0); subj = SLOT(cred->cr_label); obj = SLOT(dvplabel); if (!lomac_subject_dominate(subj, obj)) return (EACCES); obj = SLOT(vplabel); if (!lomac_subject_dominate(subj, obj)) return (EACCES); return (0); } static int lomac_vnode_check_write(struct ucred *active_cred, struct ucred *file_cred, struct vnode *vp, struct label *vplabel) { struct mac_lomac *subj, *obj; if (!lomac_enabled || !revocation_enabled) return (0); subj = SLOT(active_cred->cr_label); obj = SLOT(vplabel); if (!lomac_subject_dominate(subj, obj)) return (EACCES); return (0); } static int lomac_vnode_create_extattr(struct ucred *cred, struct mount *mp, struct label *mplabel, struct vnode *dvp, struct label *dvplabel, struct vnode *vp, struct label *vplabel, struct componentname *cnp) { struct mac_lomac *source, *dest, *dir, temp; size_t buflen; int error; buflen = sizeof(temp); bzero(&temp, buflen); source = SLOT(cred->cr_label); dest = SLOT(vplabel); dir = SLOT(dvplabel); if (dir->ml_flags & MAC_LOMAC_FLAG_AUX) { lomac_copy_auxsingle(dir, &temp); lomac_set_single(&temp, dir->ml_auxsingle.mle_type, dir->ml_auxsingle.mle_grade); } else { lomac_copy_single(source, &temp); } error = vn_extattr_set(vp, IO_NODELOCKED, MAC_LOMAC_EXTATTR_NAMESPACE, MAC_LOMAC_EXTATTR_NAME, buflen, (char *)&temp, curthread); if (error == 0) lomac_copy(&temp, dest); return (error); } static void lomac_vnode_execve_transition(struct ucred *old, struct ucred *new, struct vnode *vp, struct label *vplabel, struct label *interpvplabel, struct image_params *imgp, struct label *execlabel) { struct mac_lomac *source, *dest, *obj, *robj; source = SLOT(old->cr_label); dest = SLOT(new->cr_label); obj = SLOT(vplabel); robj = interpvplabel != NULL ? SLOT(interpvplabel) : obj; lomac_copy(source, dest); /* * If there's an auxiliary label on the real object, respect it and * assume that this level should be assumed immediately if a higher * level is currently in place. 
*/ if (robj->ml_flags & MAC_LOMAC_FLAG_AUX && !lomac_dominate_element(&robj->ml_auxsingle, &dest->ml_single) && lomac_auxsingle_in_range(robj, dest)) lomac_set_single(dest, robj->ml_auxsingle.mle_type, robj->ml_auxsingle.mle_grade); /* * Restructuring to use the execve transitioning mechanism instead of * the normal demotion mechanism here would be difficult, so just * copy the label over and perform standard demotion. This is also * non-optimal because it will result in the intermediate label "new" * being created and immediately recycled. */ if (lomac_enabled && revocation_enabled && !lomac_dominate_single(obj, source)) (void)maybe_demote(source, obj, "executing", "file", vp); } static int lomac_vnode_execve_will_transition(struct ucred *old, struct vnode *vp, struct label *vplabel, struct label *interpvplabel, struct image_params *imgp, struct label *execlabel) { struct mac_lomac *subj, *obj, *robj; if (!lomac_enabled || !revocation_enabled) return (0); subj = SLOT(old->cr_label); obj = SLOT(vplabel); robj = interpvplabel != NULL ? SLOT(interpvplabel) : obj; return ((robj->ml_flags & MAC_LOMAC_FLAG_AUX && !lomac_dominate_element(&robj->ml_auxsingle, &subj->ml_single) && lomac_auxsingle_in_range(robj, subj)) || !lomac_dominate_single(obj, subj)); } static void lomac_vnode_relabel(struct ucred *cred, struct vnode *vp, struct label *vplabel, struct label *newlabel) { struct mac_lomac *source, *dest; source = SLOT(newlabel); dest = SLOT(vplabel); try_relabel(source, dest); } static int lomac_vnode_setlabel_extattr(struct ucred *cred, struct vnode *vp, struct label *vplabel, struct label *intlabel) { struct mac_lomac *source, temp; size_t buflen; int error; buflen = sizeof(temp); bzero(&temp, buflen); source = SLOT(intlabel); if ((source->ml_flags & MAC_LOMAC_FLAG_SINGLE) == 0) return (0); lomac_copy_single(source, &temp); error = vn_extattr_set(vp, IO_NODELOCKED, MAC_LOMAC_EXTATTR_NAMESPACE, MAC_LOMAC_EXTATTR_NAME, buflen, (char *)&temp, curthread); return (error); } static struct mac_policy_ops lomac_ops = { .mpo_init = lomac_init, .mpo_destroy = lomac_fini, .mpo_bpfdesc_check_receive = lomac_bpfdesc_check_receive, .mpo_bpfdesc_create = lomac_bpfdesc_create, .mpo_bpfdesc_create_mbuf = lomac_bpfdesc_create_mbuf, .mpo_bpfdesc_destroy_label = lomac_destroy_label, .mpo_bpfdesc_init_label = lomac_init_label, .mpo_cred_check_relabel = lomac_cred_check_relabel, .mpo_cred_check_visible = lomac_cred_check_visible, .mpo_cred_copy_label = lomac_copy_label, .mpo_cred_create_swapper = lomac_cred_create_swapper, .mpo_cred_create_init = lomac_cred_create_init, .mpo_cred_destroy_label = lomac_destroy_label, .mpo_cred_externalize_label = lomac_externalize_label, .mpo_cred_init_label = lomac_init_label, .mpo_cred_internalize_label = lomac_internalize_label, .mpo_cred_relabel = lomac_cred_relabel, .mpo_devfs_create_device = lomac_devfs_create_device, .mpo_devfs_create_directory = lomac_devfs_create_directory, .mpo_devfs_create_symlink = lomac_devfs_create_symlink, .mpo_devfs_destroy_label = lomac_destroy_label, .mpo_devfs_init_label = lomac_init_label, .mpo_devfs_update = lomac_devfs_update, .mpo_devfs_vnode_associate = lomac_devfs_vnode_associate, .mpo_ifnet_check_relabel = lomac_ifnet_check_relabel, .mpo_ifnet_check_transmit = lomac_ifnet_check_transmit, .mpo_ifnet_copy_label = lomac_copy_label, .mpo_ifnet_create = lomac_ifnet_create, .mpo_ifnet_create_mbuf = lomac_ifnet_create_mbuf, .mpo_ifnet_destroy_label = lomac_destroy_label, .mpo_ifnet_externalize_label = lomac_externalize_label, 
.mpo_ifnet_init_label = lomac_init_label, .mpo_ifnet_internalize_label = lomac_internalize_label, .mpo_ifnet_relabel = lomac_ifnet_relabel, .mpo_syncache_create = lomac_syncache_create, .mpo_syncache_destroy_label = lomac_destroy_label, .mpo_syncache_init_label = lomac_init_label_waitcheck, .mpo_inpcb_check_deliver = lomac_inpcb_check_deliver, .mpo_inpcb_check_visible = lomac_inpcb_check_visible, .mpo_inpcb_create = lomac_inpcb_create, .mpo_inpcb_create_mbuf = lomac_inpcb_create_mbuf, .mpo_inpcb_destroy_label = lomac_destroy_label, .mpo_inpcb_init_label = lomac_init_label_waitcheck, .mpo_inpcb_sosetlabel = lomac_inpcb_sosetlabel, .mpo_ip6q_create = lomac_ip6q_create, .mpo_ip6q_destroy_label = lomac_destroy_label, .mpo_ip6q_init_label = lomac_init_label_waitcheck, .mpo_ip6q_match = lomac_ip6q_match, .mpo_ip6q_reassemble = lomac_ip6q_reassemble, .mpo_ip6q_update = lomac_ip6q_update, .mpo_ipq_create = lomac_ipq_create, .mpo_ipq_destroy_label = lomac_destroy_label, .mpo_ipq_init_label = lomac_init_label_waitcheck, .mpo_ipq_match = lomac_ipq_match, .mpo_ipq_reassemble = lomac_ipq_reassemble, .mpo_ipq_update = lomac_ipq_update, .mpo_kld_check_load = lomac_kld_check_load, .mpo_mbuf_copy_label = lomac_copy_label, .mpo_mbuf_destroy_label = lomac_destroy_label, .mpo_mbuf_init_label = lomac_init_label_waitcheck, .mpo_mount_create = lomac_mount_create, .mpo_mount_destroy_label = lomac_destroy_label, .mpo_mount_init_label = lomac_init_label, .mpo_netinet_arp_send = lomac_netinet_arp_send, .mpo_netinet_firewall_reply = lomac_netinet_firewall_reply, .mpo_netinet_firewall_send = lomac_netinet_firewall_send, .mpo_netinet_fragment = lomac_netinet_fragment, .mpo_netinet_icmp_reply = lomac_netinet_icmp_reply, .mpo_netinet_igmp_send = lomac_netinet_igmp_send, .mpo_netinet6_nd6_send = lomac_netinet6_nd6_send, .mpo_pipe_check_ioctl = lomac_pipe_check_ioctl, .mpo_pipe_check_read = lomac_pipe_check_read, .mpo_pipe_check_relabel = lomac_pipe_check_relabel, .mpo_pipe_check_write = lomac_pipe_check_write, .mpo_pipe_copy_label = lomac_copy_label, .mpo_pipe_create = lomac_pipe_create, .mpo_pipe_destroy_label = lomac_destroy_label, .mpo_pipe_externalize_label = lomac_externalize_label, .mpo_pipe_init_label = lomac_init_label, .mpo_pipe_internalize_label = lomac_internalize_label, .mpo_pipe_relabel = lomac_pipe_relabel, .mpo_priv_check = lomac_priv_check, .mpo_proc_check_debug = lomac_proc_check_debug, .mpo_proc_check_sched = lomac_proc_check_sched, .mpo_proc_check_signal = lomac_proc_check_signal, .mpo_proc_destroy_label = lomac_proc_destroy_label, .mpo_proc_init_label = lomac_proc_init_label, .mpo_socket_check_deliver = lomac_socket_check_deliver, .mpo_socket_check_relabel = lomac_socket_check_relabel, .mpo_socket_check_visible = lomac_socket_check_visible, .mpo_socket_copy_label = lomac_copy_label, .mpo_socket_create = lomac_socket_create, .mpo_socket_create_mbuf = lomac_socket_create_mbuf, .mpo_socket_destroy_label = lomac_destroy_label, .mpo_socket_externalize_label = lomac_externalize_label, .mpo_socket_init_label = lomac_init_label_waitcheck, .mpo_socket_internalize_label = lomac_internalize_label, .mpo_socket_newconn = lomac_socket_newconn, .mpo_socket_relabel = lomac_socket_relabel, .mpo_socketpeer_destroy_label = lomac_destroy_label, .mpo_socketpeer_externalize_label = lomac_externalize_label, .mpo_socketpeer_init_label = lomac_init_label_waitcheck, .mpo_socketpeer_set_from_mbuf = lomac_socketpeer_set_from_mbuf, .mpo_socketpeer_set_from_socket = lomac_socketpeer_set_from_socket, .mpo_syncache_create_mbuf = 
lomac_syncache_create_mbuf, .mpo_system_check_acct = lomac_system_check_acct, .mpo_system_check_auditctl = lomac_system_check_auditctl, .mpo_system_check_swapoff = lomac_system_check_swapoff, .mpo_system_check_swapon = lomac_system_check_swapon, .mpo_system_check_sysctl = lomac_system_check_sysctl, .mpo_thread_userret = lomac_thread_userret, .mpo_vnode_associate_extattr = lomac_vnode_associate_extattr, .mpo_vnode_associate_singlelabel = lomac_vnode_associate_singlelabel, .mpo_vnode_check_access = lomac_vnode_check_open, .mpo_vnode_check_create = lomac_vnode_check_create, .mpo_vnode_check_deleteacl = lomac_vnode_check_deleteacl, .mpo_vnode_check_link = lomac_vnode_check_link, .mpo_vnode_check_mmap = lomac_vnode_check_mmap, .mpo_vnode_check_mmap_downgrade = lomac_vnode_check_mmap_downgrade, .mpo_vnode_check_open = lomac_vnode_check_open, .mpo_vnode_check_read = lomac_vnode_check_read, .mpo_vnode_check_relabel = lomac_vnode_check_relabel, .mpo_vnode_check_rename_from = lomac_vnode_check_rename_from, .mpo_vnode_check_rename_to = lomac_vnode_check_rename_to, .mpo_vnode_check_revoke = lomac_vnode_check_revoke, .mpo_vnode_check_setacl = lomac_vnode_check_setacl, .mpo_vnode_check_setextattr = lomac_vnode_check_setextattr, .mpo_vnode_check_setflags = lomac_vnode_check_setflags, .mpo_vnode_check_setmode = lomac_vnode_check_setmode, .mpo_vnode_check_setowner = lomac_vnode_check_setowner, .mpo_vnode_check_setutimes = lomac_vnode_check_setutimes, .mpo_vnode_check_unlink = lomac_vnode_check_unlink, .mpo_vnode_check_write = lomac_vnode_check_write, .mpo_vnode_copy_label = lomac_copy_label, .mpo_vnode_create_extattr = lomac_vnode_create_extattr, .mpo_vnode_destroy_label = lomac_destroy_label, .mpo_vnode_execve_transition = lomac_vnode_execve_transition, .mpo_vnode_execve_will_transition = lomac_vnode_execve_will_transition, .mpo_vnode_externalize_label = lomac_externalize_label, .mpo_vnode_init_label = lomac_init_label, .mpo_vnode_internalize_label = lomac_internalize_label, .mpo_vnode_relabel = lomac_vnode_relabel, .mpo_vnode_setlabel_extattr = lomac_vnode_setlabel_extattr, }; MAC_POLICY_SET(&lomac_ops, mac_lomac, "TrustedBSD MAC/LOMAC", MPC_LOADTIME_FLAG_NOTLATE, &lomac_slot); diff --git a/sys/sys/priv.h b/sys/sys/priv.h index 45cb5bab4275..a61de8d32fe0 100644 --- a/sys/sys/priv.h +++ b/sys/sys/priv.h @@ -1,558 +1,559 @@ /*- * SPDX-License-Identifier: BSD-2-Clause * * Copyright (c) 2006 nCircle Network Security, Inc. * All rights reserved. * * This software was developed by Robert N. M. Watson for the TrustedBSD * Project under contract to nCircle Network Security, Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR, NCIRCLE NETWORK SECURITY, * INC., OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ /* * Privilege checking interface for BSD kernel. */ #ifndef _SYS_PRIV_H_ #define _SYS_PRIV_H_ /* * Privilege list, sorted loosely by kernel subsystem. * * Think carefully before adding or reusing one of these privileges -- are * there existing instances referring to the same privilege? Third party * vendors may request the assignment of privileges to be used in loadable * modules. Particular numeric privilege assignments are part of the * loadable kernel module ABI, and should not be changed across minor * releases. * * When adding a new privilege, remember to determine if it's appropriate * for use in jail, and update the privilege switch in prison_priv_check() * in kern_jail.c as necessary. */ /* * Track beginning of privilege list. */ #define _PRIV_LOWEST 1 /* * The remaining privileges typically correspond to one or a small * number of specific privilege checks, and have (relatively) precise * meanings. They are loosely sorted into a set of base system * privileges, such as the ability to reboot, and then loosely by * subsystem, indicated by a subsystem name. */ #define _PRIV_ROOT 1 /* Removed. */ #define PRIV_ACCT 2 /* Manage process accounting. */ #define PRIV_MAXFILES 3 /* Exceed system open files limit. */ #define PRIV_MAXPROC 4 /* Exceed system processes limit. */ #define PRIV_KTRACE 5 /* Set/clear KTRFAC_ROOT on ktrace. */ #define PRIV_SETDUMPER 6 /* Configure dump device. */ #define PRIV_REBOOT 8 /* Can reboot system. */ #define PRIV_SWAPON 9 /* Can swapon(). */ #define PRIV_SWAPOFF 10 /* Can swapoff(). */ #define PRIV_MSGBUF 11 /* Can read kernel message buffer. */ #define PRIV_IO 12 /* Can perform low-level I/O. */ #define PRIV_KEYBOARD 13 /* Reprogram keyboard. */ #define PRIV_DRIVER 14 /* Low-level driver privilege. */ #define PRIV_ADJTIME 15 /* Set time adjustment. */ #define PRIV_NTP_ADJTIME 16 /* Set NTP time adjustment. */ #define PRIV_CLOCK_SETTIME 17 /* Can call clock_settime. */ #define PRIV_SETTIMEOFDAY 18 /* Can call settimeofday. */ #define _PRIV_SETHOSTID 19 /* Removed. */ #define _PRIV_SETDOMAINNAME 20 /* Removed. */ /* * Audit subsystem privileges. */ #define PRIV_AUDIT_CONTROL 40 /* Can configure audit. */ #define PRIV_AUDIT_FAILSTOP 41 /* Can run during audit fail stop. */ #define PRIV_AUDIT_GETAUDIT 42 /* Can get proc audit properties. */ #define PRIV_AUDIT_SETAUDIT 43 /* Can set proc audit properties. */ #define PRIV_AUDIT_SUBMIT 44 /* Can submit an audit record. */ /* * Credential management privileges. */ #define PRIV_CRED_SETUID 50 /* setuid. */ #define PRIV_CRED_SETEUID 51 /* seteuid to !ruid and !svuid. */ #define PRIV_CRED_SETGID 52 /* setgid. */ #define PRIV_CRED_SETEGID 53 /* setgid to !rgid and !svgid. */ #define PRIV_CRED_SETGROUPS 54 /* Set process additional groups. */ #define PRIV_CRED_SETREUID 55 /* setreuid. */ #define PRIV_CRED_SETREGID 56 /* setregid. */ #define PRIV_CRED_SETRESUID 57 /* setresuid. */ #define PRIV_CRED_SETRESGID 58 /* setresgid. 
*/ #define PRIV_SEEOTHERGIDS 59 /* Exempt bsd.seeothergids. */ #define PRIV_SEEOTHERUIDS 60 /* Exempt bsd.seeotheruids. */ +#define PRIV_SEEJAILPROC 61 /* Exempt from bsd.see_jail_proc. */ /* * Debugging privileges. */ #define PRIV_DEBUG_DIFFCRED 80 /* Exempt debugging other users. */ #define PRIV_DEBUG_SUGID 81 /* Exempt debugging setuid proc. */ #define PRIV_DEBUG_UNPRIV 82 /* Exempt unprivileged debug limit. */ #define PRIV_DEBUG_DENIED 83 /* Exempt P2_NOTRACE. */ /* * Dtrace privileges. */ #define PRIV_DTRACE_KERNEL 90 /* Allow use of DTrace on the kernel. */ #define PRIV_DTRACE_PROC 91 /* Allow attaching DTrace to process. */ #define PRIV_DTRACE_USER 92 /* Process may submit DTrace events. */ /* * Firmware privilegs. */ #define PRIV_FIRMWARE_LOAD 100 /* Can load firmware. */ /* * Jail privileges. */ #define PRIV_JAIL_ATTACH 110 /* Attach to a jail. */ #define PRIV_JAIL_SET 111 /* Set jail parameters. */ #define PRIV_JAIL_REMOVE 112 /* Remove a jail. */ /* * Kernel environment privileges. */ #define PRIV_KENV_SET 120 /* Set kernel env. variables. */ #define PRIV_KENV_UNSET 121 /* Unset kernel env. variables. */ /* * Loadable kernel module privileges. */ #define PRIV_KLD_LOAD 130 /* Load a kernel module. */ #define PRIV_KLD_UNLOAD 131 /* Unload a kernel module. */ /* * Privileges associated with the MAC Framework and specific MAC policy * modules. */ #define PRIV_MAC_PARTITION 140 /* Privilege in mac_partition policy. */ #define PRIV_MAC_PRIVS 141 /* Privilege in the mac_privs policy. */ /* * Process-related privileges. */ #define PRIV_PROC_LIMIT 160 /* Exceed user process limit. */ #define PRIV_PROC_SETLOGIN 161 /* Can call setlogin. */ #define PRIV_PROC_SETRLIMIT 162 /* Can raise resources limits. */ #define PRIV_PROC_SETLOGINCLASS 163 /* Can call setloginclass(2). */ /* * System V IPC privileges. */ #define PRIV_IPC_READ 170 /* Can override IPC read perm. */ #define PRIV_IPC_WRITE 171 /* Can override IPC write perm. */ #define PRIV_IPC_ADMIN 172 /* Can override IPC owner-only perm. */ #define PRIV_IPC_MSGSIZE 173 /* Exempt IPC message queue limit. */ /* * POSIX message queue privileges. */ #define PRIV_MQ_ADMIN 180 /* Can override msgq owner-only perm. */ /* * Performance monitoring counter privileges. */ #define PRIV_PMC_MANAGE 190 /* Can administer PMC. */ #define PRIV_PMC_SYSTEM 191 /* Can allocate a system-wide PMC. */ /* * Scheduling privileges. */ #define PRIV_SCHED_DIFFCRED 200 /* Exempt scheduling other users. */ #define PRIV_SCHED_SETPRIORITY 201 /* Can set lower nice value for proc. */ #define PRIV_SCHED_RTPRIO 202 /* Can set real time scheduling. */ #define PRIV_SCHED_SETPOLICY 203 /* Can set scheduler policy. */ #define PRIV_SCHED_SET 204 /* Can set thread scheduler. */ #define PRIV_SCHED_SETPARAM 205 /* Can set thread scheduler params. */ #define PRIV_SCHED_CPUSET 206 /* Can manipulate cpusets. */ #define PRIV_SCHED_CPUSET_INTR 207 /* Can adjust IRQ to CPU binding. */ #define PRIV_SCHED_IDPRIO 208 /* Can set idle time scheduling. */ /* * POSIX semaphore privileges. */ #define PRIV_SEM_WRITE 220 /* Can override sem write perm. */ /* * Signal privileges. */ #define PRIV_SIGNAL_DIFFCRED 230 /* Exempt signalling other users. */ #define PRIV_SIGNAL_SUGID 231 /* Non-conserv signal setuid proc. */ /* * Sysctl privileges. */ #define PRIV_SYSCTL_DEBUG 240 /* Can invoke sysctl.debug. */ #define PRIV_SYSCTL_WRITE 241 /* Can write sysctls. */ #define PRIV_SYSCTL_WRITEJAIL 242 /* Can write sysctls, jail permitted. */ /* * TTY privileges. 
*/ #define PRIV_TTY_CONSOLE 250 /* Set console to tty. */ #define PRIV_TTY_DRAINWAIT 251 /* Set tty drain wait time. */ #define PRIV_TTY_DTRWAIT 252 /* Set DTR wait on tty. */ #define PRIV_TTY_EXCLUSIVE 253 /* Override tty exclusive flag. */ #define _PRIV_TTY_PRISON 254 /* Removed. */ #define PRIV_TTY_STI 255 /* Simulate input on another tty. */ #define PRIV_TTY_SETA 256 /* Set tty termios structure. */ /* * UFS-specific privileges. */ #define PRIV_UFS_EXTATTRCTL 270 /* Can configure EAs on UFS1. */ #define PRIV_UFS_QUOTAOFF 271 /* quotaoff(). */ #define PRIV_UFS_QUOTAON 272 /* quotaon(). */ #define PRIV_UFS_SETUSE 273 /* setuse(). */ /* * ZFS-specific privileges. */ #define PRIV_ZFS_POOL_CONFIG 280 /* Can configure ZFS pools. */ #define PRIV_ZFS_INJECT 281 /* Can inject faults in the ZFS fault injection framework. */ #define PRIV_ZFS_JAIL 282 /* Can attach/detach ZFS file systems to/from jails. */ /* * NFS-specific privileges. */ #define PRIV_NFS_DAEMON 290 /* Can become the NFS daemon. */ #define PRIV_NFS_LOCKD 291 /* Can become NFS lock daemon. */ /* * VFS privileges. */ #define PRIV_VFS_READ 310 /* Override vnode DAC read perm. */ #define PRIV_VFS_WRITE 311 /* Override vnode DAC write perm. */ #define PRIV_VFS_ADMIN 312 /* Override vnode DAC admin perm. */ #define PRIV_VFS_EXEC 313 /* Override vnode DAC exec perm. */ #define PRIV_VFS_LOOKUP 314 /* Override vnode DAC lookup perm. */ #define PRIV_VFS_BLOCKRESERVE 315 /* Can use free block reserve. */ #define PRIV_VFS_CHFLAGS_DEV 316 /* Can chflags() a device node. */ #define PRIV_VFS_CHOWN 317 /* Can set user; group to non-member. */ #define PRIV_VFS_CHROOT 318 /* chroot(). */ #define PRIV_VFS_RETAINSUGID 319 /* Can retain sugid bits on change. */ #define PRIV_VFS_EXCEEDQUOTA 320 /* Exempt from quota restrictions. */ #define PRIV_VFS_EXTATTR_SYSTEM 321 /* Operate on system EA namespace. */ #define PRIV_VFS_FCHROOT 322 /* fchroot(). */ #define PRIV_VFS_FHOPEN 323 /* Can fhopen(). */ #define PRIV_VFS_FHSTAT 324 /* Can fhstat(). */ #define PRIV_VFS_FHSTATFS 325 /* Can fhstatfs(). */ #define PRIV_VFS_GENERATION 326 /* stat() returns generation number. */ #define PRIV_VFS_GETFH 327 /* Can retrieve file handles. */ #define PRIV_VFS_GETQUOTA 328 /* getquota(). */ #define PRIV_VFS_LINK 329 /* bsd.hardlink_check_uid */ #define PRIV_VFS_MKNOD_BAD 330 /* Was: mknod() can mark bad inodes. */ #define PRIV_VFS_MKNOD_DEV 331 /* Can mknod() to create dev nodes. */ #define PRIV_VFS_MKNOD_WHT 332 /* Can mknod() to create whiteout. */ #define PRIV_VFS_MOUNT 333 /* Can mount(). */ #define PRIV_VFS_MOUNT_OWNER 334 /* Can manage other users' file systems. */ #define PRIV_VFS_MOUNT_EXPORTED 335 /* Can set MNT_EXPORTED on mount. */ #define PRIV_VFS_MOUNT_PERM 336 /* Override dev node perms at mount. */ #define PRIV_VFS_MOUNT_SUIDDIR 337 /* Can set MNT_SUIDDIR on mount. */ #define PRIV_VFS_MOUNT_NONUSER 338 /* Can perform a non-user mount. */ #define PRIV_VFS_SETGID 339 /* Can setgid if not in group. */ #define PRIV_VFS_SETQUOTA 340 /* setquota(). */ #define PRIV_VFS_STICKYFILE 341 /* Can set sticky bit on file. */ #define PRIV_VFS_SYSFLAGS 342 /* Can modify system flags. */ #define PRIV_VFS_UNMOUNT 343 /* Can unmount(). */ #define PRIV_VFS_STAT 344 /* Override vnode MAC stat perm. */ #define PRIV_VFS_READ_DIR 345 /* Can read(2) a dirfd, needs sysctl. */ /* * Virtual memory privileges. */ #define PRIV_VM_MADV_PROTECT 360 /* Can set MADV_PROTECT. */ #define PRIV_VM_MLOCK 361 /* Can mlock(), mlockall(). 
*/ #define PRIV_VM_MUNLOCK 362 /* Can munlock(), munlockall(). */ #define PRIV_VM_SWAP_NOQUOTA 363 /* * Can override the global * swap reservation limits. */ #define PRIV_VM_SWAP_NORLIMIT 364 /* * Can override the per-uid * swap reservation limits. */ /* * Device file system privileges. */ #define PRIV_DEVFS_RULE 370 /* Can manage devfs rules. */ #define PRIV_DEVFS_SYMLINK 371 /* Can create symlinks in devfs. */ /* * Random number generator privileges. */ #define PRIV_RANDOM_RESEED 380 /* Closing /dev/random reseeds. */ /* * Network stack privileges. */ #define PRIV_NET_BRIDGE 390 /* Administer bridge. */ #define PRIV_NET_GRE 391 /* Administer GRE. */ #define _PRIV_NET_PPP 392 /* Removed. */ #define _PRIV_NET_SLIP 393 /* Removed. */ #define PRIV_NET_BPF 394 /* Monitor BPF. */ #define PRIV_NET_RAW 395 /* Open raw socket. */ #define PRIV_NET_ROUTE 396 /* Administer routing. */ #define PRIV_NET_TAP 397 /* Can open tap device. */ #define PRIV_NET_SETIFMTU 398 /* Set interface MTU. */ #define PRIV_NET_SETIFFLAGS 399 /* Set interface flags. */ #define PRIV_NET_SETIFCAP 400 /* Set interface capabilities. */ #define PRIV_NET_SETIFNAME 401 /* Set interface name. */ #define PRIV_NET_SETIFMETRIC 402 /* Set interface metrics. */ #define PRIV_NET_SETIFPHYS 403 /* Set interface physical layer prop. */ #define PRIV_NET_SETIFMAC 404 /* Set interface MAC label. */ #define PRIV_NET_ADDMULTI 405 /* Add multicast addr. to ifnet. */ #define PRIV_NET_DELMULTI 406 /* Delete multicast addr. from ifnet. */ #define PRIV_NET_HWIOCTL 407 /* Issue hardware ioctl on ifnet. */ #define PRIV_NET_SETLLADDR 408 /* Set interface link-level address. */ #define PRIV_NET_ADDIFGROUP 409 /* Add new interface group. */ #define PRIV_NET_DELIFGROUP 410 /* Delete interface group. */ #define PRIV_NET_IFCREATE 411 /* Create cloned interface. */ #define PRIV_NET_IFDESTROY 412 /* Destroy cloned interface. */ #define PRIV_NET_ADDIFADDR 413 /* Add protocol addr to interface. */ #define PRIV_NET_DELIFADDR 414 /* Delete protocol addr on interface. */ #define PRIV_NET_LAGG 415 /* Administer lagg interface. */ #define PRIV_NET_GIF 416 /* Administer gif interface. */ #define PRIV_NET_SETIFVNET 417 /* Move interface to vnet. */ #define PRIV_NET_SETIFDESCR 418 /* Set interface description. */ #define PRIV_NET_SETIFFIB 419 /* Set interface fib. */ #define PRIV_NET_VXLAN 420 /* Administer vxlan. */ #define PRIV_NET_SETLANPCP 421 /* Set LAN priority. */ #define PRIV_NET_SETVLANPCP PRIV_NET_SETLANPCP /* Alias Set VLAN priority */ #define PRIV_NET_OVPN 422 /* Administer OpenVPN DCO. */ #define PRIV_NET_ME 423 /* Administer ME interface. */ #define PRIV_NET_WG 424 /* Administer WireGuard interface. */ /* * 802.11-related privileges. */ #define PRIV_NET80211_VAP_GETKEY 440 /* Query VAP 802.11 keys. */ #define PRIV_NET80211_VAP_MANAGE 441 /* Administer 802.11 VAP */ #define PRIV_NET80211_VAP_SETMAC 442 /* Set VAP MAC address */ #define PRIV_NET80211_CREATE_VAP 443 /* Create a new VAP */ /* * Placeholder for AppleTalk privileges, not supported anymore. */ #define _PRIV_NETATALK_RESERVEDPORT 450 /* Bind low port number. */ /* * ATM privileges. */ #define PRIV_NETATM_CFG 460 #define PRIV_NETATM_ADD 461 #define PRIV_NETATM_DEL 462 #define PRIV_NETATM_SET 463 /* * Bluetooth privileges. */ #define PRIV_NETBLUETOOTH_RAW 470 /* Open raw bluetooth socket. */ /* * Netgraph and netgraph module privileges. */ #define PRIV_NETGRAPH_CONTROL 480 /* Open netgraph control socket. */ #define PRIV_NETGRAPH_TTY 481 /* Configure tty for netgraph. 
*/ /* * IPv4 and IPv6 privileges. */ #define PRIV_NETINET_RESERVEDPORT 490 /* Bind low port number. */ #define PRIV_NETINET_IPFW 491 /* Administer IPFW firewall. */ #define PRIV_NETINET_DIVERT 492 /* Open IP divert socket. */ #define PRIV_NETINET_PF 493 /* Administer pf firewall. */ #define PRIV_NETINET_DUMMYNET 494 /* Administer DUMMYNET. */ #define PRIV_NETINET_CARP 495 /* Administer CARP. */ #define PRIV_NETINET_MROUTE 496 /* Administer multicast routing. */ #define PRIV_NETINET_RAW 497 /* Open netinet raw socket. */ #define PRIV_NETINET_GETCRED 498 /* Query netinet pcb credentials. */ #define PRIV_NETINET_ADDRCTRL6 499 /* Administer IPv6 address scopes. */ #define PRIV_NETINET_ND6 500 /* Administer IPv6 neighbor disc. */ #define PRIV_NETINET_SCOPE6 501 /* Administer IPv6 address scopes. */ #define PRIV_NETINET_ALIFETIME6 502 /* Administer IPv6 address lifetimes. */ #define PRIV_NETINET_IPSEC 503 /* Administer IPSEC. */ #define PRIV_NETINET_REUSEPORT 504 /* Allow [rapid] port/address reuse. */ #define PRIV_NETINET_SETHDROPTS 505 /* Set certain IPv4/6 header options. */ #define PRIV_NETINET_BINDANY 506 /* Allow bind to any address. */ #define PRIV_NETINET_HASHKEY 507 /* Get and set hash keys for IPv4/6. */ /* * Placeholders for IPX/SPX privileges, not supported any more. */ #define _PRIV_NETIPX_RESERVEDPORT 520 /* Bind low port number. */ #define _PRIV_NETIPX_RAW 521 /* Open netipx raw socket. */ /* * NCP privileges. */ #define PRIV_NETNCP 530 /* Use another user's connection. */ /* * SMB privileges. */ #define PRIV_NETSMB 540 /* Use another user's connection. */ /* * VM86 privileges. */ #define PRIV_VM86_INTCALL 550 /* Allow invoking vm86 int handlers. */ /* * Set of reserved privilege values, which will be allocated to code as * needed, in order to avoid renumbering later privileges due to insertion. */ #define _PRIV_RESERVED0 560 #define _PRIV_RESERVED1 561 #define _PRIV_RESERVED2 562 #define _PRIV_RESERVED3 563 #define _PRIV_RESERVED4 564 #define _PRIV_RESERVED5 565 #define _PRIV_RESERVED6 566 #define _PRIV_RESERVED7 567 #define _PRIV_RESERVED8 568 #define _PRIV_RESERVED9 569 #define _PRIV_RESERVED10 570 #define _PRIV_RESERVED11 571 #define _PRIV_RESERVED12 572 #define _PRIV_RESERVED13 573 #define _PRIV_RESERVED14 574 #define _PRIV_RESERVED15 575 /* * Define a set of valid privilege numbers that can be used by loadable * modules that don't yet have privilege reservations. Ideally, these should * not be used, since their meaning is opaque to any policies that are aware * of specific privileges, such as jail, and as such may be arbitrarily * denied. */ #define PRIV_MODULE0 600 #define PRIV_MODULE1 601 #define PRIV_MODULE2 602 #define PRIV_MODULE3 603 #define PRIV_MODULE4 604 #define PRIV_MODULE5 605 #define PRIV_MODULE6 606 #define PRIV_MODULE7 607 #define PRIV_MODULE8 608 #define PRIV_MODULE9 609 #define PRIV_MODULE10 610 #define PRIV_MODULE11 611 #define PRIV_MODULE12 612 #define PRIV_MODULE13 613 #define PRIV_MODULE14 614 #define PRIV_MODULE15 615 /* * DDB(4) privileges. */ #define PRIV_DDB_CAPTURE 620 /* Allow reading of DDB capture log. */ /* * Arla/nnpfs privileges. */ #define PRIV_NNPFS_DEBUG 630 /* Perforn ARLA_VIOC_NNPFSDEBUG. */ /* * cpuctl(4) privileges. */ #define PRIV_CPUCTL_WRMSR 640 /* Write model-specific register. */ #define PRIV_CPUCTL_UPDATE 641 /* Update cpu microcode. */ /* * Capi4BSD privileges. */ #define PRIV_C4B_RESET_CTLR 650 /* Load firmware, reset controller. */ #define PRIV_C4B_TRACE 651 /* Unrestricted CAPI message tracing. */ /* * OpenAFS privileges. 
*/ #define PRIV_AFS_ADMIN 660 /* Can change AFS client settings. */ #define PRIV_AFS_DAEMON 661 /* Can become the AFS daemon. */ /* * Resource Limits privileges. */ #define PRIV_RCTL_GET_RACCT 670 #define PRIV_RCTL_GET_RULES 671 #define PRIV_RCTL_GET_LIMITS 672 #define PRIV_RCTL_ADD_RULE 673 #define PRIV_RCTL_REMOVE_RULE 674 /* * mem(4) privileges. */ #define PRIV_KMEM_READ 680 /* Open mem/kmem for reading. */ #define PRIV_KMEM_WRITE 681 /* Open mem/kmem for writing. */ /* * Kernel debugger privileges. */ #define PRIV_KDB_SET_BACKEND 690 /* Allow setting KDB backend. */ /* * veriexec override privileges - very rare! */ #define PRIV_VERIEXEC_DIRECT 700 /* Can override 'indirect' */ #define PRIV_VERIEXEC_NOVERIFY 701 /* Can override O_VERIFY */ #define PRIV_VERIEXEC_CONTROL 702 /* Can configure veriexec */ /* * Track end of privilege list. */ #define _PRIV_HIGHEST 703 /* * Validate that a named privilege is known by the privilege system. Invalid * privileges presented to the privilege system by a priv_check interface * will result in a panic. This is only approximate due to sparse allocation * of the privilege space. */ #define PRIV_VALID(x) ((x) > _PRIV_LOWEST && (x) < _PRIV_HIGHEST) #ifdef _KERNEL /* * Privilege check interfaces, modeled after historic suser() interfaces, but * with the addition of a specific privilege name. No flags are currently * defined for the API. Historically, flags specified using the real uid * instead of the effective uid, and whether or not the check should be * allowed in jail. */ struct thread; struct ucred; int priv_check(struct thread *td, int priv); int priv_check_cred(struct ucred *cred, int priv); int priv_check_cred_vfs_lookup(struct ucred *cred); int priv_check_cred_vfs_lookup_nomac(struct ucred *cred); int priv_check_cred_vfs_generation(struct ucred *cred); #endif #endif /* !_SYS_PRIV_H_ */
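
For reviewers wanting to see where the new privilege actually gets consulted: the two hunks above only (a) whitelist PRIV_SEEJAILPROC in LOMAC's mpo_priv_check hook, next to the existing visibility exemptions PRIV_SEEOTHERGIDS and PRIV_SEEOTHERUIDS, and (b) reserve number 61 for it in sys/sys/priv.h with the comment "Exempt from bsd.see_jail_proc." The consumer is the process-visibility path that honors that knob, which is not part of this diff. The fragment below is a minimal illustrative sketch of that consumer pattern, not code from this change: the helper name, the local see_jail_proc variable, and the exact visibility rule are assumptions made for the example; only the priv_check_cred() interface declared at the end of priv.h above and the new PRIV_SEEJAILPROC constant come from the patch.

/*
 * Illustrative sketch only (not part of this diff): a visibility helper
 * that lets a credential holding PRIV_SEEJAILPROC bypass a
 * see_jail_proc-style restriction.  The helper name and the knob
 * variable are hypothetical.
 */
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/errno.h>
#include <sys/priv.h>
#include <sys/ucred.h>

static int see_jail_proc = 1;	/* hypothetical stand-in for the sysctl knob */

static int
see_jail_proc_check(struct ucred *u1, struct ucred *u2)
{
	/* Knob permissive, or the caller is exempt via the new privilege. */
	if (see_jail_proc || priv_check_cred(u1, PRIV_SEEJAILPROC) == 0)
		return (0);
	/* Caller shares the target's jail: always visible. */
	if (u1->cr_prison == u2->cr_prison)
		return (0);
	/* Otherwise behave as if the target process did not exist. */
	return (ESRCH);
}

Note also that adding the case to lomac_priv_check() only means LOMAC declines to veto the privilege for low-integrity subjects, the same treatment the other monitoring exemptions already get; whether the privilege is actually granted is still decided by the normal priv_check_cred() path and any other loaded policies.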