Index: head/sys/kern/sys_pipe.c =================================================================== --- head/sys/kern/sys_pipe.c (revision 367832) +++ head/sys/kern/sys_pipe.c (revision 367833) @@ -1,1814 +1,1813 @@ /*- * SPDX-License-Identifier: BSD-4-Clause * * Copyright (c) 1996 John S. Dyson * Copyright (c) 2012 Giovanni Trematerra * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice immediately at the beginning of the file, without modification, * this list of conditions, and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Absolutely no warranty of function or purpose is made by the author * John S. Dyson. * 4. Modifications may be freely made to this file if the above conditions * are met. */ /* * This file contains a high-performance replacement for the socket-based * pipes scheme originally used in FreeBSD/4.4Lite. It does not support * all features of sockets, but does do everything that pipes normally * do. */ /* * This code has two modes of operation, a small write mode and a large * write mode. The small write mode acts like conventional pipes with * a kernel buffer. If the buffer is less than PIPE_MINDIRECT, then the * "normal" pipe buffering is done. If the buffer is between PIPE_MINDIRECT * and PIPE_SIZE in size, the sending process pins the underlying pages in * memory, and the receiving process copies directly from these pinned pages * in the sending process. * * If the sending process receives a signal, it is possible that it will * go away, and certainly its address space can change, because control * is returned back to the user-mode side. 
In that case, the pipe code * arranges to copy the buffer supplied by the user process, to a pageable * kernel buffer, and the receiving process will grab the data from the * pageable kernel buffer. Since signals don't happen all that often, * the copy operation is normally eliminated. * * The constant PIPE_MINDIRECT is chosen to make sure that buffering will * happen for small transfers so that the system will not spend all of * its time context switching. * * In order to limit the resource use of pipes, two sysctls exist: * * kern.ipc.maxpipekva - This is a hard limit on the amount of pageable * address space available to us in pipe_map. This value is normally * autotuned, but may also be loader tuned. * * kern.ipc.pipekva - This read-only sysctl tracks the current amount of * memory in use by pipes. * * Based on how large pipekva is relative to maxpipekva, the following * will happen: * * 0% - 50%: * New pipes are given 16K of memory backing, pipes may dynamically * grow to as large as 64K where needed. * 50% - 75%: * New pipes are given 4K (or PAGE_SIZE) of memory backing, * existing pipes may NOT grow. * 75% - 100%: * New pipes are given 4K (or PAGE_SIZE) of memory backing, * existing pipes will be shrunk down to 4K whenever possible. * * Resizing may be disabled by setting kern.ipc.piperesizeallowed=0. If * that is set, the only resize that will occur is the 0 -> SMALL_PIPE_SIZE * resize which MUST occur for reverse-direction pipes when they are * first used. * * Additional information about the current state of pipes may be obtained * from kern.ipc.pipes, kern.ipc.pipefragretry, kern.ipc.pipeallocfail, * and kern.ipc.piperesizefail. * * Locking rules: There are two locks present here: A mutex, used via * PIPE_LOCK, and a flag, used via pipelock(). All locking is done via * the flag, as mutexes can not persist over uiomove. The mutex * exists only to guard access to the flag, and is not in itself a * locking mechanism. 
Also note that there is only a single mutex for * both directions of a pipe. * * As pipelock() may have to sleep before it can acquire the flag, it * is important to reread all data after a call to pipelock(); everything * in the structure may have changed. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include /* * Use this define if you want to disable *fancy* VM things. Expect an * approx 30% decrease in transfer rate. This could be useful for * NetBSD or OpenBSD. */ /* #define PIPE_NODIRECT */ #define PIPE_PEER(pipe) \ - (((pipe)->pipe_state & PIPE_NAMED) ? (pipe) : ((pipe)->pipe_peer)) + (((pipe)->pipe_type & PIPE_TYPE_NAMED) ? (pipe) : ((pipe)->pipe_peer)) /* * interfaces to the outside world */ static fo_rdwr_t pipe_read; static fo_rdwr_t pipe_write; static fo_truncate_t pipe_truncate; static fo_ioctl_t pipe_ioctl; static fo_poll_t pipe_poll; static fo_kqfilter_t pipe_kqfilter; static fo_stat_t pipe_stat; static fo_close_t pipe_close; static fo_chmod_t pipe_chmod; static fo_chown_t pipe_chown; static fo_fill_kinfo_t pipe_fill_kinfo; struct fileops pipeops = { .fo_read = pipe_read, .fo_write = pipe_write, .fo_truncate = pipe_truncate, .fo_ioctl = pipe_ioctl, .fo_poll = pipe_poll, .fo_kqfilter = pipe_kqfilter, .fo_stat = pipe_stat, .fo_close = pipe_close, .fo_chmod = pipe_chmod, .fo_chown = pipe_chown, .fo_sendfile = invfo_sendfile, .fo_fill_kinfo = pipe_fill_kinfo, .fo_flags = DFLAG_PASSABLE }; static void filt_pipedetach(struct knote *kn); static void filt_pipedetach_notsup(struct knote *kn); static int filt_pipenotsup(struct knote *kn, long hint); static int filt_piperead(struct knote *kn, long hint); static int filt_pipewrite(struct knote *kn, long hint); static 
struct filterops pipe_nfiltops = { .f_isfd = 1, .f_detach = filt_pipedetach_notsup, .f_event = filt_pipenotsup }; static struct filterops pipe_rfiltops = { .f_isfd = 1, .f_detach = filt_pipedetach, .f_event = filt_piperead }; static struct filterops pipe_wfiltops = { .f_isfd = 1, .f_detach = filt_pipedetach, .f_event = filt_pipewrite }; /* * Default pipe buffer size(s), this can be kind-of large now because pipe * space is pageable. The pipe code will try to maintain locality of * reference for performance reasons, so small amounts of outstanding I/O * will not wipe the cache. */ #define MINPIPESIZE (PIPE_SIZE/3) #define MAXPIPESIZE (2*PIPE_SIZE/3) static long amountpipekva; static int pipefragretry; static int pipeallocfail; static int piperesizefail; static int piperesizeallowed = 1; SYSCTL_LONG(_kern_ipc, OID_AUTO, maxpipekva, CTLFLAG_RDTUN | CTLFLAG_NOFETCH, &maxpipekva, 0, "Pipe KVA limit"); SYSCTL_LONG(_kern_ipc, OID_AUTO, pipekva, CTLFLAG_RD, &amountpipekva, 0, "Pipe KVA usage"); SYSCTL_INT(_kern_ipc, OID_AUTO, pipefragretry, CTLFLAG_RD, &pipefragretry, 0, "Pipe allocation retries due to fragmentation"); SYSCTL_INT(_kern_ipc, OID_AUTO, pipeallocfail, CTLFLAG_RD, &pipeallocfail, 0, "Pipe allocation failures"); SYSCTL_INT(_kern_ipc, OID_AUTO, piperesizefail, CTLFLAG_RD, &piperesizefail, 0, "Pipe resize failures"); SYSCTL_INT(_kern_ipc, OID_AUTO, piperesizeallowed, CTLFLAG_RW, &piperesizeallowed, 0, "Pipe resizing allowed"); static void pipeinit(void *dummy __unused); static void pipeclose(struct pipe *cpipe); static void pipe_free_kmem(struct pipe *cpipe); static int pipe_create(struct pipe *pipe, bool backing); static int pipe_paircreate(struct thread *td, struct pipepair **p_pp); static __inline int pipelock(struct pipe *cpipe, int catch); static __inline void pipeunlock(struct pipe *cpipe); static void pipe_timestamp(struct timespec *tsp); #ifndef PIPE_NODIRECT static int pipe_build_write_buffer(struct pipe *wpipe, struct uio *uio); static void 
pipe_destroy_write_buffer(struct pipe *wpipe); static int pipe_direct_write(struct pipe *wpipe, struct uio *uio); static void pipe_clone_write_buffer(struct pipe *wpipe); #endif static int pipespace(struct pipe *cpipe, int size); static int pipespace_new(struct pipe *cpipe, int size); static int pipe_zone_ctor(void *mem, int size, void *arg, int flags); static int pipe_zone_init(void *mem, int size, int flags); static void pipe_zone_fini(void *mem, int size); static uma_zone_t pipe_zone; static struct unrhdr64 pipeino_unr; static dev_t pipedev_ino; SYSINIT(vfs, SI_SUB_VFS, SI_ORDER_ANY, pipeinit, NULL); static void pipeinit(void *dummy __unused) { pipe_zone = uma_zcreate("pipe", sizeof(struct pipepair), pipe_zone_ctor, NULL, pipe_zone_init, pipe_zone_fini, UMA_ALIGN_PTR, 0); KASSERT(pipe_zone != NULL, ("pipe_zone not initialized")); new_unrhdr64(&pipeino_unr, 1); pipedev_ino = devfs_alloc_cdp_inode(); KASSERT(pipedev_ino > 0, ("pipe dev inode not initialized")); } static int pipe_zone_ctor(void *mem, int size, void *arg, int flags) { struct pipepair *pp; struct pipe *rpipe, *wpipe; KASSERT(size == sizeof(*pp), ("pipe_zone_ctor: wrong size")); pp = (struct pipepair *)mem; /* * We zero both pipe endpoints to make sure all the kmem pointers * are NULL, flag fields are zero'd, etc. We timestamp both * endpoints with the same time. */ rpipe = &pp->pp_rpipe; bzero(rpipe, sizeof(*rpipe)); pipe_timestamp(&rpipe->pipe_ctime); rpipe->pipe_atime = rpipe->pipe_mtime = rpipe->pipe_ctime; wpipe = &pp->pp_wpipe; bzero(wpipe, sizeof(*wpipe)); wpipe->pipe_ctime = rpipe->pipe_ctime; wpipe->pipe_atime = wpipe->pipe_mtime = rpipe->pipe_ctime; rpipe->pipe_peer = wpipe; rpipe->pipe_pair = pp; wpipe->pipe_peer = rpipe; wpipe->pipe_pair = pp; /* * Mark both endpoints as present; they will later get free'd * one at a time. When both are free'd, then the whole pair * is released. 
*/ rpipe->pipe_present = PIPE_ACTIVE; wpipe->pipe_present = PIPE_ACTIVE; /* * Eventually, the MAC Framework may initialize the label * in ctor or init, but for now we do it elswhere to avoid * blocking in ctor or init. */ pp->pp_label = NULL; return (0); } static int pipe_zone_init(void *mem, int size, int flags) { struct pipepair *pp; KASSERT(size == sizeof(*pp), ("pipe_zone_init: wrong size")); pp = (struct pipepair *)mem; mtx_init(&pp->pp_mtx, "pipe mutex", NULL, MTX_DEF | MTX_NEW); return (0); } static void pipe_zone_fini(void *mem, int size) { struct pipepair *pp; KASSERT(size == sizeof(*pp), ("pipe_zone_fini: wrong size")); pp = (struct pipepair *)mem; mtx_destroy(&pp->pp_mtx); } static int pipe_paircreate(struct thread *td, struct pipepair **p_pp) { struct pipepair *pp; struct pipe *rpipe, *wpipe; int error; *p_pp = pp = uma_zalloc(pipe_zone, M_WAITOK); #ifdef MAC /* * The MAC label is shared between the connected endpoints. As a * result mac_pipe_init() and mac_pipe_create() are called once * for the pair, and not on the endpoints. */ mac_pipe_init(pp); mac_pipe_create(td->td_ucred, pp); #endif rpipe = &pp->pp_rpipe; wpipe = &pp->pp_wpipe; knlist_init_mtx(&rpipe->pipe_sel.si_note, PIPE_MTX(rpipe)); knlist_init_mtx(&wpipe->pipe_sel.si_note, PIPE_MTX(wpipe)); /* * Only the forward direction pipe is backed by big buffer by * default. */ error = pipe_create(rpipe, true); if (error != 0) goto fail; error = pipe_create(wpipe, false); if (error != 0) { /* * This cleanup leaves the pipe inode number for rpipe * still allocated, but never used. We do not free * inode numbers for opened pipes, which is required * for correctness because numbers must be unique. * But also it avoids any memory use by the unr * allocator, so stashing away the transient inode * number is reasonable. 
*/ pipe_free_kmem(rpipe); goto fail; } rpipe->pipe_state |= PIPE_DIRECTOK; wpipe->pipe_state |= PIPE_DIRECTOK; return (0); fail: knlist_destroy(&rpipe->pipe_sel.si_note); knlist_destroy(&wpipe->pipe_sel.si_note); #ifdef MAC mac_pipe_destroy(pp); #endif return (error); } int pipe_named_ctor(struct pipe **ppipe, struct thread *td) { struct pipepair *pp; int error; error = pipe_paircreate(td, &pp); if (error != 0) return (error); - pp->pp_rpipe.pipe_state |= PIPE_NAMED; + pp->pp_rpipe.pipe_type |= PIPE_TYPE_NAMED; *ppipe = &pp->pp_rpipe; return (0); } void pipe_dtor(struct pipe *dpipe) { struct pipe *peer; - peer = (dpipe->pipe_state & PIPE_NAMED) != 0 ? dpipe->pipe_peer : NULL; + peer = (dpipe->pipe_type & PIPE_TYPE_NAMED) != 0 ? dpipe->pipe_peer : NULL; funsetown(&dpipe->pipe_sigio); pipeclose(dpipe); if (peer != NULL) { funsetown(&peer->pipe_sigio); pipeclose(peer); } } /* * Get a timestamp. * * This used to be vfs_timestamp but the higher precision is unnecessary and * can very negatively affect performance in virtualized environments (e.g., on * vms running on amd64 when using the rdtscp instruction). */ static void pipe_timestamp(struct timespec *tsp) { getnanotime(tsp); } /* * The pipe system call for the DTYPE_PIPE type of pipes. If we fail, let * the zone pick up the pieces via pipeclose(). */ int kern_pipe(struct thread *td, int fildes[2], int flags, struct filecaps *fcaps1, struct filecaps *fcaps2) { struct file *rf, *wf; struct pipe *rpipe, *wpipe; struct pipepair *pp; int fd, fflags, error; error = pipe_paircreate(td, &pp); if (error != 0) return (error); rpipe = &pp->pp_rpipe; wpipe = &pp->pp_wpipe; error = falloc_caps(td, &rf, &fd, flags, fcaps1); if (error) { pipeclose(rpipe); pipeclose(wpipe); return (error); } /* An extra reference on `rf' has been held for us by falloc_caps(). 
*/ fildes[0] = fd; fflags = FREAD | FWRITE; if ((flags & O_NONBLOCK) != 0) fflags |= FNONBLOCK; /* * Warning: once we've gotten past allocation of the fd for the * read-side, we can only drop the read side via fdrop() in order * to avoid races against processes which manage to dup() the read * side while we are blocked trying to allocate the write side. */ finit(rf, fflags, DTYPE_PIPE, rpipe, &pipeops); error = falloc_caps(td, &wf, &fd, flags, fcaps2); if (error) { fdclose(td, rf, fildes[0]); fdrop(rf, td); /* rpipe has been closed by fdrop(). */ pipeclose(wpipe); return (error); } /* An extra reference on `wf' has been held for us by falloc_caps(). */ finit(wf, fflags, DTYPE_PIPE, wpipe, &pipeops); fdrop(wf, td); fildes[1] = fd; fdrop(rf, td); return (0); } #ifdef COMPAT_FREEBSD10 /* ARGSUSED */ int freebsd10_pipe(struct thread *td, struct freebsd10_pipe_args *uap __unused) { int error; int fildes[2]; error = kern_pipe(td, fildes, 0, NULL, NULL); if (error) return (error); td->td_retval[0] = fildes[0]; td->td_retval[1] = fildes[1]; return (0); } #endif int sys_pipe2(struct thread *td, struct pipe2_args *uap) { int error, fildes[2]; if (uap->flags & ~(O_CLOEXEC | O_NONBLOCK)) return (EINVAL); error = kern_pipe(td, fildes, uap->flags, NULL, NULL); if (error) return (error); error = copyout(fildes, uap->fildes, 2 * sizeof(int)); if (error) { (void)kern_close(td, fildes[0]); (void)kern_close(td, fildes[1]); } return (error); } /* * Allocate kva for pipe circular buffer, the space is pageable * This routine will 'realloc' the size of a pipe safely, if it fails * it will retain the old buffer. * If it fails it will return ENOMEM. 
*/
static int
pipespace_new(struct pipe *cpipe, int size)
{
	static struct timeval lastfail;
	static int curfail = 0;
	caddr_t newbuf;
	int pending, rv, tail;

	KASSERT(!mtx_owned(PIPE_MTX(cpipe)), ("pipespace: pipe mutex locked"));
	KASSERT(!(cpipe->pipe_state & PIPE_DIRECTW),
	    ("pipespace: resize of direct writes not allowed"));

retry:
	/* Never shrink below what is currently buffered. */
	pending = cpipe->pipe_buffer.cnt;
	if (size < pending)
		size = pending;
	size = round_page(size);

	newbuf = (caddr_t) vm_map_min(pipe_map);
	rv = vm_map_find(pipe_map, NULL, 0, (vm_offset_t *)&newbuf, size, 0,
	    VMFS_ANY_SPACE, VM_PROT_RW, VM_PROT_RW, 0);
	if (rv != KERN_SUCCESS) {
		/* A failed resize keeps the existing buffer. */
		if (cpipe->pipe_buffer.buffer != NULL) {
			piperesizefail++;
			return (ENOMEM);
		}
		/* First allocation: fall back to the small size once. */
		if (size > SMALL_PIPE_SIZE) {
			size = SMALL_PIPE_SIZE;
			pipefragretry++;
			goto retry;
		}
		pipeallocfail++;
		if (ppsratecheck(&lastfail, &curfail, 1))
			printf("kern.ipc.maxpipekva exceeded; see tuning(7)\n");
		return (ENOMEM);
	}

	/*
	 * Carry any buffered bytes over to the start of the new buffer.
	 * When "in" is not ahead of "out" the data wraps, so it is moved
	 * in two pieces: out..end first, then start..in.
	 */
	if (pending > 0) {
		if (cpipe->pipe_buffer.in > cpipe->pipe_buffer.out) {
			bcopy(&cpipe->pipe_buffer.buffer[cpipe->pipe_buffer.out],
			    newbuf, pending);
		} else {
			tail = cpipe->pipe_buffer.size - cpipe->pipe_buffer.out;
			bcopy(&cpipe->pipe_buffer.buffer[cpipe->pipe_buffer.out],
			    newbuf, tail);
			if (pending > tail)
				bcopy(cpipe->pipe_buffer.buffer, &newbuf[tail],
				    cpipe->pipe_buffer.in);
		}
	}

	/* Drop the old backing store and install the new one. */
	pipe_free_kmem(cpipe);
	cpipe->pipe_buffer.buffer = newbuf;
	cpipe->pipe_buffer.size = size;
	cpipe->pipe_buffer.out = 0;
	cpipe->pipe_buffer.in = pending;
	cpipe->pipe_buffer.cnt = pending;
	atomic_add_long(&amountpipekva, cpipe->pipe_buffer.size);
	return (0);
}

/*
 * Wrapper for pipespace_new() that performs locking assertions.
*/ static int pipespace(struct pipe *cpipe, int size) { KASSERT(cpipe->pipe_state & PIPE_LOCKFL, ("Unlocked pipe passed to pipespace")); return (pipespace_new(cpipe, size)); } /* * lock a pipe for I/O, blocking other access */ static __inline int pipelock(struct pipe *cpipe, int catch) { int error; PIPE_LOCK_ASSERT(cpipe, MA_OWNED); while (cpipe->pipe_state & PIPE_LOCKFL) { cpipe->pipe_state |= PIPE_LWANT; error = msleep(cpipe, PIPE_MTX(cpipe), catch ? (PRIBIO | PCATCH) : PRIBIO, "pipelk", 0); if (error != 0) return (error); } cpipe->pipe_state |= PIPE_LOCKFL; return (0); } /* * unlock a pipe I/O lock */ static __inline void pipeunlock(struct pipe *cpipe) { PIPE_LOCK_ASSERT(cpipe, MA_OWNED); KASSERT(cpipe->pipe_state & PIPE_LOCKFL, ("Unlocked pipe passed to pipeunlock")); cpipe->pipe_state &= ~PIPE_LOCKFL; if (cpipe->pipe_state & PIPE_LWANT) { cpipe->pipe_state &= ~PIPE_LWANT; wakeup(cpipe); } } void pipeselwakeup(struct pipe *cpipe) { PIPE_LOCK_ASSERT(cpipe, MA_OWNED); if (cpipe->pipe_state & PIPE_SEL) { selwakeuppri(&cpipe->pipe_sel, PSOCK); if (!SEL_WAITING(&cpipe->pipe_sel)) cpipe->pipe_state &= ~PIPE_SEL; } if ((cpipe->pipe_state & PIPE_ASYNC) && cpipe->pipe_sigio) pgsigio(&cpipe->pipe_sigio, SIGIO, 0); KNOTE_LOCKED(&cpipe->pipe_sel.si_note, 0); } /* * Initialize and allocate VM and memory for pipe. The structure * will start out zero'd from the ctor, so we just manage the kmem. */ static int pipe_create(struct pipe *pipe, bool large_backing) { int error; error = pipespace_new(pipe, !large_backing || amountpipekva > maxpipekva / 2 ? 
SMALL_PIPE_SIZE : PIPE_SIZE); if (error == 0) pipe->pipe_ino = alloc_unr64(&pipeino_unr); return (error); } /* ARGSUSED */ static int pipe_read(struct file *fp, struct uio *uio, struct ucred *active_cred, int flags, struct thread *td) { struct pipe *rpipe; int error; int nread = 0; int size; rpipe = fp->f_data; PIPE_LOCK(rpipe); ++rpipe->pipe_busy; error = pipelock(rpipe, 1); if (error) goto unlocked_error; #ifdef MAC error = mac_pipe_check_read(active_cred, rpipe->pipe_pair); if (error) goto locked_error; #endif if (amountpipekva > (3 * maxpipekva) / 4) { if ((rpipe->pipe_state & PIPE_DIRECTW) == 0 && rpipe->pipe_buffer.size > SMALL_PIPE_SIZE && rpipe->pipe_buffer.cnt <= SMALL_PIPE_SIZE && piperesizeallowed == 1) { PIPE_UNLOCK(rpipe); pipespace(rpipe, SMALL_PIPE_SIZE); PIPE_LOCK(rpipe); } } while (uio->uio_resid) { /* * normal pipe buffer receive */ if (rpipe->pipe_buffer.cnt > 0) { size = rpipe->pipe_buffer.size - rpipe->pipe_buffer.out; if (size > rpipe->pipe_buffer.cnt) size = rpipe->pipe_buffer.cnt; if (size > uio->uio_resid) size = uio->uio_resid; PIPE_UNLOCK(rpipe); error = uiomove( &rpipe->pipe_buffer.buffer[rpipe->pipe_buffer.out], size, uio); PIPE_LOCK(rpipe); if (error) break; rpipe->pipe_buffer.out += size; if (rpipe->pipe_buffer.out >= rpipe->pipe_buffer.size) rpipe->pipe_buffer.out = 0; rpipe->pipe_buffer.cnt -= size; /* * If there is no more to read in the pipe, reset * its pointers to the beginning. This improves * cache hit stats. */ if (rpipe->pipe_buffer.cnt == 0) { rpipe->pipe_buffer.in = 0; rpipe->pipe_buffer.out = 0; } nread += size; #ifndef PIPE_NODIRECT /* * Direct copy, bypassing a kernel buffer. 
*/ } else if ((size = rpipe->pipe_pages.cnt) != 0) { if (size > uio->uio_resid) size = (u_int) uio->uio_resid; PIPE_UNLOCK(rpipe); error = uiomove_fromphys(rpipe->pipe_pages.ms, rpipe->pipe_pages.pos, size, uio); PIPE_LOCK(rpipe); if (error) break; nread += size; rpipe->pipe_pages.pos += size; rpipe->pipe_pages.cnt -= size; if (rpipe->pipe_pages.cnt == 0) { rpipe->pipe_state &= ~PIPE_WANTW; wakeup(rpipe); } #endif } else { /* * detect EOF condition * read returns 0 on EOF, no need to set error */ if (rpipe->pipe_state & PIPE_EOF) break; /* * If the "write-side" has been blocked, wake it up now. */ if (rpipe->pipe_state & PIPE_WANTW) { rpipe->pipe_state &= ~PIPE_WANTW; wakeup(rpipe); } /* * Break if some data was read. */ if (nread > 0) break; /* * Unlock the pipe buffer for our remaining processing. * We will either break out with an error or we will * sleep and relock to loop. */ pipeunlock(rpipe); /* * Handle non-blocking mode operation or * wait for more data. */ if (fp->f_flag & FNONBLOCK) { error = EAGAIN; } else { rpipe->pipe_state |= PIPE_WANTR; if ((error = msleep(rpipe, PIPE_MTX(rpipe), PRIBIO | PCATCH, "piperd", 0)) == 0) error = pipelock(rpipe, 1); } if (error) goto unlocked_error; } } #ifdef MAC locked_error: #endif pipeunlock(rpipe); /* XXX: should probably do this before getting any locks. */ if (error == 0) pipe_timestamp(&rpipe->pipe_atime); unlocked_error: --rpipe->pipe_busy; /* * PIPE_WANT processing only makes sense if pipe_busy is 0. */ if ((rpipe->pipe_busy == 0) && (rpipe->pipe_state & PIPE_WANT)) { rpipe->pipe_state &= ~(PIPE_WANT|PIPE_WANTW); wakeup(rpipe); } else if (rpipe->pipe_buffer.cnt < MINPIPESIZE) { /* * Handle write blocking hysteresis. */ if (rpipe->pipe_state & PIPE_WANTW) { rpipe->pipe_state &= ~PIPE_WANTW; wakeup(rpipe); } } /* * Only wake up writers if there was actually something read. * Otherwise, when calling read(2) at EOF, a spurious wakeup occurs. 
*/
	if (nread > 0 && rpipe->pipe_buffer.size - rpipe->pipe_buffer.cnt >= PIPE_BUF)
		pipeselwakeup(rpipe);

	PIPE_UNLOCK(rpipe);
	return (error);
}

#ifndef PIPE_NODIRECT
/*
 * Map the sending processes' buffer into kernel space and wire it.
 * This is similar to a physical write operation.
 *
 * Called with the pipe mutex held, PIPE_DIRECTW clear and no residual
 * direct-write data (all three are asserted).  At most
 * pipe_buffer.size bytes of the first iovec are taken.  The mutex is
 * dropped around vm_fault_quick_hold_pages(), so PIPE_DIRECTW is set
 * beforehand.
 *
 * Returns 0 after recording the held pages in pipe_pages and advancing
 * the uio past the consumed bytes, or EFAULT (with PIPE_DIRECTW cleared
 * again) if the pages could not be held.
 */
static int
pipe_build_write_buffer(struct pipe *wpipe, struct uio *uio)
{
	u_int size;
	int i;

	PIPE_LOCK_ASSERT(wpipe, MA_OWNED);
	KASSERT((wpipe->pipe_state & PIPE_DIRECTW) == 0,
	    ("%s: PIPE_DIRECTW set on %p", __func__, wpipe));
	KASSERT(wpipe->pipe_pages.cnt == 0,
	    ("%s: pipe map for %p contains residual data", __func__, wpipe));

	/* Clamp the transfer to the size of the pipe buffer. */
	if (uio->uio_iov->iov_len > wpipe->pipe_buffer.size)
		size = wpipe->pipe_buffer.size;
	else
		size = uio->uio_iov->iov_len;

	/* Mark the direct write as in progress before dropping the mutex. */
	wpipe->pipe_state |= PIPE_DIRECTW;
	PIPE_UNLOCK(wpipe);
	i = vm_fault_quick_hold_pages(&curproc->p_vmspace->vm_map,
	    (vm_offset_t)uio->uio_iov->iov_base, size, VM_PROT_READ,
	    wpipe->pipe_pages.ms, PIPENPAGES);
	PIPE_LOCK(wpipe);
	if (i < 0) {
		wpipe->pipe_state &= ~PIPE_DIRECTW;
		return (EFAULT);
	}

	/* Record the held pages; pos is the offset within the first page. */
	wpipe->pipe_pages.npages = i;
	wpipe->pipe_pages.pos =
	    ((vm_offset_t) uio->uio_iov->iov_base) & PAGE_MASK;
	wpipe->pipe_pages.cnt = size;

	/* Account for the bytes now owned by the direct-write window. */
	uio->uio_iov->iov_len -= size;
	uio->uio_iov->iov_base = (char *)uio->uio_iov->iov_base + size;
	if (uio->uio_iov->iov_len == 0)
		uio->uio_iov++;
	uio->uio_resid -= size;
	uio->uio_offset += size;
	return (0);
}

/*
 * Unwire the process buffer.
 *
 * Called with the pipe mutex held, PIPE_DIRECTW set and pipe_pages.cnt
 * already zero (all asserted).  Clears PIPE_DIRECTW, releases the held
 * pages and resets the page count.
 */
static void
pipe_destroy_write_buffer(struct pipe *wpipe)
{

	PIPE_LOCK_ASSERT(wpipe, MA_OWNED);
	KASSERT((wpipe->pipe_state & PIPE_DIRECTW) != 0,
	    ("%s: PIPE_DIRECTW not set on %p", __func__, wpipe));
	KASSERT(wpipe->pipe_pages.cnt == 0,
	    ("%s: pipe map for %p contains residual data", __func__, wpipe));

	wpipe->pipe_state &= ~PIPE_DIRECTW;
	vm_page_unhold_pages(wpipe->pipe_pages.ms, wpipe->pipe_pages.npages);
	wpipe->pipe_pages.npages = 0;
}

/*
 * In the case of a signal, the writing process might go away.
This * code copies the data into the circular buffer so that the source * pages can be freed without loss of data. */ static void pipe_clone_write_buffer(struct pipe *wpipe) { struct uio uio; struct iovec iov; int size; int pos; PIPE_LOCK_ASSERT(wpipe, MA_OWNED); KASSERT((wpipe->pipe_state & PIPE_DIRECTW) != 0, ("%s: PIPE_DIRECTW not set on %p", __func__, wpipe)); size = wpipe->pipe_pages.cnt; pos = wpipe->pipe_pages.pos; wpipe->pipe_pages.cnt = 0; wpipe->pipe_buffer.in = size; wpipe->pipe_buffer.out = 0; wpipe->pipe_buffer.cnt = size; PIPE_UNLOCK(wpipe); iov.iov_base = wpipe->pipe_buffer.buffer; iov.iov_len = size; uio.uio_iov = &iov; uio.uio_iovcnt = 1; uio.uio_offset = 0; uio.uio_resid = size; uio.uio_segflg = UIO_SYSSPACE; uio.uio_rw = UIO_READ; uio.uio_td = curthread; uiomove_fromphys(wpipe->pipe_pages.ms, pos, size, &uio); PIPE_LOCK(wpipe); pipe_destroy_write_buffer(wpipe); } /* * This implements the pipe buffer write mechanism. Note that only * a direct write OR a normal pipe write can be pending at any given time. * If there are any characters in the pipe buffer, the direct write will * be deferred until the receiving process grabs all of the bytes from * the pipe buffer. Then the direct mapping write is set-up. 
 *
 * Called with the pipe mutex held (asserted on every pass through
 * "retry").  Returns 0 once the direct-write window has been drained,
 * EPIPE if PIPE_EOF is seen, or the error produced by pipelock(),
 * msleep() or pipe_build_write_buffer().
 */
static int
pipe_direct_write(struct pipe *wpipe, struct uio *uio)
{
	int error;

retry:
	PIPE_LOCK_ASSERT(wpipe, MA_OWNED);
	error = pipelock(wpipe, 1);
	if (error != 0)
		goto error1;
	if ((wpipe->pipe_state & PIPE_EOF) != 0) {
		error = EPIPE;
		pipeunlock(wpipe);
		goto error1;
	}

	/*
	 * Another direct write is still in flight: wake a waiting reader,
	 * notify pollers, then sleep and start over.
	 */
	if (wpipe->pipe_state & PIPE_DIRECTW) {
		if (wpipe->pipe_state & PIPE_WANTR) {
			wpipe->pipe_state &= ~PIPE_WANTR;
			wakeup(wpipe);
		}
		pipeselwakeup(wpipe);
		wpipe->pipe_state |= PIPE_WANTW;
		pipeunlock(wpipe);
		error = msleep(wpipe, PIPE_MTX(wpipe),
		    PRIBIO | PCATCH, "pipdww", 0);
		if (error)
			goto error1;
		else
			goto retry;
	}

	/*
	 * Buffered bytes are still pending: same wake/notify/sleep
	 * sequence, then start over.
	 */
	if (wpipe->pipe_buffer.cnt > 0) {
		if (wpipe->pipe_state & PIPE_WANTR) {
			wpipe->pipe_state &= ~PIPE_WANTR;
			wakeup(wpipe);
		}
		pipeselwakeup(wpipe);
		wpipe->pipe_state |= PIPE_WANTW;
		pipeunlock(wpipe);
		error = msleep(wpipe, PIPE_MTX(wpipe),
		    PRIBIO | PCATCH, "pipdwc", 0);
		if (error)
			goto error1;
		else
			goto retry;
	}

	/* Hold the writer's pages; this sets PIPE_DIRECTW on success. */
	error = pipe_build_write_buffer(wpipe, uio);
	if (error) {
		pipeunlock(wpipe);
		goto error1;
	}

	/*
	 * Sleep until the window is empty or PIPE_EOF is set.  The I/O
	 * lock is dropped across each sleep and retaken (without PCATCH)
	 * before the msleep() result is examined, so the loop always
	 * exits with the lock held.
	 */
	while (wpipe->pipe_pages.cnt != 0 &&
	    (wpipe->pipe_state & PIPE_EOF) == 0) {
		if (wpipe->pipe_state & PIPE_WANTR) {
			wpipe->pipe_state &= ~PIPE_WANTR;
			wakeup(wpipe);
		}
		pipeselwakeup(wpipe);
		wpipe->pipe_state |= PIPE_WANTW;
		pipeunlock(wpipe);
		error = msleep(wpipe, PIPE_MTX(wpipe), PRIBIO | PCATCH,
		    "pipdwt", 0);
		pipelock(wpipe, 0);
		if (error != 0)
			break;
	}

	if ((wpipe->pipe_state & PIPE_EOF) != 0) {
		/* EOF: discard whatever is left in the window. */
		wpipe->pipe_pages.cnt = 0;
		pipe_destroy_write_buffer(wpipe);
		pipeselwakeup(wpipe);
		error = EPIPE;
	} else if (error == EINTR || error == ERESTART) {
		/* Interrupted: move the window into the pipe buffer. */
		pipe_clone_write_buffer(wpipe);
	} else {
		pipe_destroy_write_buffer(wpipe);
	}
	pipeunlock(wpipe);
	KASSERT((wpipe->pipe_state & PIPE_DIRECTW) == 0,
	    ("pipe %p leaked PIPE_DIRECTW", wpipe));
	return (error);

error1:
	/* Early-exit path: the I/O lock is not held here. */
	wakeup(wpipe);
	return (error);
}
#endif

static int
pipe_write(struct file *fp, struct uio *uio, struct ucred *active_cred,
    int flags, struct thread *td)
{
	struct pipe *wpipe, *rpipe;
	ssize_t orig_resid;
	int desiredsize, error;

	rpipe = fp->f_data;
	wpipe
= PIPE_PEER(rpipe); PIPE_LOCK(rpipe); error = pipelock(wpipe, 1); if (error) { PIPE_UNLOCK(rpipe); return (error); } /* * detect loss of pipe read side, issue SIGPIPE if lost. */ if (wpipe->pipe_present != PIPE_ACTIVE || (wpipe->pipe_state & PIPE_EOF)) { pipeunlock(wpipe); PIPE_UNLOCK(rpipe); return (EPIPE); } #ifdef MAC error = mac_pipe_check_write(active_cred, wpipe->pipe_pair); if (error) { pipeunlock(wpipe); PIPE_UNLOCK(rpipe); return (error); } #endif ++wpipe->pipe_busy; /* Choose a larger size if it's advantageous */ desiredsize = max(SMALL_PIPE_SIZE, wpipe->pipe_buffer.size); while (desiredsize < wpipe->pipe_buffer.cnt + uio->uio_resid) { if (piperesizeallowed != 1) break; if (amountpipekva > maxpipekva / 2) break; if (desiredsize == BIG_PIPE_SIZE) break; desiredsize = desiredsize * 2; } /* Choose a smaller size if we're in a OOM situation */ if (amountpipekva > (3 * maxpipekva) / 4 && wpipe->pipe_buffer.size > SMALL_PIPE_SIZE && wpipe->pipe_buffer.cnt <= SMALL_PIPE_SIZE && piperesizeallowed == 1) desiredsize = SMALL_PIPE_SIZE; /* Resize if the above determined that a new size was necessary */ if (desiredsize != wpipe->pipe_buffer.size && (wpipe->pipe_state & PIPE_DIRECTW) == 0) { PIPE_UNLOCK(wpipe); pipespace(wpipe, desiredsize); PIPE_LOCK(wpipe); } MPASS(wpipe->pipe_buffer.size != 0); pipeunlock(wpipe); orig_resid = uio->uio_resid; while (uio->uio_resid) { int space; pipelock(wpipe, 0); if (wpipe->pipe_state & PIPE_EOF) { pipeunlock(wpipe); error = EPIPE; break; } #ifndef PIPE_NODIRECT /* * If the transfer is large, we can gain performance if * we do process-to-process copies directly. * If the write is non-blocking, we don't use the * direct write mechanism. * * The direct write mechanism will detect the reader going * away on us. 
*/ if (uio->uio_segflg == UIO_USERSPACE && uio->uio_iov->iov_len >= PIPE_MINDIRECT && wpipe->pipe_buffer.size >= PIPE_MINDIRECT && (fp->f_flag & FNONBLOCK) == 0) { pipeunlock(wpipe); error = pipe_direct_write(wpipe, uio); if (error) break; continue; } #endif /* * Pipe buffered writes cannot be coincidental with * direct writes. We wait until the currently executing * direct write is completed before we start filling the * pipe buffer. We break out if a signal occurs or the * reader goes away. */ if (wpipe->pipe_pages.cnt != 0) { if (wpipe->pipe_state & PIPE_WANTR) { wpipe->pipe_state &= ~PIPE_WANTR; wakeup(wpipe); } pipeselwakeup(wpipe); wpipe->pipe_state |= PIPE_WANTW; pipeunlock(wpipe); error = msleep(wpipe, PIPE_MTX(rpipe), PRIBIO | PCATCH, "pipbww", 0); if (error) break; else continue; } space = wpipe->pipe_buffer.size - wpipe->pipe_buffer.cnt; /* Writes of size <= PIPE_BUF must be atomic. */ if ((space < uio->uio_resid) && (orig_resid <= PIPE_BUF)) space = 0; if (space > 0) { int size; /* Transfer size */ int segsize; /* first segment to transfer */ /* * Transfer size is minimum of uio transfer * and free space in pipe buffer. */ if (space > uio->uio_resid) size = uio->uio_resid; else size = space; /* * First segment to transfer is minimum of * transfer size and contiguous space in * pipe buffer. If first segment to transfer * is less than the transfer size, we've got * a wraparound in the buffer. */ segsize = wpipe->pipe_buffer.size - wpipe->pipe_buffer.in; if (segsize > size) segsize = size; /* Transfer first segment */ PIPE_UNLOCK(rpipe); error = uiomove(&wpipe->pipe_buffer.buffer[wpipe->pipe_buffer.in], segsize, uio); PIPE_LOCK(rpipe); if (error == 0 && segsize < size) { KASSERT(wpipe->pipe_buffer.in + segsize == wpipe->pipe_buffer.size, ("Pipe buffer wraparound disappeared")); /* * Transfer remaining part now, to * support atomic writes. Wraparound * happened. 
*/ PIPE_UNLOCK(rpipe); error = uiomove( &wpipe->pipe_buffer.buffer[0], size - segsize, uio); PIPE_LOCK(rpipe); } if (error == 0) { wpipe->pipe_buffer.in += size; if (wpipe->pipe_buffer.in >= wpipe->pipe_buffer.size) { KASSERT(wpipe->pipe_buffer.in == size - segsize + wpipe->pipe_buffer.size, ("Expected wraparound bad")); wpipe->pipe_buffer.in = size - segsize; } wpipe->pipe_buffer.cnt += size; KASSERT(wpipe->pipe_buffer.cnt <= wpipe->pipe_buffer.size, ("Pipe buffer overflow")); } pipeunlock(wpipe); if (error != 0) break; } else { /* * If the "read-side" has been blocked, wake it up now. */ if (wpipe->pipe_state & PIPE_WANTR) { wpipe->pipe_state &= ~PIPE_WANTR; wakeup(wpipe); } /* * don't block on non-blocking I/O */ if (fp->f_flag & FNONBLOCK) { error = EAGAIN; pipeunlock(wpipe); break; } /* * We have no more space and have something to offer, * wake up select/poll. */ pipeselwakeup(wpipe); wpipe->pipe_state |= PIPE_WANTW; pipeunlock(wpipe); error = msleep(wpipe, PIPE_MTX(rpipe), PRIBIO | PCATCH, "pipewr", 0); if (error != 0) break; } } pipelock(wpipe, 0); --wpipe->pipe_busy; if ((wpipe->pipe_busy == 0) && (wpipe->pipe_state & PIPE_WANT)) { wpipe->pipe_state &= ~(PIPE_WANT | PIPE_WANTR); wakeup(wpipe); } else if (wpipe->pipe_buffer.cnt > 0) { /* * If we have put any characters in the buffer, we wake up * the reader. */ if (wpipe->pipe_state & PIPE_WANTR) { wpipe->pipe_state &= ~PIPE_WANTR; wakeup(wpipe); } } /* * Don't return EPIPE if any byte was written. * EINTR and other interrupts are handled by generic I/O layer. * Do not pretend that I/O succeeded for obvious user error * like EFAULT. */ if (uio->uio_resid != orig_resid && error == EPIPE) error = 0; if (error == 0) pipe_timestamp(&wpipe->pipe_mtime); /* * We have something to offer, * wake up select/poll. 
*/ if (wpipe->pipe_buffer.cnt) pipeselwakeup(wpipe); pipeunlock(wpipe); PIPE_UNLOCK(rpipe); return (error); } /* ARGSUSED */ static int pipe_truncate(struct file *fp, off_t length, struct ucred *active_cred, struct thread *td) { struct pipe *cpipe; int error; cpipe = fp->f_data; - if (cpipe->pipe_state & PIPE_NAMED) + if (cpipe->pipe_type & PIPE_TYPE_NAMED) error = vnops.fo_truncate(fp, length, active_cred, td); else error = invfo_truncate(fp, length, active_cred, td); return (error); } /* * we implement a very minimal set of ioctls for compatibility with sockets. */ static int pipe_ioctl(struct file *fp, u_long cmd, void *data, struct ucred *active_cred, struct thread *td) { struct pipe *mpipe = fp->f_data; int error; PIPE_LOCK(mpipe); #ifdef MAC error = mac_pipe_check_ioctl(active_cred, mpipe->pipe_pair, cmd, data); if (error) { PIPE_UNLOCK(mpipe); return (error); } #endif error = 0; switch (cmd) { case FIONBIO: break; case FIOASYNC: if (*(int *)data) { mpipe->pipe_state |= PIPE_ASYNC; } else { mpipe->pipe_state &= ~PIPE_ASYNC; } break; case FIONREAD: if (!(fp->f_flag & FREAD)) { *(int *)data = 0; PIPE_UNLOCK(mpipe); return (0); } if (mpipe->pipe_pages.cnt != 0) *(int *)data = mpipe->pipe_pages.cnt; else *(int *)data = mpipe->pipe_buffer.cnt; break; case FIOSETOWN: PIPE_UNLOCK(mpipe); error = fsetown(*(int *)data, &mpipe->pipe_sigio); goto out_unlocked; case FIOGETOWN: *(int *)data = fgetown(&mpipe->pipe_sigio); break; /* This is deprecated, FIOSETOWN should be used instead. */ case TIOCSPGRP: PIPE_UNLOCK(mpipe); error = fsetown(-(*(int *)data), &mpipe->pipe_sigio); goto out_unlocked; /* This is deprecated, FIOGETOWN should be used instead. 
*/ case TIOCGPGRP: *(int *)data = -fgetown(&mpipe->pipe_sigio); break; default: error = ENOTTY; break; } PIPE_UNLOCK(mpipe); out_unlocked: return (error); } static int pipe_poll(struct file *fp, int events, struct ucred *active_cred, struct thread *td) { struct pipe *rpipe; struct pipe *wpipe; int levents, revents; #ifdef MAC int error; #endif revents = 0; rpipe = fp->f_data; wpipe = PIPE_PEER(rpipe); PIPE_LOCK(rpipe); #ifdef MAC error = mac_pipe_check_poll(active_cred, rpipe->pipe_pair); if (error) goto locked_error; #endif if (fp->f_flag & FREAD && events & (POLLIN | POLLRDNORM)) if (rpipe->pipe_pages.cnt > 0 || rpipe->pipe_buffer.cnt > 0) revents |= events & (POLLIN | POLLRDNORM); if (fp->f_flag & FWRITE && events & (POLLOUT | POLLWRNORM)) if (wpipe->pipe_present != PIPE_ACTIVE || (wpipe->pipe_state & PIPE_EOF) || ((wpipe->pipe_state & PIPE_DIRECTW) == 0 && ((wpipe->pipe_buffer.size - wpipe->pipe_buffer.cnt) >= PIPE_BUF || wpipe->pipe_buffer.size == 0))) revents |= events & (POLLOUT | POLLWRNORM); levents = events & (POLLIN | POLLINIGNEOF | POLLPRI | POLLRDNORM | POLLRDBAND); - if (rpipe->pipe_state & PIPE_NAMED && fp->f_flag & FREAD && levents && + if (rpipe->pipe_type & PIPE_TYPE_NAMED && fp->f_flag & FREAD && levents && fp->f_pipegen == rpipe->pipe_wgen) events |= POLLINIGNEOF; if ((events & POLLINIGNEOF) == 0) { if (rpipe->pipe_state & PIPE_EOF) { if (fp->f_flag & FREAD) revents |= (events & (POLLIN | POLLRDNORM)); if (wpipe->pipe_present != PIPE_ACTIVE || (wpipe->pipe_state & PIPE_EOF)) revents |= POLLHUP; } } if (revents == 0) { /* * Add ourselves regardless of eventmask as we have to return * POLLHUP even if it was not asked for. 
*/ if ((fp->f_flag & FREAD) != 0) { selrecord(td, &rpipe->pipe_sel); if (SEL_WAITING(&rpipe->pipe_sel)) rpipe->pipe_state |= PIPE_SEL; } if ((fp->f_flag & FWRITE) != 0) { selrecord(td, &wpipe->pipe_sel); if (SEL_WAITING(&wpipe->pipe_sel)) wpipe->pipe_state |= PIPE_SEL; } } #ifdef MAC locked_error: #endif PIPE_UNLOCK(rpipe); return (revents); } /* * We shouldn't need locks here as we're doing a read and this should * be a natural race. */ static int pipe_stat(struct file *fp, struct stat *ub, struct ucred *active_cred, struct thread *td) { struct pipe *pipe; #ifdef MAC int error; #endif pipe = fp->f_data; - PIPE_LOCK(pipe); #ifdef MAC - error = mac_pipe_check_stat(active_cred, pipe->pipe_pair); - if (error) { + if (mac_pipe_check_stat_enabled()) { + PIPE_LOCK(pipe); + error = mac_pipe_check_stat(active_cred, pipe->pipe_pair); PIPE_UNLOCK(pipe); - return (error); + if (error) { + return (error); + } } #endif /* For named pipes ask the underlying filesystem. */ - if (pipe->pipe_state & PIPE_NAMED) { - PIPE_UNLOCK(pipe); + if (pipe->pipe_type & PIPE_TYPE_NAMED) { return (vnops.fo_stat(fp, ub, active_cred, td)); } - PIPE_UNLOCK(pipe); - bzero(ub, sizeof(*ub)); ub->st_mode = S_IFIFO; ub->st_blksize = PAGE_SIZE; if (pipe->pipe_pages.cnt != 0) ub->st_size = pipe->pipe_pages.cnt; else ub->st_size = pipe->pipe_buffer.cnt; ub->st_blocks = howmany(ub->st_size, ub->st_blksize); ub->st_atim = pipe->pipe_atime; ub->st_mtim = pipe->pipe_mtime; ub->st_ctim = pipe->pipe_ctime; ub->st_uid = fp->f_cred->cr_uid; ub->st_gid = fp->f_cred->cr_gid; ub->st_dev = pipedev_ino; ub->st_ino = pipe->pipe_ino; /* * Left as 0: st_nlink, st_rdev, st_flags, st_gen. 
*/ return (0); } /* ARGSUSED */ static int pipe_close(struct file *fp, struct thread *td) { if (fp->f_vnode != NULL) return vnops.fo_close(fp, td); fp->f_ops = &badfileops; pipe_dtor(fp->f_data); fp->f_data = NULL; return (0); } static int pipe_chmod(struct file *fp, mode_t mode, struct ucred *active_cred, struct thread *td) { struct pipe *cpipe; int error; cpipe = fp->f_data; - if (cpipe->pipe_state & PIPE_NAMED) + if (cpipe->pipe_type & PIPE_TYPE_NAMED) error = vn_chmod(fp, mode, active_cred, td); else error = invfo_chmod(fp, mode, active_cred, td); return (error); } static int pipe_chown(struct file *fp, uid_t uid, gid_t gid, struct ucred *active_cred, struct thread *td) { struct pipe *cpipe; int error; cpipe = fp->f_data; - if (cpipe->pipe_state & PIPE_NAMED) + if (cpipe->pipe_type & PIPE_TYPE_NAMED) error = vn_chown(fp, uid, gid, active_cred, td); else error = invfo_chown(fp, uid, gid, active_cred, td); return (error); } static int pipe_fill_kinfo(struct file *fp, struct kinfo_file *kif, struct filedesc *fdp) { struct pipe *pi; if (fp->f_type == DTYPE_FIFO) return (vn_fill_kinfo(fp, kif, fdp)); kif->kf_type = KF_TYPE_PIPE; pi = fp->f_data; kif->kf_un.kf_pipe.kf_pipe_addr = (uintptr_t)pi; kif->kf_un.kf_pipe.kf_pipe_peer = (uintptr_t)pi->pipe_peer; kif->kf_un.kf_pipe.kf_pipe_buffer_cnt = pi->pipe_buffer.cnt; return (0); } static void pipe_free_kmem(struct pipe *cpipe) { KASSERT(!mtx_owned(PIPE_MTX(cpipe)), ("pipe_free_kmem: pipe mutex locked")); if (cpipe->pipe_buffer.buffer != NULL) { atomic_subtract_long(&amountpipekva, cpipe->pipe_buffer.size); vm_map_remove(pipe_map, (vm_offset_t)cpipe->pipe_buffer.buffer, (vm_offset_t)cpipe->pipe_buffer.buffer + cpipe->pipe_buffer.size); cpipe->pipe_buffer.buffer = NULL; } #ifndef PIPE_NODIRECT { cpipe->pipe_pages.cnt = 0; cpipe->pipe_pages.pos = 0; cpipe->pipe_pages.npages = 0; } #endif } /* * shutdown the pipe */ static void pipeclose(struct pipe *cpipe) { struct pipepair *pp; struct pipe *ppipe; KASSERT(cpipe != NULL, 
("pipeclose: cpipe == NULL")); PIPE_LOCK(cpipe); pipelock(cpipe, 0); pp = cpipe->pipe_pair; /* * If the other side is blocked, wake it up saying that * we want to close it down. */ cpipe->pipe_state |= PIPE_EOF; while (cpipe->pipe_busy) { wakeup(cpipe); cpipe->pipe_state |= PIPE_WANT; pipeunlock(cpipe); msleep(cpipe, PIPE_MTX(cpipe), PRIBIO, "pipecl", 0); pipelock(cpipe, 0); } pipeselwakeup(cpipe); /* * Disconnect from peer, if any. */ ppipe = cpipe->pipe_peer; if (ppipe->pipe_present == PIPE_ACTIVE) { ppipe->pipe_state |= PIPE_EOF; wakeup(ppipe); pipeselwakeup(ppipe); } /* * Mark this endpoint as free. Release kmem resources. We * don't mark this endpoint as unused until we've finished * doing that, or the pipe might disappear out from under * us. */ PIPE_UNLOCK(cpipe); pipe_free_kmem(cpipe); PIPE_LOCK(cpipe); cpipe->pipe_present = PIPE_CLOSING; pipeunlock(cpipe); /* * knlist_clear() may sleep dropping the PIPE_MTX. Set the * PIPE_FINALIZED, that allows other end to free the * pipe_pair, only after the knotes are completely dismantled. */ knlist_clear(&cpipe->pipe_sel.si_note, 1); cpipe->pipe_present = PIPE_FINALIZED; seldrain(&cpipe->pipe_sel); knlist_destroy(&cpipe->pipe_sel.si_note); /* * If both endpoints are now closed, release the memory for the * pipe pair. If not, unlock. */ if (ppipe->pipe_present == PIPE_FINALIZED) { PIPE_UNLOCK(cpipe); #ifdef MAC mac_pipe_destroy(pp); #endif uma_zfree(pipe_zone, cpipe->pipe_pair); } else PIPE_UNLOCK(cpipe); } /*ARGSUSED*/ static int pipe_kqfilter(struct file *fp, struct knote *kn) { struct pipe *cpipe; /* * If a filter is requested that is not supported by this file * descriptor, don't return an error, but also don't ever generate an * event. 
*/ if ((kn->kn_filter == EVFILT_READ) && !(fp->f_flag & FREAD)) { kn->kn_fop = &pipe_nfiltops; return (0); } if ((kn->kn_filter == EVFILT_WRITE) && !(fp->f_flag & FWRITE)) { kn->kn_fop = &pipe_nfiltops; return (0); } cpipe = fp->f_data; PIPE_LOCK(cpipe); switch (kn->kn_filter) { case EVFILT_READ: kn->kn_fop = &pipe_rfiltops; break; case EVFILT_WRITE: kn->kn_fop = &pipe_wfiltops; if (cpipe->pipe_peer->pipe_present != PIPE_ACTIVE) { /* other end of pipe has been closed */ PIPE_UNLOCK(cpipe); return (EPIPE); } cpipe = PIPE_PEER(cpipe); break; default: PIPE_UNLOCK(cpipe); return (EINVAL); } kn->kn_hook = cpipe; knlist_add(&cpipe->pipe_sel.si_note, kn, 1); PIPE_UNLOCK(cpipe); return (0); } static void filt_pipedetach(struct knote *kn) { struct pipe *cpipe = kn->kn_hook; PIPE_LOCK(cpipe); knlist_remove(&cpipe->pipe_sel.si_note, kn, 1); PIPE_UNLOCK(cpipe); } /*ARGSUSED*/ static int filt_piperead(struct knote *kn, long hint) { struct file *fp = kn->kn_fp; struct pipe *rpipe = kn->kn_hook; PIPE_LOCK_ASSERT(rpipe, MA_OWNED); kn->kn_data = rpipe->pipe_buffer.cnt; if (kn->kn_data == 0) kn->kn_data = rpipe->pipe_pages.cnt; if ((rpipe->pipe_state & PIPE_EOF) != 0 && - ((rpipe->pipe_state & PIPE_NAMED) == 0 || + ((rpipe->pipe_type & PIPE_TYPE_NAMED) == 0 || fp->f_pipegen != rpipe->pipe_wgen)) { kn->kn_flags |= EV_EOF; return (1); } kn->kn_flags &= ~EV_EOF; return (kn->kn_data > 0); } /*ARGSUSED*/ static int filt_pipewrite(struct knote *kn, long hint) { struct pipe *wpipe = kn->kn_hook; /* * If this end of the pipe is closed, the knote was removed from the * knlist and the list lock (i.e., the pipe lock) is therefore not held. 
*/ if (wpipe->pipe_present == PIPE_ACTIVE || - (wpipe->pipe_state & PIPE_NAMED) != 0) { + (wpipe->pipe_type & PIPE_TYPE_NAMED) != 0) { PIPE_LOCK_ASSERT(wpipe, MA_OWNED); if (wpipe->pipe_state & PIPE_DIRECTW) { kn->kn_data = 0; } else if (wpipe->pipe_buffer.size > 0) { kn->kn_data = wpipe->pipe_buffer.size - wpipe->pipe_buffer.cnt; } else { kn->kn_data = PIPE_BUF; } } if (wpipe->pipe_present != PIPE_ACTIVE || (wpipe->pipe_state & PIPE_EOF)) { kn->kn_flags |= EV_EOF; return (1); } kn->kn_flags &= ~EV_EOF; return (kn->kn_data >= PIPE_BUF); } static void filt_pipedetach_notsup(struct knote *kn) { } static int filt_pipenotsup(struct knote *kn, long hint) { return (0); } Index: head/sys/security/mac/mac_framework.c =================================================================== --- head/sys/security/mac/mac_framework.c (revision 367832) +++ head/sys/security/mac/mac_framework.c (revision 367833) @@ -1,727 +1,733 @@ /*- * Copyright (c) 1999-2002, 2006, 2009 Robert N. M. Watson * Copyright (c) 2001 Ilmar S. Habibulin * Copyright (c) 2001-2005 Networks Associates Technology, Inc. * Copyright (c) 2005-2006 SPARTA, Inc. * Copyright (c) 2008-2009 Apple Inc. * All rights reserved. * * This software was developed by Robert Watson and Ilmar Habibulin for the * TrustedBSD Project. * * This software was developed for the FreeBSD Project in part by Network * Associates Laboratories, the Security Research Division of Network * Associates, Inc. under DARPA/SPAWAR contract N66001-01-C-8035 ("CBOSS"), * as part of the DARPA CHATS research program. * * This software was enhanced by SPARTA ISSO under SPAWAR contract * N66001-04-C-6019 ("SEFOS"). * * This software was developed at the University of Cambridge Computer * Laboratory with support from a grant from Google, Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. 
Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ /*- * Framework for extensible kernel access control. This file contains core * kernel infrastructure for the TrustedBSD MAC Framework, including policy * registration, versioning, locking, error composition operator, and system * calls. * * The MAC Framework implements three programming interfaces: * * - The kernel MAC interface, defined in mac_framework.h, and invoked * throughout the kernel to request security decisions, notify of security * related events, etc. * * - The MAC policy module interface, defined in mac_policy.h, which is * implemented by MAC policy modules and invoked by the MAC Framework to * forward kernel security requests and notifications to policy modules. * * - The user MAC API, defined in mac.h, which allows user programs to query * and set label state on objects. * * The majority of the MAC Framework implementation may be found in * src/sys/security/mac. 
Sample policy modules may be found in * src/sys/security/mac_*. */ #include "opt_mac.h" #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include /* * DTrace SDT providers for MAC. */ SDT_PROVIDER_DEFINE(mac); SDT_PROVIDER_DEFINE(mac_framework); SDT_PROBE_DEFINE2(mac, , policy, modevent, "int", "struct mac_policy_conf *"); SDT_PROBE_DEFINE1(mac, , policy, register, "struct mac_policy_conf *"); SDT_PROBE_DEFINE1(mac, , policy, unregister, "struct mac_policy_conf *"); /* * Root sysctl node for all MAC and MAC policy controls. */ SYSCTL_NODE(_security, OID_AUTO, mac, CTLFLAG_RW | CTLFLAG_MPSAFE, 0, "TrustedBSD MAC policy controls"); /* * Declare that the kernel provides MAC support, version 3 (FreeBSD 7.x). * This permits modules to refuse to be loaded if the necessary support isn't * present, even if it's pre-boot. */ MODULE_VERSION(kernel_mac_support, MAC_VERSION); static unsigned int mac_version = MAC_VERSION; SYSCTL_UINT(_security_mac, OID_AUTO, version, CTLFLAG_RD, &mac_version, 0, ""); /* * Flags for inlined checks. Note this would be best hotpatched at runtime. * The following is a band-aid. * * Use FPFLAG for hooks running in commonly executed paths and FPFLAG_RARE * for the rest. */ #define FPFLAG(f) \ bool __read_frequently mac_##f##_fp_flag #define FPFLAG_RARE(f) \ bool __read_mostly mac_##f##_fp_flag FPFLAG(priv_check); FPFLAG(priv_grant); FPFLAG(vnode_check_lookup); FPFLAG(vnode_check_open); FPFLAG(vnode_check_stat); FPFLAG(vnode_check_read); FPFLAG(vnode_check_write); FPFLAG(vnode_check_mmap); FPFLAG_RARE(vnode_check_poll); FPFLAG_RARE(vnode_check_rename_from); FPFLAG_RARE(vnode_check_access); +FPFLAG_RARE(pipe_check_stat); +FPFLAG_RARE(pipe_check_poll); #undef FPFLAG #undef FPFLAG_RARE /* * Labels consist of an indexed set of "slots", which are allocated to policies * as required. 
The MAC Framework maintains a bitmask of slots allocated so * far to prevent reuse. Slots cannot be reused, as the MAC Framework * guarantees that newly allocated slots in labels will be NULL unless * otherwise initialized, and because we do not have a mechanism to garbage * collect slots on policy unload. As labeled policies tend to be statically * loaded during boot, and not frequently unloaded and reloaded, this is not * generally an issue. */ #if MAC_MAX_SLOTS > 32 #error "MAC_MAX_SLOTS too large" #endif static unsigned int mac_max_slots = MAC_MAX_SLOTS; static unsigned int mac_slot_offsets_free = (1 << MAC_MAX_SLOTS) - 1; SYSCTL_UINT(_security_mac, OID_AUTO, max_slots, CTLFLAG_RD, &mac_max_slots, 0, ""); /* * Has the kernel started generating labeled objects yet? All read/write * access to this variable is serialized during the boot process. Following * the end of serialization, we don't update this flag; no locking. */ static int mac_late = 0; /* * Each policy declares a mask of object types requiring labels to be * allocated for them. For convenience, we combine and cache the bitwise or * of the per-policy object flags to track whether we will allocate a label * for an object type at run-time. */ uint64_t mac_labeled; SYSCTL_UQUAD(_security_mac, OID_AUTO, labeled, CTLFLAG_RD, &mac_labeled, 0, "Mask of object types being labeled"); MALLOC_DEFINE(M_MACTEMP, "mactemp", "MAC temporary label storage"); /* * MAC policy modules are placed in one of two lists: mac_static_policy_list, * for policies that are loaded early and cannot be unloaded, and * mac_policy_list, which holds policies either loaded later in the boot * cycle or that may be unloaded. The static policy list does not require * locks to iterate over, but the dynamic list requires synchronization. * Support for dynamic policy loading can be compiled out using the * MAC_STATIC kernel option. 
* * The dynamic policy list is protected by two locks: modifying the list * requires both locks to be held exclusively. One of the locks, * mac_policy_rm, is acquired over policy entry points that will never sleep; * the other, mac_policy_rms, is acquired over policy entry points that may * sleep. The former category will be used when kernel locks may be held * over calls to the MAC Framework, during network processing in ithreads, * etc. The latter will tend to involve potentially blocking memory * allocations, extended attribute I/O, etc. */ #ifndef MAC_STATIC static struct rmlock mac_policy_rm; /* Non-sleeping entry points. */ static struct rmslock mac_policy_rms; /* Sleeping entry points. */ #endif struct mac_policy_list_head mac_policy_list; struct mac_policy_list_head mac_static_policy_list; u_int mac_policy_count; /* Registered policy count. */ static void mac_policy_xlock(void); static void mac_policy_xlock_assert(void); static void mac_policy_xunlock(void); void mac_policy_slock_nosleep(struct rm_priotracker *tracker) { #ifndef MAC_STATIC if (!mac_late) return; rm_rlock(&mac_policy_rm, tracker); #endif } void mac_policy_slock_sleep(void) { WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL, "mac_policy_slock_sleep"); #ifndef MAC_STATIC if (!mac_late) return; rms_rlock(&mac_policy_rms); #endif } void mac_policy_sunlock_nosleep(struct rm_priotracker *tracker) { #ifndef MAC_STATIC if (!mac_late) return; rm_runlock(&mac_policy_rm, tracker); #endif } void mac_policy_sunlock_sleep(void) { #ifndef MAC_STATIC if (!mac_late) return; rms_runlock(&mac_policy_rms); #endif } static void mac_policy_xlock(void) { WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL, "mac_policy_xlock()"); #ifndef MAC_STATIC if (!mac_late) return; rms_wlock(&mac_policy_rms); rm_wlock(&mac_policy_rm); #endif } static void mac_policy_xunlock(void) { #ifndef MAC_STATIC if (!mac_late) return; rm_wunlock(&mac_policy_rm); rms_wunlock(&mac_policy_rms); #endif } static void mac_policy_xlock_assert(void) { 
#ifndef MAC_STATIC if (!mac_late) return; rm_assert(&mac_policy_rm, RA_WLOCKED); #endif } /* * Initialize the MAC subsystem, including appropriate SMP locks. */ static void mac_init(void) { LIST_INIT(&mac_static_policy_list); LIST_INIT(&mac_policy_list); mac_labelzone_init(); #ifndef MAC_STATIC rm_init_flags(&mac_policy_rm, "mac_policy_rm", RM_NOWITNESS | RM_RECURSE); rms_init(&mac_policy_rms, "mac_policy_rms"); #endif } /* * For the purposes of modules that want to know if they were loaded "early", * set the mac_late flag once we've processed modules either linked into the * kernel, or loaded before the kernel startup. */ static void mac_late_init(void) { mac_late = 1; } /* * Given a policy, derive from its set of non-NULL label init methods what * object types the policy is interested in. */ static uint64_t mac_policy_getlabeled(struct mac_policy_conf *mpc) { uint64_t labeled; #define MPC_FLAG(method, flag) \ if (mpc->mpc_ops->mpo_ ## method != NULL) \ labeled |= (flag); \ labeled = 0; MPC_FLAG(cred_init_label, MPC_OBJECT_CRED); MPC_FLAG(proc_init_label, MPC_OBJECT_PROC); MPC_FLAG(vnode_init_label, MPC_OBJECT_VNODE); MPC_FLAG(inpcb_init_label, MPC_OBJECT_INPCB); MPC_FLAG(socket_init_label, MPC_OBJECT_SOCKET); MPC_FLAG(devfs_init_label, MPC_OBJECT_DEVFS); MPC_FLAG(mbuf_init_label, MPC_OBJECT_MBUF); MPC_FLAG(ipq_init_label, MPC_OBJECT_IPQ); MPC_FLAG(ifnet_init_label, MPC_OBJECT_IFNET); MPC_FLAG(bpfdesc_init_label, MPC_OBJECT_BPFDESC); MPC_FLAG(pipe_init_label, MPC_OBJECT_PIPE); MPC_FLAG(mount_init_label, MPC_OBJECT_MOUNT); MPC_FLAG(posixsem_init_label, MPC_OBJECT_POSIXSEM); MPC_FLAG(posixshm_init_label, MPC_OBJECT_POSIXSHM); MPC_FLAG(sysvmsg_init_label, MPC_OBJECT_SYSVMSG); MPC_FLAG(sysvmsq_init_label, MPC_OBJECT_SYSVMSQ); MPC_FLAG(sysvsem_init_label, MPC_OBJECT_SYSVSEM); MPC_FLAG(sysvshm_init_label, MPC_OBJECT_SYSVSHM); MPC_FLAG(syncache_init_label, MPC_OBJECT_SYNCACHE); MPC_FLAG(ip6q_init_label, MPC_OBJECT_IP6Q); #undef MPC_FLAG return (labeled); } /* * When 
policies are loaded or unloaded, walk the list of registered policies * and build mac_labeled, a bitmask representing the union of all objects * requiring labels across all policies. */ static void mac_policy_update(void) { struct mac_policy_conf *mpc; mac_policy_xlock_assert(); mac_labeled = 0; mac_policy_count = 0; LIST_FOREACH(mpc, &mac_static_policy_list, mpc_list) { mac_labeled |= mac_policy_getlabeled(mpc); mac_policy_count++; } LIST_FOREACH(mpc, &mac_policy_list, mpc_list) { mac_labeled |= mac_policy_getlabeled(mpc); mac_policy_count++; } } /* * There are frequently used code paths which check for rarely installed * policies. Gross hack below enables doing it in a cheap manner. */ #define FPO(f) (offsetof(struct mac_policy_ops, mpo_##f) / sizeof(uintptr_t)) struct mac_policy_fastpath_elem { int count; bool *flag; size_t offset; }; struct mac_policy_fastpath_elem mac_policy_fastpath_array[] = { { .offset = FPO(priv_check), .flag = &mac_priv_check_fp_flag }, { .offset = FPO(priv_grant), .flag = &mac_priv_grant_fp_flag }, { .offset = FPO(vnode_check_lookup), .flag = &mac_vnode_check_lookup_fp_flag }, { .offset = FPO(vnode_check_open), .flag = &mac_vnode_check_open_fp_flag }, { .offset = FPO(vnode_check_stat), .flag = &mac_vnode_check_stat_fp_flag }, { .offset = FPO(vnode_check_read), .flag = &mac_vnode_check_read_fp_flag }, { .offset = FPO(vnode_check_write), .flag = &mac_vnode_check_write_fp_flag }, { .offset = FPO(vnode_check_mmap), .flag = &mac_vnode_check_mmap_fp_flag }, { .offset = FPO(vnode_check_poll), .flag = &mac_vnode_check_poll_fp_flag }, { .offset = FPO(vnode_check_rename_from), .flag = &mac_vnode_check_rename_from_fp_flag }, { .offset = FPO(vnode_check_access), .flag = &mac_vnode_check_access_fp_flag }, + { .offset = FPO(pipe_check_stat), + .flag = &mac_pipe_check_stat_fp_flag }, + { .offset = FPO(pipe_check_poll), + .flag = &mac_pipe_check_poll_fp_flag }, }; static void mac_policy_fastpath_enable(struct mac_policy_fastpath_elem *mpfe) { 
MPASS(mpfe->count >= 0); mpfe->count++; if (mpfe->count == 1) { MPASS(*mpfe->flag == false); *mpfe->flag = true; } } static void mac_policy_fastpath_disable(struct mac_policy_fastpath_elem *mpfe) { MPASS(mpfe->count >= 1); mpfe->count--; if (mpfe->count == 0) { MPASS(*mpfe->flag == true); *mpfe->flag = false; } } static void mac_policy_fastpath_register(struct mac_policy_conf *mpc) { struct mac_policy_fastpath_elem *mpfe; uintptr_t **ops; int i; mac_policy_xlock_assert(); ops = (uintptr_t **)mpc->mpc_ops; for (i = 0; i < nitems(mac_policy_fastpath_array); i++) { mpfe = &mac_policy_fastpath_array[i]; if (ops[mpfe->offset] != NULL) mac_policy_fastpath_enable(mpfe); } } static void mac_policy_fastpath_unregister(struct mac_policy_conf *mpc) { struct mac_policy_fastpath_elem *mpfe; uintptr_t **ops; int i; mac_policy_xlock_assert(); ops = (uintptr_t **)mpc->mpc_ops; for (i = 0; i < nitems(mac_policy_fastpath_array); i++) { mpfe = &mac_policy_fastpath_array[i]; if (ops[mpfe->offset] != NULL) mac_policy_fastpath_disable(mpfe); } } #undef FPO static int mac_policy_register(struct mac_policy_conf *mpc) { struct mac_policy_conf *tmpc; int error, slot, static_entry; error = 0; /* * We don't technically need exclusive access while !mac_late, but * hold it for assertion consistency. */ mac_policy_xlock(); /* * If the module can potentially be unloaded, or we're loading late, * we have to stick it in the non-static list and pay an extra * performance overhead. Otherwise, we can pay a light locking cost * and stick it in the static list. 
*/ static_entry = (!mac_late && !(mpc->mpc_loadtime_flags & MPC_LOADTIME_FLAG_UNLOADOK)); if (static_entry) { LIST_FOREACH(tmpc, &mac_static_policy_list, mpc_list) { if (strcmp(tmpc->mpc_name, mpc->mpc_name) == 0) { error = EEXIST; goto out; } } } else { LIST_FOREACH(tmpc, &mac_policy_list, mpc_list) { if (strcmp(tmpc->mpc_name, mpc->mpc_name) == 0) { error = EEXIST; goto out; } } } if (mpc->mpc_field_off != NULL) { slot = ffs(mac_slot_offsets_free); if (slot == 0) { error = ENOMEM; goto out; } slot--; mac_slot_offsets_free &= ~(1 << slot); *mpc->mpc_field_off = slot; } mpc->mpc_runtime_flags |= MPC_RUNTIME_FLAG_REGISTERED; /* * If we're loading a MAC module after the framework has initialized, * it has to go into the dynamic list. If we're loading it before * we've finished initializing, it can go into the static list with * weaker locker requirements. */ if (static_entry) LIST_INSERT_HEAD(&mac_static_policy_list, mpc, mpc_list); else LIST_INSERT_HEAD(&mac_policy_list, mpc, mpc_list); /* * Per-policy initialization. Currently, this takes place under the * exclusive lock, so policies must not sleep in their init method. * In the future, we may want to separate "init" from "start", with * "init" occurring without the lock held. Likewise, on tear-down, * breaking out "stop" from "destroy". */ if (mpc->mpc_ops->mpo_init != NULL) (*(mpc->mpc_ops->mpo_init))(mpc); mac_policy_fastpath_register(mpc); mac_policy_update(); SDT_PROBE1(mac, , policy, register, mpc); printf("Security policy loaded: %s (%s)\n", mpc->mpc_fullname, mpc->mpc_name); out: mac_policy_xunlock(); return (error); } static int mac_policy_unregister(struct mac_policy_conf *mpc) { /* * If we fail the load, we may get a request to unload. Check to see * if we did the run-time registration, and if not, silently succeed. */ mac_policy_xlock(); if ((mpc->mpc_runtime_flags & MPC_RUNTIME_FLAG_REGISTERED) == 0) { mac_policy_xunlock(); return (0); } #if 0 /* * Don't allow unloading modules with private data. 
*/ if (mpc->mpc_field_off != NULL) { mac_policy_xunlock(); return (EBUSY); } #endif /* * Only allow the unload to proceed if the module is unloadable by * its own definition. */ if ((mpc->mpc_loadtime_flags & MPC_LOADTIME_FLAG_UNLOADOK) == 0) { mac_policy_xunlock(); return (EBUSY); } mac_policy_fastpath_unregister(mpc); if (mpc->mpc_ops->mpo_destroy != NULL) (*(mpc->mpc_ops->mpo_destroy))(mpc); LIST_REMOVE(mpc, mpc_list); mpc->mpc_runtime_flags &= ~MPC_RUNTIME_FLAG_REGISTERED; mac_policy_update(); mac_policy_xunlock(); SDT_PROBE1(mac, , policy, unregister, mpc); printf("Security policy unload: %s (%s)\n", mpc->mpc_fullname, mpc->mpc_name); return (0); } /* * Allow MAC policy modules to register during boot, etc. */ int mac_policy_modevent(module_t mod, int type, void *data) { struct mac_policy_conf *mpc; int error; error = 0; mpc = (struct mac_policy_conf *) data; #ifdef MAC_STATIC if (mac_late) { printf("mac_policy_modevent: MAC_STATIC and late\n"); return (EBUSY); } #endif SDT_PROBE2(mac, , policy, modevent, type, mpc); switch (type) { case MOD_LOAD: if (mpc->mpc_loadtime_flags & MPC_LOADTIME_FLAG_NOTLATE && mac_late) { printf("mac_policy_modevent: can't load %s policy " "after booting\n", mpc->mpc_name); error = EBUSY; break; } error = mac_policy_register(mpc); break; case MOD_UNLOAD: /* Don't unregister the module if it was never registered. */ if ((mpc->mpc_runtime_flags & MPC_RUNTIME_FLAG_REGISTERED) != 0) error = mac_policy_unregister(mpc); else error = 0; break; default: error = EOPNOTSUPP; break; } return (error); } /* * Define an error value precedence, and given two arguments, selects the * value with the higher precedence. */ int mac_error_select(int error1, int error2) { /* Certain decision-making errors take top priority. */ if (error1 == EDEADLK || error2 == EDEADLK) return (EDEADLK); /* Invalid arguments should be reported where possible. 
*/ if (error1 == EINVAL || error2 == EINVAL) return (EINVAL); /* Precedence goes to "visibility", with both process and file. */ if (error1 == ESRCH || error2 == ESRCH) return (ESRCH); if (error1 == ENOENT || error2 == ENOENT) return (ENOENT); /* Precedence goes to DAC/MAC protections. */ if (error1 == EACCES || error2 == EACCES) return (EACCES); /* Precedence goes to privilege. */ if (error1 == EPERM || error2 == EPERM) return (EPERM); /* Precedence goes to error over success; otherwise, arbitrary. */ if (error1 != 0) return (error1); return (error2); } int mac_check_structmac_consistent(struct mac *mac) { /* Require that labels have a non-zero length. */ if (mac->m_buflen > MAC_MAX_LABEL_BUF_LEN || mac->m_buflen <= sizeof("")) return (EINVAL); return (0); } SYSINIT(mac, SI_SUB_MAC, SI_ORDER_FIRST, mac_init, NULL); SYSINIT(mac_late, SI_SUB_MAC_LATE, SI_ORDER_FIRST, mac_late_init, NULL); Index: head/sys/security/mac/mac_framework.h =================================================================== --- head/sys/security/mac/mac_framework.h (revision 367832) +++ head/sys/security/mac/mac_framework.h (revision 367833) @@ -1,622 +1,643 @@ /*- * Copyright (c) 1999-2002, 2007-2011 Robert N. M. Watson * Copyright (c) 2001-2005 Networks Associates Technology, Inc. * Copyright (c) 2005-2006 SPARTA, Inc. * All rights reserved. * * This software was developed by Robert Watson for the TrustedBSD Project. * * This software was developed for the FreeBSD Project in part by Network * Associates Laboratories, the Security Research Division of Network * Associates, Inc. under DARPA/SPAWAR contract N66001-01-C-8035 ("CBOSS"), * as part of the DARPA CHATS research program. * * This software was enhanced by SPARTA ISSO under SPAWAR contract * N66001-04-C-6019 ("SEFOS"). * * This software was developed at the University of Cambridge Computer * Laboratory with support from a grant from Google, Inc. 
* * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ /* * Kernel interface for Mandatory Access Control -- how kernel services * interact with the TrustedBSD MAC Framework. 
*/ #ifndef _SECURITY_MAC_MAC_FRAMEWORK_H_ #define _SECURITY_MAC_MAC_FRAMEWORK_H_ #ifndef _KERNEL #error "no user-serviceable parts inside" #endif struct auditinfo; struct auditinfo_addr; struct bpf_d; struct cdev; struct componentname; struct devfs_dirent; struct ifnet; struct ifreq; struct image_params; struct inpcb; struct ip6q; struct ipq; struct ksem; struct label; struct m_tag; struct mac; struct mbuf; struct mount; struct msg; struct msqid_kernel; struct proc; struct semid_kernel; struct shmfd; struct shmid_kernel; struct sockaddr; struct socket; struct sysctl_oid; struct sysctl_req; struct pipepair; struct thread; struct timespec; struct ucred; struct vattr; struct vnode; struct vop_setlabel_args; #include /* XXX acl_type_t */ #include /* accmode_t */ /* * Entry points to the TrustedBSD MAC Framework from the remainder of the * kernel: entry points are named based on a principal object type and an * action relating to it. They are sorted alphabetically first by object * type and then action. In some situations, the principal object type is * obvious, and in other cases, less so as multiple objects may be involved * in the operation. 
*/ int mac_bpfdesc_check_receive(struct bpf_d *d, struct ifnet *ifp); void mac_bpfdesc_create(struct ucred *cred, struct bpf_d *d); void mac_bpfdesc_create_mbuf(struct bpf_d *d, struct mbuf *m); void mac_bpfdesc_destroy(struct bpf_d *); void mac_bpfdesc_init(struct bpf_d *); void mac_cred_associate_nfsd(struct ucred *cred); int mac_cred_check_setaudit(struct ucred *cred, struct auditinfo *ai); int mac_cred_check_setaudit_addr(struct ucred *cred, struct auditinfo_addr *aia); int mac_cred_check_setauid(struct ucred *cred, uid_t auid); int mac_cred_check_setegid(struct ucred *cred, gid_t egid); int mac_cred_check_seteuid(struct ucred *cred, uid_t euid); int mac_cred_check_setgid(struct ucred *cred, gid_t gid); int mac_cred_check_setgroups(struct ucred *cred, int ngroups, gid_t *gidset); int mac_cred_check_setregid(struct ucred *cred, gid_t rgid, gid_t egid); int mac_cred_check_setresgid(struct ucred *cred, gid_t rgid, gid_t egid, gid_t sgid); int mac_cred_check_setresuid(struct ucred *cred, uid_t ruid, uid_t euid, uid_t suid); int mac_cred_check_setreuid(struct ucred *cred, uid_t ruid, uid_t euid); int mac_cred_check_setuid(struct ucred *cred, uid_t uid); int mac_cred_check_visible(struct ucred *cr1, struct ucred *cr2); void mac_cred_copy(struct ucred *cr1, struct ucred *cr2); void mac_cred_create_init(struct ucred *cred); void mac_cred_create_swapper(struct ucred *cred); void mac_cred_destroy(struct ucred *); void mac_cred_init(struct ucred *); void mac_devfs_create_device(struct ucred *cred, struct mount *mp, struct cdev *dev, struct devfs_dirent *de); void mac_devfs_create_directory(struct mount *mp, char *dirname, int dirnamelen, struct devfs_dirent *de); void mac_devfs_create_symlink(struct ucred *cred, struct mount *mp, struct devfs_dirent *dd, struct devfs_dirent *de); void mac_devfs_destroy(struct devfs_dirent *); void mac_devfs_init(struct devfs_dirent *); void mac_devfs_update(struct mount *mp, struct devfs_dirent *de, struct vnode *vp); void 
mac_devfs_vnode_associate(struct mount *mp, struct devfs_dirent *de, struct vnode *vp); int mac_ifnet_check_transmit(struct ifnet *ifp, struct mbuf *m); void mac_ifnet_create(struct ifnet *ifp); void mac_ifnet_create_mbuf(struct ifnet *ifp, struct mbuf *m); void mac_ifnet_destroy(struct ifnet *); void mac_ifnet_init(struct ifnet *); int mac_ifnet_ioctl_get(struct ucred *cred, struct ifreq *ifr, struct ifnet *ifp); int mac_ifnet_ioctl_set(struct ucred *cred, struct ifreq *ifr, struct ifnet *ifp); int mac_inpcb_check_deliver(struct inpcb *inp, struct mbuf *m); int mac_inpcb_check_visible(struct ucred *cred, struct inpcb *inp); void mac_inpcb_create(struct socket *so, struct inpcb *inp); void mac_inpcb_create_mbuf(struct inpcb *inp, struct mbuf *m); void mac_inpcb_destroy(struct inpcb *); int mac_inpcb_init(struct inpcb *, int); void mac_inpcb_sosetlabel(struct socket *so, struct inpcb *inp); void mac_ip6q_create(struct mbuf *m, struct ip6q *q6); void mac_ip6q_destroy(struct ip6q *q6); int mac_ip6q_init(struct ip6q *q6, int); int mac_ip6q_match(struct mbuf *m, struct ip6q *q6); void mac_ip6q_reassemble(struct ip6q *q6, struct mbuf *m); void mac_ip6q_update(struct mbuf *m, struct ip6q *q6); void mac_ipq_create(struct mbuf *m, struct ipq *q); void mac_ipq_destroy(struct ipq *q); int mac_ipq_init(struct ipq *q, int); int mac_ipq_match(struct mbuf *m, struct ipq *q); void mac_ipq_reassemble(struct ipq *q, struct mbuf *m); void mac_ipq_update(struct mbuf *m, struct ipq *q); int mac_kenv_check_dump(struct ucred *cred); int mac_kenv_check_get(struct ucred *cred, char *name); int mac_kenv_check_set(struct ucred *cred, char *name, char *value); int mac_kenv_check_unset(struct ucred *cred, char *name); int mac_kld_check_load(struct ucred *cred, struct vnode *vp); int mac_kld_check_stat(struct ucred *cred); void mac_mbuf_copy(struct mbuf *, struct mbuf *); int mac_mbuf_init(struct mbuf *, int); void mac_mbuf_tag_copy(struct m_tag *, struct m_tag *); void 
mac_mbuf_tag_destroy(struct m_tag *); int mac_mbuf_tag_init(struct m_tag *, int); int mac_mount_check_stat(struct ucred *cred, struct mount *mp); void mac_mount_create(struct ucred *cred, struct mount *mp); void mac_mount_destroy(struct mount *); void mac_mount_init(struct mount *); void mac_netinet_arp_send(struct ifnet *ifp, struct mbuf *m); void mac_netinet_firewall_reply(struct mbuf *mrecv, struct mbuf *msend); void mac_netinet_firewall_send(struct mbuf *m); void mac_netinet_fragment(struct mbuf *m, struct mbuf *frag); void mac_netinet_icmp_reply(struct mbuf *mrecv, struct mbuf *msend); void mac_netinet_icmp_replyinplace(struct mbuf *m); void mac_netinet_igmp_send(struct ifnet *ifp, struct mbuf *m); void mac_netinet_tcp_reply(struct mbuf *m); void mac_netinet6_nd6_send(struct ifnet *ifp, struct mbuf *m); int mac_pipe_check_ioctl(struct ucred *cred, struct pipepair *pp, unsigned long cmd, void *data); -int mac_pipe_check_poll(struct ucred *cred, struct pipepair *pp); -int mac_pipe_check_read(struct ucred *cred, struct pipepair *pp); +int mac_pipe_check_poll_impl(struct ucred *cred, struct pipepair *pp); +#ifdef MAC +extern bool mac_pipe_check_poll_fp_flag; +#else +#define mac_pipe_check_poll_fp_flag 0 +#endif +#define mac_pipe_check_poll_enabled() __predict_false(mac_pipe_check_poll_fp_flag) /* * Inline fast path for the pipe poll check: mac_pipe_check_poll_impl() is * called only when mac_pipe_check_poll_fp_flag is set (the flag is the * constant 0 when MAC is not defined); otherwise 0 is returned directly. */ +static inline int +mac_pipe_check_poll(struct ucred *cred, struct pipepair *pp) +{ + + if (mac_pipe_check_poll_enabled()) + return (mac_pipe_check_poll_impl(cred, pp)); + return (0); +} + /* * NOTE(review): only the flag and the mac_pipe_check_stat_enabled() predicate * are provided for the stat check; mac_pipe_check_stat() itself stays an * ordinary prototype, so presumably callers test the predicate themselves * before calling it - confirm against the pipe code. */ +#ifdef MAC +extern bool mac_pipe_check_stat_fp_flag; +#else +#define mac_pipe_check_stat_fp_flag 0 +#endif +#define mac_pipe_check_stat_enabled() __predict_false(mac_pipe_check_stat_fp_flag) int mac_pipe_check_stat(struct ucred *cred, struct pipepair *pp); +int mac_pipe_check_read(struct ucred *cred, struct pipepair *pp); int mac_pipe_check_write(struct ucred *cred, struct pipepair *pp); void mac_pipe_create(struct ucred *cred, struct pipepair *pp); void mac_pipe_destroy(struct pipepair 
*); void mac_pipe_init(struct pipepair *); int mac_pipe_label_set(struct ucred *cred, struct pipepair *pp, struct label *label); int mac_posixsem_check_getvalue(struct ucred *active_cred, struct ucred *file_cred, struct ksem *ks); int mac_posixsem_check_open(struct ucred *cred, struct ksem *ks); int mac_posixsem_check_post(struct ucred *active_cred, struct ucred *file_cred, struct ksem *ks); int mac_posixsem_check_setmode(struct ucred *cred, struct ksem *ks, mode_t mode); int mac_posixsem_check_setowner(struct ucred *cred, struct ksem *ks, uid_t uid, gid_t gid); int mac_posixsem_check_stat(struct ucred *active_cred, struct ucred *file_cred, struct ksem *ks); int mac_posixsem_check_unlink(struct ucred *cred, struct ksem *ks); int mac_posixsem_check_wait(struct ucred *active_cred, struct ucred *file_cred, struct ksem *ks); void mac_posixsem_create(struct ucred *cred, struct ksem *ks); void mac_posixsem_destroy(struct ksem *); void mac_posixsem_init(struct ksem *); int mac_posixshm_check_create(struct ucred *cred, const char *path); int mac_posixshm_check_mmap(struct ucred *cred, struct shmfd *shmfd, int prot, int flags); int mac_posixshm_check_open(struct ucred *cred, struct shmfd *shmfd, accmode_t accmode); int mac_posixshm_check_read(struct ucred *active_cred, struct ucred *file_cred, struct shmfd *shmfd); int mac_posixshm_check_setmode(struct ucred *cred, struct shmfd *shmfd, mode_t mode); int mac_posixshm_check_setowner(struct ucred *cred, struct shmfd *shmfd, uid_t uid, gid_t gid); int mac_posixshm_check_stat(struct ucred *active_cred, struct ucred *file_cred, struct shmfd *shmfd); int mac_posixshm_check_truncate(struct ucred *active_cred, struct ucred *file_cred, struct shmfd *shmfd); int mac_posixshm_check_unlink(struct ucred *cred, struct shmfd *shmfd); int mac_posixshm_check_write(struct ucred *active_cred, struct ucred *file_cred, struct shmfd *shmfd); void mac_posixshm_create(struct ucred *cred, struct shmfd *shmfd); void mac_posixshm_destroy(struct shmfd 
*); void mac_posixshm_init(struct shmfd *); int mac_priv_check_impl(struct ucred *cred, int priv); #ifdef MAC extern bool mac_priv_check_fp_flag; #else #define mac_priv_check_fp_flag 0 #endif #define mac_priv_check_enabled() __predict_false(mac_priv_check_fp_flag) /* * Inline fast path for the privilege check: mac_priv_check_impl() is called * only when mac_priv_check_fp_flag is set (constant 0 when MAC is not * defined); otherwise 0 is returned directly. */ static inline int mac_priv_check(struct ucred *cred, int priv) { if (mac_priv_check_enabled()) return (mac_priv_check_impl(cred, priv)); return (0); } int mac_priv_grant_impl(struct ucred *cred, int priv); #ifdef MAC extern bool mac_priv_grant_fp_flag; #else #define mac_priv_grant_fp_flag 0 #endif #define mac_priv_grant_enabled() __predict_false(mac_priv_grant_fp_flag) /* * Inline fast path for privilege granting: mac_priv_grant_impl() is called * only when mac_priv_grant_fp_flag is set. Note the fallback differs from * the check wrappers: when the flag is clear the result is EPERM, not 0. */ static inline int mac_priv_grant(struct ucred *cred, int priv) { if (mac_priv_grant_enabled()) return (mac_priv_grant_impl(cred, priv)); return (EPERM); } int mac_proc_check_debug(struct ucred *cred, struct proc *p); int mac_proc_check_sched(struct ucred *cred, struct proc *p); int mac_proc_check_signal(struct ucred *cred, struct proc *p, int signum); int mac_proc_check_wait(struct ucred *cred, struct proc *p); void mac_proc_destroy(struct proc *); void mac_proc_init(struct proc *); void mac_proc_vm_revoke(struct thread *td); int mac_execve_enter(struct image_params *imgp, struct mac *mac_p); void mac_execve_exit(struct image_params *imgp); void mac_execve_interpreter_enter(struct vnode *interpvp, struct label **interplabel); void mac_execve_interpreter_exit(struct label *interpvplabel); int mac_socket_check_accept(struct ucred *cred, struct socket *so); int mac_socket_check_bind(struct ucred *cred, struct socket *so, struct sockaddr *sa); int mac_socket_check_connect(struct ucred *cred, struct socket *so, struct sockaddr *sa); int mac_socket_check_create(struct ucred *cred, int domain, int type, int proto); int mac_socket_check_deliver(struct socket *so, struct mbuf *m); int mac_socket_check_listen(struct ucred *cred, struct socket *so); int mac_socket_check_poll(struct ucred *cred, struct socket *so); int mac_socket_check_receive(struct 
ucred *cred, struct socket *so); int mac_socket_check_send(struct ucred *cred, struct socket *so); int mac_socket_check_stat(struct ucred *cred, struct socket *so); int mac_socket_check_visible(struct ucred *cred, struct socket *so); void mac_socket_create_mbuf(struct socket *so, struct mbuf *m); void mac_socket_create(struct ucred *cred, struct socket *so); void mac_socket_destroy(struct socket *); int mac_socket_init(struct socket *, int); void mac_socket_newconn(struct socket *oldso, struct socket *newso); int mac_getsockopt_label(struct ucred *cred, struct socket *so, struct mac *extmac); int mac_getsockopt_peerlabel(struct ucred *cred, struct socket *so, struct mac *extmac); int mac_setsockopt_label(struct ucred *cred, struct socket *so, struct mac *extmac); void mac_socketpeer_set_from_mbuf(struct mbuf *m, struct socket *so); void mac_socketpeer_set_from_socket(struct socket *oldso, struct socket *newso); void mac_syncache_create(struct label *l, struct inpcb *inp); void mac_syncache_create_mbuf(struct label *l, struct mbuf *m); void mac_syncache_destroy(struct label **l); int mac_syncache_init(struct label **l); int mac_system_check_acct(struct ucred *cred, struct vnode *vp); int mac_system_check_audit(struct ucred *cred, void *record, int length); int mac_system_check_auditctl(struct ucred *cred, struct vnode *vp); int mac_system_check_auditon(struct ucred *cred, int cmd); int mac_system_check_reboot(struct ucred *cred, int howto); int mac_system_check_swapon(struct ucred *cred, struct vnode *vp); int mac_system_check_swapoff(struct ucred *cred, struct vnode *vp); int mac_system_check_sysctl(struct ucred *cred, struct sysctl_oid *oidp, void *arg1, int arg2, struct sysctl_req *req); void mac_sysvmsg_cleanup(struct msg *msgptr); void mac_sysvmsg_create(struct ucred *cred, struct msqid_kernel *msqkptr, struct msg *msgptr); void mac_sysvmsg_destroy(struct msg *); void mac_sysvmsg_init(struct msg *); int mac_sysvmsq_check_msgmsq(struct ucred *cred, struct msg 
*msgptr, struct msqid_kernel *msqkptr); int mac_sysvmsq_check_msgrcv(struct ucred *cred, struct msg *msgptr); int mac_sysvmsq_check_msgrmid(struct ucred *cred, struct msg *msgptr); int mac_sysvmsq_check_msqctl(struct ucred *cred, struct msqid_kernel *msqkptr, int cmd); int mac_sysvmsq_check_msqget(struct ucred *cred, struct msqid_kernel *msqkptr); int mac_sysvmsq_check_msqrcv(struct ucred *cred, struct msqid_kernel *msqkptr); int mac_sysvmsq_check_msqsnd(struct ucred *cred, struct msqid_kernel *msqkptr); void mac_sysvmsq_cleanup(struct msqid_kernel *msqkptr); void mac_sysvmsq_create(struct ucred *cred, struct msqid_kernel *msqkptr); void mac_sysvmsq_destroy(struct msqid_kernel *); void mac_sysvmsq_init(struct msqid_kernel *); int mac_sysvsem_check_semctl(struct ucred *cred, struct semid_kernel *semakptr, int cmd); int mac_sysvsem_check_semget(struct ucred *cred, struct semid_kernel *semakptr); int mac_sysvsem_check_semop(struct ucred *cred, struct semid_kernel *semakptr, size_t accesstype); void mac_sysvsem_cleanup(struct semid_kernel *semakptr); void mac_sysvsem_create(struct ucred *cred, struct semid_kernel *semakptr); void mac_sysvsem_destroy(struct semid_kernel *); void mac_sysvsem_init(struct semid_kernel *); int mac_sysvshm_check_shmat(struct ucred *cred, struct shmid_kernel *shmsegptr, int shmflg); int mac_sysvshm_check_shmctl(struct ucred *cred, struct shmid_kernel *shmsegptr, int cmd); int mac_sysvshm_check_shmdt(struct ucred *cred, struct shmid_kernel *shmsegptr); int mac_sysvshm_check_shmget(struct ucred *cred, struct shmid_kernel *shmsegptr, int shmflg); void mac_sysvshm_cleanup(struct shmid_kernel *shmsegptr); void mac_sysvshm_create(struct ucred *cred, struct shmid_kernel *shmsegptr); void mac_sysvshm_destroy(struct shmid_kernel *); void mac_sysvshm_init(struct shmid_kernel *); void mac_thread_userret(struct thread *td); /* * mac_vnode_assert_locked() is a real function only when both MAC and * DEBUG_VFS_LOCKS are defined; in every other configuration it expands to an * empty statement. */ #if defined(MAC) && defined(DEBUG_VFS_LOCKS) void mac_vnode_assert_locked(struct vnode *vp, const char *func); #else #define
mac_vnode_assert_locked(vp, func) do { } while (0) #endif int mac_vnode_associate_extattr(struct mount *mp, struct vnode *vp); void mac_vnode_associate_singlelabel(struct mount *mp, struct vnode *vp); int mac_vnode_check_access_impl(struct ucred *cred, struct vnode *dvp, accmode_t accmode); /* * NOTE(review): unlike the other fast-path flags in this header, this extern * is not wrapped in #ifdef MAC with a constant 0 fallback. */ extern bool mac_vnode_check_access_fp_flag; #define mac_vnode_check_access_enabled() __predict_false(mac_vnode_check_access_fp_flag) /* * Inline fast path: after mac_vnode_assert_locked(), the _impl function is * called only when mac_vnode_check_access_fp_flag is set; otherwise 0 is * returned directly. */ static inline int mac_vnode_check_access(struct ucred *cred, struct vnode *dvp, accmode_t accmode) { mac_vnode_assert_locked(dvp, "mac_vnode_check_access"); if (mac_vnode_check_access_enabled()) return (mac_vnode_check_access_impl(cred, dvp, accmode)); return (0); } int mac_vnode_check_chdir(struct ucred *cred, struct vnode *dvp); int mac_vnode_check_chroot(struct ucred *cred, struct vnode *dvp); int mac_vnode_check_create(struct ucred *cred, struct vnode *dvp, struct componentname *cnp, struct vattr *vap); int mac_vnode_check_deleteacl(struct ucred *cred, struct vnode *vp, acl_type_t type); int mac_vnode_check_deleteextattr(struct ucred *cred, struct vnode *vp, int attrnamespace, const char *name); int mac_vnode_check_exec(struct ucred *cred, struct vnode *vp, struct image_params *imgp); int mac_vnode_check_getacl(struct ucred *cred, struct vnode *vp, acl_type_t type); int mac_vnode_check_getextattr(struct ucred *cred, struct vnode *vp, int attrnamespace, const char *name); int mac_vnode_check_link(struct ucred *cred, struct vnode *dvp, struct vnode *vp, struct componentname *cnp); int mac_vnode_check_listextattr(struct ucred *cred, struct vnode *vp, int attrnamespace); int mac_vnode_check_lookup_impl(struct ucred *cred, struct vnode *dvp, struct componentname *cnp); #ifdef MAC extern bool mac_vnode_check_lookup_fp_flag; #else #define mac_vnode_check_lookup_fp_flag 0 #endif #define mac_vnode_check_lookup_enabled() __predict_false(mac_vnode_check_lookup_fp_flag) /* * Inline fast path for lookup: the _impl function is called only when * mac_vnode_check_lookup_fp_flag is set (constant 0 without MAC); otherwise * 0 is returned directly. */ static inline int mac_vnode_check_lookup(struct ucred *cred, struct vnode *dvp, struct
componentname *cnp) { mac_vnode_assert_locked(dvp, "mac_vnode_check_lookup"); if (mac_vnode_check_lookup_enabled()) return (mac_vnode_check_lookup_impl(cred, dvp, cnp)); return (0); } int mac_vnode_check_mmap_impl(struct ucred *cred, struct vnode *vp, int prot, int flags); #ifdef MAC extern bool mac_vnode_check_mmap_fp_flag; #else #define mac_vnode_check_mmap_fp_flag 0 #endif #define mac_vnode_check_mmap_enabled() __predict_false(mac_vnode_check_mmap_fp_flag) /* * Inline fast path for mmap: the _impl function is called only when * mac_vnode_check_mmap_fp_flag is set; otherwise 0 is returned directly. */ static inline int mac_vnode_check_mmap(struct ucred *cred, struct vnode *vp, int prot, int flags) { mac_vnode_assert_locked(vp, "mac_vnode_check_mmap"); if (mac_vnode_check_mmap_enabled()) return (mac_vnode_check_mmap_impl(cred, vp, prot, flags)); return (0); } int mac_vnode_check_open_impl(struct ucred *cred, struct vnode *vp, accmode_t accmode); #ifdef MAC extern bool mac_vnode_check_open_fp_flag; #else #define mac_vnode_check_open_fp_flag 0 #endif #define mac_vnode_check_open_enabled() __predict_false(mac_vnode_check_open_fp_flag) /* * Inline fast path for open: the _impl function is called only when * mac_vnode_check_open_fp_flag is set; otherwise 0 is returned directly. */ static inline int mac_vnode_check_open(struct ucred *cred, struct vnode *vp, accmode_t accmode) { mac_vnode_assert_locked(vp, "mac_vnode_check_open"); if (mac_vnode_check_open_enabled()) return (mac_vnode_check_open_impl(cred, vp, accmode)); return (0); } int mac_vnode_check_mprotect(struct ucred *cred, struct vnode *vp, int prot); /* * The poll check is handled differently from the wrappers above: with MAC * it is an ordinary external function, and without MAC it collapses to an * inline stub that returns 0 while the flag becomes the constant 0. */ #define mac_vnode_check_poll_enabled() __predict_false(mac_vnode_check_poll_fp_flag) #ifdef MAC extern bool mac_vnode_check_poll_fp_flag; int mac_vnode_check_poll(struct ucred *active_cred, struct ucred *file_cred, struct vnode *vp); #else #define mac_vnode_check_poll_fp_flag 0 static inline int mac_vnode_check_poll(struct ucred *active_cred, struct ucred *file_cred, struct vnode *vp) { return (0); } #endif int mac_vnode_check_readdir(struct ucred *cred, struct vnode *vp); int mac_vnode_check_readlink(struct ucred *cred, struct vnode *vp); #define mac_vnode_check_rename_from_enabled() __predict_false(mac_vnode_check_rename_from_fp_flag) #ifdef MAC 
extern bool mac_vnode_check_rename_from_fp_flag; #endif /* * NOTE(review): the #ifdef MAC block closed just above declares * mac_vnode_check_rename_from_fp_flag but has no #else fallback defining it * as 0, so mac_vnode_check_rename_from_enabled() only expands to something * valid when MAC is defined. */ int mac_vnode_check_rename_from(struct ucred *cred, struct vnode *dvp, struct vnode *vp, struct componentname *cnp); int mac_vnode_check_rename_to(struct ucred *cred, struct vnode *dvp, struct vnode *vp, int samedir, struct componentname *cnp); int mac_vnode_check_revoke(struct ucred *cred, struct vnode *vp); int mac_vnode_check_setacl(struct ucred *cred, struct vnode *vp, acl_type_t type, struct acl *acl); int mac_vnode_check_setextattr(struct ucred *cred, struct vnode *vp, int attrnamespace, const char *name); int mac_vnode_check_setflags(struct ucred *cred, struct vnode *vp, u_long flags); int mac_vnode_check_setmode(struct ucred *cred, struct vnode *vp, mode_t mode); int mac_vnode_check_setowner(struct ucred *cred, struct vnode *vp, uid_t uid, gid_t gid); int mac_vnode_check_setutimes(struct ucred *cred, struct vnode *vp, struct timespec atime, struct timespec mtime); int mac_vnode_check_stat_impl(struct ucred *active_cred, struct ucred *file_cred, struct vnode *vp); #ifdef MAC extern bool mac_vnode_check_stat_fp_flag; #else #define mac_vnode_check_stat_fp_flag 0 #endif #define mac_vnode_check_stat_enabled() __predict_false(mac_vnode_check_stat_fp_flag) /* * Inline fast path for stat: after mac_vnode_assert_locked(), the _impl * function is called only when mac_vnode_check_stat_fp_flag is set (constant * 0 without MAC); otherwise 0 is returned directly. */ static inline int mac_vnode_check_stat(struct ucred *active_cred, struct ucred *file_cred, struct vnode *vp) { mac_vnode_assert_locked(vp, "mac_vnode_check_stat"); if (mac_vnode_check_stat_enabled()) return (mac_vnode_check_stat_impl(active_cred, file_cred, vp)); return (0); } int mac_vnode_check_read_impl(struct ucred *active_cred, struct ucred *file_cred, struct vnode *vp); #ifdef MAC extern bool mac_vnode_check_read_fp_flag; #else #define mac_vnode_check_read_fp_flag 0 #endif #define mac_vnode_check_read_enabled() __predict_false(mac_vnode_check_read_fp_flag) /* * Inline fast path for read: the _impl function is called only when * mac_vnode_check_read_fp_flag is set; otherwise 0 is returned directly. */ static inline int mac_vnode_check_read(struct ucred *active_cred, struct ucred *file_cred, struct vnode *vp) { mac_vnode_assert_locked(vp, "mac_vnode_check_read"); if
(mac_vnode_check_read_enabled()) return (mac_vnode_check_read_impl(active_cred, file_cred, vp)); return (0); } int mac_vnode_check_write_impl(struct ucred *active_cred, struct ucred *file_cred, struct vnode *vp); #ifdef MAC extern bool mac_vnode_check_write_fp_flag; #else #define mac_vnode_check_write_fp_flag 0 #endif #define mac_vnode_check_write_enabled() __predict_false(mac_vnode_check_write_fp_flag) /* * Inline fast path for write: the _impl function is called only when * mac_vnode_check_write_fp_flag is set; otherwise 0 is returned directly. */ static inline int mac_vnode_check_write(struct ucred *active_cred, struct ucred *file_cred, struct vnode *vp) { mac_vnode_assert_locked(vp, "mac_vnode_check_write"); if (mac_vnode_check_write_enabled()) return (mac_vnode_check_write_impl(active_cred, file_cred, vp)); return (0); } int mac_vnode_check_unlink(struct ucred *cred, struct vnode *dvp, struct vnode *vp, struct componentname *cnp); void mac_vnode_copy_label(struct label *, struct label *); void mac_vnode_init(struct vnode *); int mac_vnode_create_extattr(struct ucred *cred, struct mount *mp, struct vnode *dvp, struct vnode *vp, struct componentname *cnp); void mac_vnode_destroy(struct vnode *); void mac_vnode_execve_transition(struct ucred *oldcred, struct ucred *newcred, struct vnode *vp, struct label *interpvplabel, struct image_params *imgp); int mac_vnode_execve_will_transition(struct ucred *cred, struct vnode *vp, struct label *interpvplabel, struct image_params *imgp); void mac_vnode_relabel(struct ucred *cred, struct vnode *vp, struct label *newlabel); /* * Calls to help various file systems implement labeling functionality using * their existing EA implementation. */ int vop_stdsetlabel_ea(struct vop_setlabel_args *ap); #endif /* !_SECURITY_MAC_MAC_FRAMEWORK_H_ */ Index: head/sys/security/mac/mac_pipe.c =================================================================== --- head/sys/security/mac/mac_pipe.c (revision 367832) +++ head/sys/security/mac/mac_pipe.c (revision 367833) @@ -1,259 +1,259 @@ /*- * Copyright (c) 2002-2003 Networks Associates Technology, Inc. * Copyright (c) 2006 SPARTA, Inc. 
* Copyright (c) 2009 Robert N. M. Watson * All rights reserved. * * This software was developed for the FreeBSD Project in part by Network * Associates Laboratories, the Security Research Division of Network * Associates, Inc. under DARPA/SPAWAR contract N66001-01-C-8035 ("CBOSS"), * as part of the DARPA CHATS research program. * * This software was enhanced by SPARTA ISSO under SPAWAR contract * N66001-04-C-6019 ("SEFOS"). * * This software was developed at the University of Cambridge Computer * Laboratory with support from a grant from Google, Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
*/ /* NOTE(review): the header names of the #include directives below are missing in this copy of the file. */ #include __FBSDID("$FreeBSD$"); #include "opt_mac.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include /* * Allocate a label with mac_labelzone_alloc(M_WAITOK) and run the * pipe_init_label policy entry point on it before returning it. */ struct label * mac_pipe_label_alloc(void) { struct label *label; label = mac_labelzone_alloc(M_WAITOK); MAC_POLICY_PERFORM(pipe_init_label, label); return (label); } /* * Attach a label to the pipepair only when the MPC_OBJECT_PIPE bit is set in * mac_labeled; otherwise pp_label is set to NULL. */ void mac_pipe_init(struct pipepair *pp) { if (mac_labeled & MPC_OBJECT_PIPE) pp->pp_label = mac_pipe_label_alloc(); else pp->pp_label = NULL; } /* Run the pipe_destroy_label entry point, then hand the label back to the label zone. */ void mac_pipe_label_free(struct label *label) { MAC_POLICY_PERFORM_NOSLEEP(pipe_destroy_label, label); mac_labelzone_free(label); } /* Free pp_label if one is attached and clear the pointer; a NULL label is left alone. */ void mac_pipe_destroy(struct pipepair *pp) { if (pp->pp_label != NULL) { mac_pipe_label_free(pp->pp_label); pp->pp_label = NULL; } } /* Run the pipe_copy_label policy entry point with src and dest. */ void mac_pipe_copy_label(struct label *src, struct label *dest) { MAC_POLICY_PERFORM_NOSLEEP(pipe_copy_label, src, dest); } /* * Externalize a pipe label through MAC_POLICY_EXTERNALIZE. The local error * is never assigned in this function, so the macro is presumably what sets * it - confirm against the macro definition. */ int mac_pipe_externalize_label(struct label *label, char *elements, char *outbuf, size_t outbuflen) { int error; MAC_POLICY_EXTERNALIZE(pipe, label, elements, outbuf, outbuflen); return (error); } /* * Internalize a pipe label from string through MAC_POLICY_INTERNALIZE; as * above, error is presumably set by the macro. */ int mac_pipe_internalize_label(struct label *label, char *string) { int error; MAC_POLICY_INTERNALIZE(pipe, label, string); return (error); } /* Run the pipe_create policy entry point with cred, pp and pp->pp_label. */ void mac_pipe_create(struct ucred *cred, struct pipepair *pp) { MAC_POLICY_PERFORM_NOSLEEP(pipe_create, cred, pp, pp->pp_label); } /* Run the pipe_relabel policy entry point with the current label and newlabel; file-local helper. */ static void mac_pipe_relabel(struct ucred *cred, struct pipepair *pp, struct label *newlabel) { MAC_POLICY_PERFORM_NOSLEEP(pipe_relabel, cred, pp, pp->pp_label, newlabel); } /* * The check functions from here to the end of the file (ioctl, poll, read, * relabel, stat, write) share one shape: assert that pp_mtx is owned, run * the matching MAC_POLICY_CHECK_NOSLEEP entry point with cred, pp and * pp->pp_label (plus any extra arguments), fire the matching probe with the * resulting error, and return that error. mac_pipe_label_set() at the end * of the file calls mac_pipe_relabel() only if mac_pipe_check_relabel() * returned 0, and otherwise returns the check error unchanged. */ MAC_CHECK_PROBE_DEFINE4(pipe_check_ioctl, "struct ucred *", "struct pipepair *", "unsigned long", "void *"); int mac_pipe_check_ioctl(struct ucred *cred, struct pipepair *pp, unsigned long cmd, void *data) { int error; mtx_assert(&pp->pp_mtx, MA_OWNED); MAC_POLICY_CHECK_NOSLEEP(pipe_check_ioctl, cred, pp, pp->pp_label, cmd, data); MAC_CHECK_PROBE4(pipe_check_ioctl, error, cred, pp, cmd, data); return (error); } MAC_CHECK_PROBE_DEFINE2(pipe_check_poll, "struct
ucred *", "struct pipepair *"); int -mac_pipe_check_poll(struct ucred *cred, struct pipepair *pp) +mac_pipe_check_poll_impl(struct ucred *cred, struct pipepair *pp) { int error; mtx_assert(&pp->pp_mtx, MA_OWNED); MAC_POLICY_CHECK_NOSLEEP(pipe_check_poll, cred, pp, pp->pp_label); MAC_CHECK_PROBE2(pipe_check_poll, error, cred, pp); return (error); } MAC_CHECK_PROBE_DEFINE2(pipe_check_read, "struct ucred *", "struct pipepair *"); int mac_pipe_check_read(struct ucred *cred, struct pipepair *pp) { int error; mtx_assert(&pp->pp_mtx, MA_OWNED); MAC_POLICY_CHECK_NOSLEEP(pipe_check_read, cred, pp, pp->pp_label); MAC_CHECK_PROBE2(pipe_check_read, error, cred, pp); return (error); } MAC_CHECK_PROBE_DEFINE3(pipe_check_relabel, "struct ucred *", "struct pipepair *", "struct label *"); static int mac_pipe_check_relabel(struct ucred *cred, struct pipepair *pp, struct label *newlabel) { int error; mtx_assert(&pp->pp_mtx, MA_OWNED); MAC_POLICY_CHECK_NOSLEEP(pipe_check_relabel, cred, pp, pp->pp_label, newlabel); MAC_CHECK_PROBE3(pipe_check_relabel, error, cred, pp, newlabel); return (error); } MAC_CHECK_PROBE_DEFINE2(pipe_check_stat, "struct ucred *", "struct pipepair *"); int mac_pipe_check_stat(struct ucred *cred, struct pipepair *pp) { int error; mtx_assert(&pp->pp_mtx, MA_OWNED); MAC_POLICY_CHECK_NOSLEEP(pipe_check_stat, cred, pp, pp->pp_label); MAC_CHECK_PROBE2(pipe_check_stat, error, cred, pp); return (error); } MAC_CHECK_PROBE_DEFINE2(pipe_check_write, "struct ucred *", "struct pipepair *"); int mac_pipe_check_write(struct ucred *cred, struct pipepair *pp) { int error; mtx_assert(&pp->pp_mtx, MA_OWNED); MAC_POLICY_CHECK_NOSLEEP(pipe_check_write, cred, pp, pp->pp_label); MAC_CHECK_PROBE2(pipe_check_write, error, cred, pp); return (error); } int mac_pipe_label_set(struct ucred *cred, struct pipepair *pp, struct label *label) { int error; mtx_assert(&pp->pp_mtx, MA_OWNED); error = mac_pipe_check_relabel(cred, pp, label); if (error) return (error); mac_pipe_relabel(cred, pp,
label); return (0); } Index: head/sys/sys/pipe.h =================================================================== --- head/sys/sys/pipe.h (revision 367832) +++ head/sys/sys/pipe.h (revision 367833) @@ -1,147 +1,152 @@ /*- * Copyright (c) 1996 John S. Dyson * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice immediately at the beginning of the file, without modification, * this list of conditions, and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Absolutely no warranty of function or purpose is made by the author * John S. Dyson. * 4. This work was done expressly for inclusion into FreeBSD. Other use * is allowed if this notation is included. * 5. Modifications may be freely made to this file if the above conditions * are met. * * $FreeBSD$ */ #ifndef _SYS_PIPE_H_ #define _SYS_PIPE_H_ #ifndef _KERNEL #error "no user-serviceable parts inside" #endif /* * Pipe buffer size, keep moderate in value, pipes take kva space. */ #ifndef PIPE_SIZE #define PIPE_SIZE 16384 #endif #ifndef BIG_PIPE_SIZE #define BIG_PIPE_SIZE (64*1024) #endif #ifndef SMALL_PIPE_SIZE #define SMALL_PIPE_SIZE PAGE_SIZE #endif /* * PIPE_MINDIRECT MUST be smaller than PIPE_SIZE and MUST be bigger * than PIPE_BUF. */ #ifndef PIPE_MINDIRECT #define PIPE_MINDIRECT 8192 #endif #define PIPENPAGES (BIG_PIPE_SIZE / PAGE_SIZE + 1) /* * See sys_pipe.c for info on what these limits mean. */ extern long maxpipekva; extern struct fileops pipeops; /* * Pipe buffer information. * Separate in, out, cnt are used to simplify calculations. * Buffered write is active when the buffer.cnt field is set. 
*/ struct pipebuf { u_int cnt; /* number of chars currently in buffer */ u_int in; /* in pointer */ u_int out; /* out pointer */ u_int size; /* size of buffer */ caddr_t buffer; /* kva of buffer */ }; /* * Information to support direct transfers between processes for pipes. */ struct pipemapping { vm_size_t cnt; /* number of chars in buffer */ vm_size_t pos; /* current position of transfer */ int npages; /* number of pages */ vm_page_t ms[PIPENPAGES]; /* pages in source process */ }; /* * Bits in pipe_state. */ #define PIPE_ASYNC 0x004 /* Async? I/O. */ #define PIPE_WANTR 0x008 /* Reader wants some characters. */ #define PIPE_WANTW 0x010 /* Writer wants space to put characters. */ #define PIPE_WANT 0x020 /* Pipe is wanted to be run-down. */ #define PIPE_SEL 0x040 /* Pipe has a select active. */ #define PIPE_EOF 0x080 /* Pipe is in EOF condition. */ #define PIPE_LOCKFL 0x100 /* Process has exclusive access to pointers/data. */ #define PIPE_LWANT 0x200 /* Process wants exclusive access to pointers/data. */ #define PIPE_DIRECTW 0x400 /* Pipe direct write active. */ #define PIPE_DIRECTOK 0x800 /* Direct mode ok. */ /* NOTE(review): this hunk moves the named-pipe marker out of pipe_state (PIPE_NAMED, 0x1000) into the separate pipe_type field (PIPE_TYPE_NAMED, 0x001). */ -#define PIPE_NAMED 0x1000 /* Is a named pipe. */ /* + * Bits in pipe_type. + */ +#define PIPE_TYPE_NAMED 0x001 /* Is a named pipe. */ + +/* * Per-pipe data structure. * Two of these are linked together to produce bi-directional pipes.
*/ struct pipe { struct pipebuf pipe_buffer; /* data storage */ struct pipemapping pipe_pages; /* wired pages for direct I/O */ struct selinfo pipe_sel; /* for compat with select */ struct timespec pipe_atime; /* time of last access */ struct timespec pipe_mtime; /* time of last modify */ struct timespec pipe_ctime; /* time of status change */ struct sigio *pipe_sigio; /* information for async I/O */ struct pipe *pipe_peer; /* link with other direction */ struct pipepair *pipe_pair; /* container structure pointer */ - u_int pipe_state; /* pipe status info */ + u_short pipe_state; /* pipe status info */ + u_short pipe_type; /* pipe type info */ /* NOTE(review): with pipe_state narrowed from u_int to u_short, every pipe_state bit must fit in a u_short; the largest one still defined in this header is PIPE_DIRECTOK (0x800). */ int pipe_busy; /* busy flag, mostly to handle rundown sanely */ int pipe_present; /* still present? */ int pipe_wgen; /* writer generation for named pipe */ ino_t pipe_ino; /* fake inode for stat(2) */ }; /* * Values for the pipe_present. */ #define PIPE_ACTIVE 1 #define PIPE_CLOSING 2 #define PIPE_FINALIZED 3 /* * Container structure to hold the two pipe endpoints, mutex, and label * pointer. */ struct pipepair { struct pipe pp_rpipe; struct pipe pp_wpipe; struct mtx pp_mtx; struct label *pp_label; }; /* * Locking helpers: a pipe reaches the mutex through its pipe_pair pointer, * so the lock taken is the single pp_mtx stored in the containing pipepair. * PIPE_LOCK_ASSERT forwards its type argument to mtx_assert(). */ #define PIPE_MTX(pipe) (&(pipe)->pipe_pair->pp_mtx) #define PIPE_LOCK(pipe) mtx_lock(PIPE_MTX(pipe)) #define PIPE_UNLOCK(pipe) mtx_unlock(PIPE_MTX(pipe)) #define PIPE_LOCK_ASSERT(pipe, type) mtx_assert(PIPE_MTX(pipe), (type)) void pipe_dtor(struct pipe *dpipe); int pipe_named_ctor(struct pipe **ppipe, struct thread *td); void pipeselwakeup(struct pipe *cpipe); #endif /* !_SYS_PIPE_H_ */