Index: head/contrib/capsicum-test/.gitignore =================================================================== --- head/contrib/capsicum-test/.gitignore +++ head/contrib/capsicum-test/.gitignore @@ -0,0 +1,19 @@ +capsicum-test +mini-me +mini-me.noexec +mini-me.setuid +mini-me.32 +mini-me.x32 +mini-me.64 +libgtest.a +smoketest +*.o +libcap*.deb +libcap*.dsc +libcap*.tar.gz +libcap*.changes +casper*.deb +casper*.dsc +casper*.tar.gz +casper*.changes +libcaprights.a \ No newline at end of file Index: head/contrib/capsicum-test/CONTRIBUTING.md =================================================================== --- head/contrib/capsicum-test/CONTRIBUTING.md +++ head/contrib/capsicum-test/CONTRIBUTING.md @@ -0,0 +1,20 @@ +## Contributor License Agreement ## + +Contributions to any Google project must be accompanied by a Contributor +License Agreement. This is not a copyright **assignment**, it simply gives +Google permission to use and redistribute your contributions as part of the +project. + + * If you are an individual writing original source code and you're sure you + own the intellectual property, then you'll need to sign an [individual + CLA][]. + + * If you work for a company that wants to allow you to contribute your work, + then you'll need to sign a [corporate CLA][]. + +You generally only need to submit a CLA once, so if you've already submitted +one (even if it was for a different project), you probably don't need to do it +again. 
+ +[individual CLA]: https://developers.google.com/open-source/cla/individual +[corporate CLA]: https://developers.google.com/open-source/cla/corporate Index: head/contrib/capsicum-test/GNUmakefile =================================================================== --- head/contrib/capsicum-test/GNUmakefile +++ head/contrib/capsicum-test/GNUmakefile @@ -0,0 +1,78 @@ +OS:=$(shell uname) + +# Set ARCH to 32 or x32 for i386/x32 ABIs +ARCH?=64 +ARCHFLAG=-m$(ARCH) + +ifeq ($(OS),Linux) +PROCESSOR:=$(shell uname -p) + +ifneq ($(wildcard /usr/lib/$(PROCESSOR)-linux-gnu),) +# Can use standard Debian location for static libraries. +PLATFORM_LIBDIR=/usr/lib/$(PROCESSOR)-linux-gnu +else +# Attempt to determine library location from gcc configuration (expanded once with := so the pipeline is not re-run on every use). +PLATFORM_LIBDIR:=$(shell gcc -v 2>&1 | grep "Configured with:" | sed 's/.*--libdir=\(\/usr\/[^ ]*\).*/\1/g') +endif + +# Override for explicitly specified ARCHFLAG. +# Use locally compiled libcaprights in this case, on the +# assumption that any installed version is 64-bit. +ifeq ($(ARCHFLAG),-m32) +PROCESSOR=i386 +PLATFORM_LIBDIR=/usr/lib32 +LIBCAPRIGHTS=./libcaprights.a +endif +ifeq ($(ARCHFLAG),-mx32) +PROCESSOR=x32 +PLATFORM_LIBDIR=/usr/libx32 +LIBCAPRIGHTS=./libcaprights.a +endif + +# Detect presence of libsctp in normal Debian location; append to (do not replace) any existing CXXFLAGS +ifneq ($(wildcard $(PLATFORM_LIBDIR)/libsctp.a),) +LIBSCTP=-lsctp +CXXFLAGS+=-DHAVE_SCTP +endif + +ifneq ($(LIBCAPRIGHTS),) +# Build local libcaprights.a (assuming ./configure +# has already been done in libcaprights/) +LOCAL_LIBS=$(LIBCAPRIGHTS) +LIBCAPRIGHTS_OBJS=libcaprights/capsicum.o libcaprights/linux-bpf-capmode.o libcaprights/procdesc.o libcaprights/signal.o +LOCAL_CLEAN=$(LOCAL_LIBS) $(LIBCAPRIGHTS_OBJS) +else +# Detect installed libcaprights static library. 
+ifneq ($(wildcard $(PLATFORM_LIBDIR)/libcaprights.a),) +LIBCAPRIGHTS=$(PLATFORM_LIBDIR)/libcaprights.a +else +ifneq ($(wildcard /usr/lib/libcaprights.a),) +LIBCAPRIGHTS=/usr/lib/libcaprights.a +endif +endif +endif + +endif + +# Extra test programs for arch-transition tests +EXTRA_PROGS = mini-me.32 mini-me.64 +ifneq ($(wildcard /usr/include/gnu/stubs-x32.h),) +EXTRA_PROGS += mini-me.x32 +endif + +# Chain on to the master makefile +include makefile + +./libcaprights.a: $(LIBCAPRIGHTS_OBJS) + ar cr $@ $^ + +# Small static programs of known architectures +# These may require additional packages to be installed; for example, for Debian: +# - libc6-dev-i386 provides 32-bit headers for a 64-bit system +# - libc6-dev-x32 provides headers for the x32 ABI. +mini-me.32: mini-me.c + $(CC) $(CFLAGS) -m32 -static -o $@ $< +mini-me.x32: mini-me.c + $(CC) $(CFLAGS) -mx32 -static -o $@ $< +mini-me.64: mini-me.c + $(CC) $(CFLAGS) -m64 -static -o $@ $< Index: head/contrib/capsicum-test/LICENSE =================================================================== --- head/contrib/capsicum-test/LICENSE +++ head/contrib/capsicum-test/LICENSE @@ -0,0 +1,26 @@ +Copyright (c) 2009-2011 Robert N. M. Watson +Copyright (c) 2011 Jonathan Anderson +Copyright (C) 2012 The Chromium OS Authors +Copyright (c) 2013-2014 Google Inc. +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: +1. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. +2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. 
+ +THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF +SUCH DAMAGE. Index: head/contrib/capsicum-test/README.md =================================================================== --- head/contrib/capsicum-test/README.md +++ head/contrib/capsicum-test/README.md @@ -0,0 +1,62 @@ +# Capsicum User Space Tests + +This directory holds unit tests for [Capsicum](http://www.cl.cam.ac.uk/research/security/capsicum/) +object-capabilities. The tests exercise the syscall interface to a Capsicum-enabled operating system, +currently either [FreeBSD >=10.x](http://www.freebsd.org) or a modified Linux kernel (the +[capsicum-linux](http://github.com/google/capsicum-linux) project). + +The tests are written in C++98, and use the [Google Test](https://code.google.com/p/googletest/) +framework, with some additions to fork off particular tests (because a process that enters capability +mode cannot leave it again). 
+ +## Provenance + +The original basis for these tests was: + + - [unit tests](https://github.com/freebsd/freebsd/tree/master/tools/regression/security/cap_test) + written by Robert Watson and Jonathan Anderson for the original FreeBSD 9.x Capsicum implementation + - [unit tests](http://git.chromium.org/gitweb/?p=chromiumos/third_party/kernel-capsicum.git;a=tree;f=tools/testing/capsicum_tests;hb=refs/heads/capsicum) written by Meredydd Luff for the original Capsicum-Linux port. + +These tests were coalesced and moved into an independent repository to enable +comparative testing across multiple OSes, and then substantially extended. + +## OS Configuration + +### Linux + +The following kernel configuration options are needed to run the tests: + + - `CONFIG_SECURITY_CAPSICUM`: enable the Capsicum framework + - `CONFIG_PROCDESC`: enable Capsicum process-descriptor functionality + - `CONFIG_DEBUG_FS`: enable debug filesystem + - `CONFIG_IP_SCTP`: enable SCTP support + +### FreeBSD (>= 10.x) + +The following kernel configuration options are needed so that all tests can run: + + - `options P1003_1B_MQUEUE`: Enable POSIX message queues (or `kldload mqueuefs`) + +## Other Dependencies + +### Linux + +The following additional development packages are needed to build the full test suite on Linux: + + - `libcaprights`: See below + - `libcap-dev`: Provides headers for POSIX.1e capabilities. + - `libsctp1`: Provides SCTP library functions. + - `libsctp-dev`: Provides headers for SCTP library functions. + + +## Linux libcaprights + +The Capsicum userspace library is held in the `libcaprights/` subdirectory. Ideally, this +library should be built (with `./configure; make` or `dpkg-buildpackage -uc -us`) and +installed (with `make install` or `dpkg -i libcaprights*.deb`) so that the tests will +behave like a normal Capsicum-aware application. 
+ +However, if no installed copy of the library is found, the `GNUmakefile` will attempt +to use the local `libcaprights/*.c` source; this requires `./configure` to have been +performed in the `libcaprights` subdirectory. The local code is also used for +cross-compiled builds of the test suite (e.g. `make ARCH=32` or `make ARCH=x32`). Index: head/contrib/capsicum-test/capability-fd-pair.cc =================================================================== --- head/contrib/capsicum-test/capability-fd-pair.cc +++ head/contrib/capsicum-test/capability-fd-pair.cc @@ -0,0 +1,188 @@ +// Tests involving 2 capability file descriptors. +#include +#include +#include + +#include "capsicum.h" +#include "syscalls.h" +#include "capsicum-test.h" + +TEST(CapabilityPair, sendfile) { + int in_fd = open(TmpFile("cap_sendfile_in"), O_CREAT|O_RDWR, 0644); + EXPECT_OK(write(in_fd, "1234", 4)); + // Output fd for sendfile must be a stream socket in FreeBSD. + int sock_fds[2]; + EXPECT_OK(socketpair(AF_UNIX, SOCK_STREAM, 0, sock_fds)); + + cap_rights_t r_rs; + cap_rights_init(&r_rs, CAP_READ, CAP_SEEK); + cap_rights_t r_ws; + cap_rights_init(&r_ws, CAP_WRITE, CAP_SEEK); + + int cap_in_ro = dup(in_fd); + EXPECT_OK(cap_in_ro); + EXPECT_OK(cap_rights_limit(cap_in_ro, &r_rs)); + int cap_in_wo = dup(in_fd); + EXPECT_OK(cap_in_wo); + EXPECT_OK(cap_rights_limit(cap_in_wo, &r_ws)); + int cap_out_ro = dup(sock_fds[0]); + EXPECT_OK(cap_out_ro); + EXPECT_OK(cap_rights_limit(cap_out_ro, &r_rs)); + int cap_out_wo = dup(sock_fds[0]); + EXPECT_OK(cap_out_wo); + EXPECT_OK(cap_rights_limit(cap_out_wo, &r_ws)); + + off_t offset = 0; + EXPECT_NOTCAPABLE(sendfile_(cap_out_ro, cap_in_ro, &offset, 4)); + EXPECT_NOTCAPABLE(sendfile_(cap_out_wo, cap_in_wo, &offset, 4)); + EXPECT_OK(sendfile_(cap_out_wo, cap_in_ro, &offset, 4)); + + close(cap_in_ro); + close(cap_in_wo); + close(cap_out_ro); + close(cap_out_wo); + close(in_fd); + close(sock_fds[0]); + close(sock_fds[1]); + unlink(TmpFile("cap_sendfile_in")); +} + 
+#ifdef HAVE_TEE +TEST(CapabilityPair, tee) { + int pipe1_fds[2]; + EXPECT_OK(pipe2(pipe1_fds, O_NONBLOCK)); + int pipe2_fds[2]; + EXPECT_OK(pipe2(pipe2_fds, O_NONBLOCK)); + + // Put some data into pipe1. + unsigned char buffer[4] = {1, 2, 3, 4}; + EXPECT_OK(write(pipe1_fds[1], buffer, 4)); + + cap_rights_t r_ro; + cap_rights_init(&r_ro, CAP_READ); + cap_rights_t r_wo; + cap_rights_init(&r_wo, CAP_WRITE); + cap_rights_t r_rw; + cap_rights_init(&r_rw, CAP_READ, CAP_WRITE); + + // Various attempts to tee into pipe2. + int cap_in_wo = dup(pipe1_fds[0]); + EXPECT_OK(cap_in_wo); + EXPECT_OK(cap_rights_limit(cap_in_wo, &r_wo)); + int cap_in_rw = dup(pipe1_fds[0]); + EXPECT_OK(cap_in_rw); + EXPECT_OK(cap_rights_limit(cap_in_rw, &r_rw)); + int cap_out_ro = dup(pipe2_fds[1]); + EXPECT_OK(cap_out_ro); + EXPECT_OK(cap_rights_limit(cap_out_ro, &r_ro)); + int cap_out_rw = dup(pipe2_fds[1]); + EXPECT_OK(cap_out_rw); + EXPECT_OK(cap_rights_limit(cap_out_rw, &r_rw)); + + EXPECT_NOTCAPABLE(tee(cap_in_wo, cap_out_rw, 4, SPLICE_F_NONBLOCK)); + EXPECT_NOTCAPABLE(tee(cap_in_rw, cap_out_ro, 4, SPLICE_F_NONBLOCK)); + EXPECT_OK(tee(cap_in_rw, cap_out_rw, 4, SPLICE_F_NONBLOCK)); + + close(cap_in_wo); + close(cap_in_rw); + close(cap_out_ro); + close(cap_out_rw); + close(pipe1_fds[0]); + close(pipe1_fds[1]); + close(pipe2_fds[0]); + close(pipe2_fds[1]); +} +#endif + +#ifdef HAVE_SPLICE +TEST(CapabilityPair, splice) { + int pipe1_fds[2]; + EXPECT_OK(pipe2(pipe1_fds, O_NONBLOCK)); + int pipe2_fds[2]; + EXPECT_OK(pipe2(pipe2_fds, O_NONBLOCK)); + + // Put some data into pipe1. + unsigned char buffer[4] = {1, 2, 3, 4}; + EXPECT_OK(write(pipe1_fds[1], buffer, 4)); + + cap_rights_t r_ro; + cap_rights_init(&r_ro, CAP_READ); + cap_rights_t r_wo; + cap_rights_init(&r_wo, CAP_WRITE); + cap_rights_t r_rs; + cap_rights_init(&r_rs, CAP_READ, CAP_SEEK); + cap_rights_t r_ws; + cap_rights_init(&r_ws, CAP_WRITE, CAP_SEEK); + + // Various attempts to splice. 
+ int cap_in_wo = dup(pipe1_fds[0]); + EXPECT_OK(cap_in_wo); + EXPECT_OK(cap_rights_limit(cap_in_wo, &r_wo)); + int cap_in_ro = dup(pipe1_fds[0]); + EXPECT_OK(cap_in_ro); + EXPECT_OK(cap_rights_limit(cap_in_ro, &r_ro)); + int cap_in_ro_seek = dup(pipe1_fds[0]); + EXPECT_OK(cap_in_ro_seek); + EXPECT_OK(cap_rights_limit(cap_in_ro_seek, &r_rs)); + int cap_out_wo = dup(pipe2_fds[1]); + EXPECT_OK(cap_out_wo); + EXPECT_OK(cap_rights_limit(cap_out_wo, &r_wo)); + int cap_out_ro = dup(pipe2_fds[1]); + EXPECT_OK(cap_out_ro); + EXPECT_OK(cap_rights_limit(cap_out_ro, &r_ro)); + int cap_out_wo_seek = dup(pipe2_fds[1]); + EXPECT_OK(cap_out_wo_seek); + EXPECT_OK(cap_rights_limit(cap_out_wo_seek, &r_ws)); + + EXPECT_NOTCAPABLE(splice(cap_in_ro, NULL, cap_out_wo_seek, NULL, 4, SPLICE_F_NONBLOCK)); + EXPECT_NOTCAPABLE(splice(cap_in_wo, NULL, cap_out_wo_seek, NULL, 4, SPLICE_F_NONBLOCK)); + EXPECT_NOTCAPABLE(splice(cap_in_ro_seek, NULL, cap_out_ro, NULL, 4, SPLICE_F_NONBLOCK)); + EXPECT_NOTCAPABLE(splice(cap_in_ro_seek, NULL, cap_out_wo, NULL, 4, SPLICE_F_NONBLOCK)); + EXPECT_OK(splice(cap_in_ro_seek, NULL, cap_out_wo_seek, NULL, 4, SPLICE_F_NONBLOCK)); + + close(cap_in_wo); + close(cap_in_ro); + close(cap_in_ro_seek); + close(cap_out_wo); + close(cap_out_ro); + close(cap_out_wo_seek); + close(pipe1_fds[0]); + close(pipe1_fds[1]); + close(pipe2_fds[0]); + close(pipe2_fds[1]); +} +#endif + +#ifdef HAVE_VMSPLICE +// Although it only involves a single file descriptor, test vmsplice(2) here too. 
+TEST(CapabilityPair, vmsplice) { + int pipe_fds[2]; + EXPECT_OK(pipe2(pipe_fds, O_NONBLOCK)); + + cap_rights_t r_ro; + cap_rights_init(&r_ro, CAP_READ); + cap_rights_t r_rw; + cap_rights_init(&r_rw, CAP_READ, CAP_WRITE); + + int cap_ro = dup(pipe_fds[1]); + EXPECT_OK(cap_ro); + EXPECT_OK(cap_rights_limit(cap_ro, &r_ro)); + int cap_rw = dup(pipe_fds[1]); + EXPECT_OK(cap_rw); + EXPECT_OK(cap_rights_limit(cap_rw, &r_rw)); + + unsigned char buffer[4] = {1, 2, 3, 4}; + struct iovec iov; + memset(&iov, 0, sizeof(iov)); + iov.iov_base = buffer; + iov.iov_len = sizeof(buffer); + + EXPECT_NOTCAPABLE(vmsplice(cap_ro, &iov, 1, SPLICE_F_NONBLOCK)); + EXPECT_OK(vmsplice(cap_rw, &iov, 1, SPLICE_F_NONBLOCK)); + + close(cap_ro); + close(cap_rw); + close(pipe_fds[0]); + close(pipe_fds[1]); +} +#endif Index: head/contrib/capsicum-test/capability-fd.cc =================================================================== --- head/contrib/capsicum-test/capability-fd.cc +++ head/contrib/capsicum-test/capability-fd.cc @@ -0,0 +1,1309 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "capsicum.h" +#include "syscalls.h" +#include "capsicum-test.h" + +/* Utilities for printing rights information */ +/* Written in C style to allow for: */ +/* TODO(drysdale): migrate these to somewhere in libcaprights/ */ +#define RIGHTS_INFO(RR) { (RR), #RR} +typedef struct { + uint64_t right; + const char* name; +} right_info; +static right_info known_rights[] = { + /* Rights that are common to all versions of Capsicum */ + RIGHTS_INFO(CAP_READ), + RIGHTS_INFO(CAP_WRITE), + RIGHTS_INFO(CAP_SEEK_TELL), + RIGHTS_INFO(CAP_SEEK), + RIGHTS_INFO(CAP_PREAD), + RIGHTS_INFO(CAP_PWRITE), + RIGHTS_INFO(CAP_MMAP), + RIGHTS_INFO(CAP_MMAP_R), + RIGHTS_INFO(CAP_MMAP_W), + RIGHTS_INFO(CAP_MMAP_X), + RIGHTS_INFO(CAP_MMAP_RW), + RIGHTS_INFO(CAP_MMAP_RX), + RIGHTS_INFO(CAP_MMAP_WX), + RIGHTS_INFO(CAP_MMAP_RWX), + 
RIGHTS_INFO(CAP_CREATE), + RIGHTS_INFO(CAP_FEXECVE), + RIGHTS_INFO(CAP_FSYNC), + RIGHTS_INFO(CAP_FTRUNCATE), + RIGHTS_INFO(CAP_LOOKUP), + RIGHTS_INFO(CAP_FCHDIR), + RIGHTS_INFO(CAP_FCHFLAGS), + RIGHTS_INFO(CAP_CHFLAGSAT), + RIGHTS_INFO(CAP_FCHMOD), + RIGHTS_INFO(CAP_FCHMODAT), + RIGHTS_INFO(CAP_FCHOWN), + RIGHTS_INFO(CAP_FCHOWNAT), + RIGHTS_INFO(CAP_FCNTL), + RIGHTS_INFO(CAP_FLOCK), + RIGHTS_INFO(CAP_FPATHCONF), + RIGHTS_INFO(CAP_FSCK), + RIGHTS_INFO(CAP_FSTAT), + RIGHTS_INFO(CAP_FSTATAT), + RIGHTS_INFO(CAP_FSTATFS), + RIGHTS_INFO(CAP_FUTIMES), + RIGHTS_INFO(CAP_FUTIMESAT), + RIGHTS_INFO(CAP_MKDIRAT), + RIGHTS_INFO(CAP_MKFIFOAT), + RIGHTS_INFO(CAP_MKNODAT), + RIGHTS_INFO(CAP_RENAMEAT_SOURCE), + RIGHTS_INFO(CAP_SYMLINKAT), + RIGHTS_INFO(CAP_UNLINKAT), + RIGHTS_INFO(CAP_ACCEPT), + RIGHTS_INFO(CAP_BIND), + RIGHTS_INFO(CAP_CONNECT), + RIGHTS_INFO(CAP_GETPEERNAME), + RIGHTS_INFO(CAP_GETSOCKNAME), + RIGHTS_INFO(CAP_GETSOCKOPT), + RIGHTS_INFO(CAP_LISTEN), + RIGHTS_INFO(CAP_PEELOFF), + RIGHTS_INFO(CAP_RECV), + RIGHTS_INFO(CAP_SEND), + RIGHTS_INFO(CAP_SETSOCKOPT), + RIGHTS_INFO(CAP_SHUTDOWN), + RIGHTS_INFO(CAP_BINDAT), + RIGHTS_INFO(CAP_CONNECTAT), + RIGHTS_INFO(CAP_LINKAT_SOURCE), + RIGHTS_INFO(CAP_RENAMEAT_TARGET), + RIGHTS_INFO(CAP_SOCK_CLIENT), + RIGHTS_INFO(CAP_SOCK_SERVER), + RIGHTS_INFO(CAP_MAC_GET), + RIGHTS_INFO(CAP_MAC_SET), + RIGHTS_INFO(CAP_SEM_GETVALUE), + RIGHTS_INFO(CAP_SEM_POST), + RIGHTS_INFO(CAP_SEM_WAIT), + RIGHTS_INFO(CAP_EVENT), + RIGHTS_INFO(CAP_KQUEUE_EVENT), + RIGHTS_INFO(CAP_IOCTL), + RIGHTS_INFO(CAP_TTYHOOK), + RIGHTS_INFO(CAP_PDWAIT), + RIGHTS_INFO(CAP_PDGETPID), + RIGHTS_INFO(CAP_PDKILL), + RIGHTS_INFO(CAP_EXTATTR_DELETE), + RIGHTS_INFO(CAP_EXTATTR_GET), + RIGHTS_INFO(CAP_EXTATTR_LIST), + RIGHTS_INFO(CAP_EXTATTR_SET), + RIGHTS_INFO(CAP_ACL_CHECK), + RIGHTS_INFO(CAP_ACL_DELETE), + RIGHTS_INFO(CAP_ACL_GET), + RIGHTS_INFO(CAP_ACL_SET), + RIGHTS_INFO(CAP_KQUEUE_CHANGE), + RIGHTS_INFO(CAP_KQUEUE), + /* Rights that are only present in some version or 
some OS, and so are #ifdef'ed */ + /* LINKAT got split */ +#ifdef CAP_LINKAT + RIGHTS_INFO(CAP_LINKAT), +#endif +#ifdef CAP_LINKAT_SOURCE + RIGHTS_INFO(CAP_LINKAT_SOURCE), +#endif +#ifdef CAP_LINKAT_TARGET + RIGHTS_INFO(CAP_LINKAT_TARGET), +#endif + /* Linux aliased some FD operations for pdgetpid/pdkill */ +#ifdef CAP_PDGETPID_FREEBSD + RIGHTS_INFO(CAP_PDGETPID_FREEBSD), +#endif +#ifdef CAP_PDKILL_FREEBSD + RIGHTS_INFO(CAP_PDKILL_FREEBSD), +#endif + /* Linux-specific rights */ +#ifdef CAP_FSIGNAL + RIGHTS_INFO(CAP_FSIGNAL), +#endif +#ifdef CAP_EPOLL_CTL + RIGHTS_INFO(CAP_EPOLL_CTL), +#endif +#ifdef CAP_NOTIFY + RIGHTS_INFO(CAP_NOTIFY), +#endif +#ifdef CAP_SETNS + RIGHTS_INFO(CAP_SETNS), +#endif +#ifdef CAP_PERFMON + RIGHTS_INFO(CAP_PERFMON), +#endif +#ifdef CAP_BPF + RIGHTS_INFO(CAP_BPF), +#endif + /* Rights in later versions of FreeBSD (>10.0) */ +}; + +void ShowCapRights(FILE *out, int fd) { + size_t ii; + bool first = true; + cap_rights_t rights; + CAP_SET_NONE(&rights); + if (cap_rights_get(fd, &rights) < 0) { + fprintf(out, "Failed to get rights for fd %d: errno %d\n", fd, errno); + return; + } + + /* First print out all known rights */ + size_t num_known = (sizeof(known_rights)/sizeof(known_rights[0])); + for (ii = 0; ii < num_known; ii++) { + if (cap_rights_is_set(&rights, known_rights[ii].right)) { + if (!first) fprintf(out, ","); + first = false; + fprintf(out, "%s", known_rights[ii].name); + } + } + /* Now repeat the loop, clearing rights we know of; this needs to be + * a separate loop because some named rights overlap. + */ + for (ii = 0; ii < num_known; ii++) { + cap_rights_clear(&rights, known_rights[ii].right); + } + /* The following relies on the internal structure of cap_rights_t to + * try to show rights we don't know about. 
*/ + for (ii = 0; ii < (size_t)CAPARSIZE(&rights); ii++) { + uint64_t bits = (rights.cr_rights[ii] & 0x01ffffffffffffffULL); // examine element ii (not always element 0); upper bits presumably hold index/version metadata - TODO confirm + if (bits != 0) { + uint64_t which = 1; + for (which = 1; which < 0x0200000000000000ULL ; which <<= 1) { + if (bits & which) { + fprintf(out, "%sCAP_RIGHT(%d, 0x%016llxULL)", first ? "" : ",", (int)ii, (long long unsigned)which); + first = false; // later unknown rights need a separating comma + } + } + } + } + fprintf(out, "\n"); +} + +void ShowAllCapRights(FILE *out) { + int fd; + struct rlimit limits; + if (getrlimit(RLIMIT_NOFILE, &limits) != 0) { + fprintf(out, "Failed to getrlimit for max FDs: errno %d\n", errno); + return; + } + for (fd = 0; fd < (int)limits.rlim_cur; fd++) { + if (fcntl(fd, F_GETFD, 0) < 0) { // F_GETFD returns the (possibly non-zero) fd flags on success; only a negative result means fd is not open + continue; + } + fprintf(out, "fd %d: ", fd); + ShowCapRights(out, fd); + } +} + +FORK_TEST(Capability, CapNew) { + cap_rights_t r_rws; + cap_rights_init(&r_rws, CAP_READ, CAP_WRITE, CAP_SEEK); + cap_rights_t r_all; + CAP_SET_ALL(&r_all); + + int cap_fd = dup(STDOUT_FILENO); + cap_rights_t rights; + CAP_SET_NONE(&rights); + EXPECT_OK(cap_rights_get(cap_fd, &rights)); + EXPECT_RIGHTS_EQ(&r_all, &rights); + + EXPECT_OK(cap_fd); + EXPECT_OK(cap_rights_limit(cap_fd, &r_rws)); + if (cap_fd < 0) return; + int rc = write(cap_fd, "OK!\n", 4); + EXPECT_OK(rc); + EXPECT_EQ(4, rc); + EXPECT_OK(cap_rights_get(cap_fd, &rights)); + EXPECT_RIGHTS_EQ(&r_rws, &rights); + + // dup/dup2 should preserve rights. + int cap_dup = dup(cap_fd); + EXPECT_OK(cap_dup); + EXPECT_OK(cap_rights_get(cap_dup, &rights)); + EXPECT_RIGHTS_EQ(&r_rws, &rights); + close(cap_dup); + EXPECT_OK(dup2(cap_fd, cap_dup)); + EXPECT_OK(cap_rights_get(cap_dup, &rights)); + EXPECT_RIGHTS_EQ(&r_rws, &rights); + close(cap_dup); +#ifdef HAVE_DUP3 + EXPECT_OK(dup3(cap_fd, cap_dup, 0)); + EXPECT_OK(cap_rights_get(cap_dup, &rights)); + EXPECT_RIGHTS_EQ(&r_rws, &rights); + close(cap_dup); +#endif + + // Try to get a disjoint set of rights in a sub-capability. 
+ cap_rights_t r_rs; + cap_rights_init(&r_rs, CAP_READ, CAP_SEEK); + cap_rights_t r_rsmapchmod; + cap_rights_init(&r_rsmapchmod, CAP_READ, CAP_SEEK, CAP_MMAP, CAP_FCHMOD); + int cap_cap_fd = dup(cap_fd); + EXPECT_OK(cap_cap_fd); + EXPECT_NOTCAPABLE(cap_rights_limit(cap_cap_fd, &r_rsmapchmod)); + + // Dump rights info to stderr (mostly to ensure that Show[All]CapRights() + // is working. + ShowAllCapRights(stderr); + + EXPECT_OK(close(cap_fd)); +} + +FORK_TEST(Capability, CapEnter) { + EXPECT_EQ(0, cap_enter()); +} + +FORK_TEST(Capability, BasicInterception) { + cap_rights_t r_0; + cap_rights_init(&r_0, 0); + int cap_fd = dup(1); + EXPECT_OK(cap_fd); + EXPECT_OK(cap_rights_limit(cap_fd, &r_0)); + + EXPECT_NOTCAPABLE(write(cap_fd, "", 0)); + + EXPECT_OK(cap_enter()); // Enter capability mode + + EXPECT_NOTCAPABLE(write(cap_fd, "", 0)); + + // Create a new capability which does have write permission + cap_rights_t r_ws; + cap_rights_init(&r_ws, CAP_WRITE, CAP_SEEK); + int cap_fd2 = dup(1); + EXPECT_OK(cap_fd2); + EXPECT_OK(cap_rights_limit(cap_fd2, &r_ws)); + EXPECT_OK(write(cap_fd2, "", 0)); + + // Tidy up. + if (cap_fd >= 0) close(cap_fd); + if (cap_fd2 >= 0) close(cap_fd2); +} + +FORK_TEST_ON(Capability, OpenAtDirectoryTraversal, TmpFile("cap_openat_testfile")) { + int dir = open(tmpdir.c_str(), O_RDONLY); + EXPECT_OK(dir); + + cap_enter(); + + int file = openat(dir, "cap_openat_testfile", O_RDONLY|O_CREAT, 0644); + EXPECT_OK(file); + + // Test that we are confined to /tmp, and cannot + // escape using absolute paths or ../. + int new_file = openat(dir, "../dev/null", O_RDONLY); + EXPECT_EQ(-1, new_file); + + new_file = openat(dir, "..", O_RDONLY); + EXPECT_EQ(-1, new_file); + + new_file = openat(dir, "/dev/null", O_RDONLY); + EXPECT_EQ(-1, new_file); + + new_file = openat(dir, "/", O_RDONLY); + EXPECT_EQ(-1, new_file); + + // Tidy up. 
+ close(file); + close(dir); +} + +FORK_TEST_ON(Capability, FileInSync, TmpFile("cap_file_sync")) { + int fd = open(TmpFile("cap_file_sync"), O_RDWR|O_CREAT, 0644); + EXPECT_OK(fd); + const char* message = "Hello capability world"; + EXPECT_OK(write(fd, message, strlen(message))); + + cap_rights_t r_rsstat; + cap_rights_init(&r_rsstat, CAP_READ, CAP_SEEK, CAP_FSTAT); + + int cap_fd = dup(fd); + EXPECT_OK(cap_fd); + EXPECT_OK(cap_rights_limit(cap_fd, &r_rsstat)); + int cap_cap_fd = dup(cap_fd); + EXPECT_OK(cap_cap_fd); + EXPECT_OK(cap_rights_limit(cap_cap_fd, &r_rsstat)); + + EXPECT_OK(cap_enter()); // Enter capability mode. + + // Changes to one file descriptor affect the others. + EXPECT_EQ(1, lseek(fd, 1, SEEK_SET)); + EXPECT_EQ(1, lseek(fd, 0, SEEK_CUR)); + EXPECT_EQ(1, lseek(cap_fd, 0, SEEK_CUR)); + EXPECT_EQ(1, lseek(cap_cap_fd, 0, SEEK_CUR)); + EXPECT_EQ(3, lseek(cap_fd, 3, SEEK_SET)); + EXPECT_EQ(3, lseek(fd, 0, SEEK_CUR)); + EXPECT_EQ(3, lseek(cap_fd, 0, SEEK_CUR)); + EXPECT_EQ(3, lseek(cap_cap_fd, 0, SEEK_CUR)); + EXPECT_EQ(5, lseek(cap_cap_fd, 5, SEEK_SET)); + EXPECT_EQ(5, lseek(fd, 0, SEEK_CUR)); + EXPECT_EQ(5, lseek(cap_fd, 0, SEEK_CUR)); + EXPECT_EQ(5, lseek(cap_cap_fd, 0, SEEK_CUR)); + + close(cap_cap_fd); + close(cap_fd); + close(fd); +} + +// Create a capability on /tmp that does not allow CAP_WRITE, +// and check that this restriction is inherited through openat(). 
+FORK_TEST_ON(Capability, Inheritance, TmpFile("cap_openat_write_testfile")) { + int dir = open(tmpdir.c_str(), O_RDONLY); + EXPECT_OK(dir); + + cap_rights_t r_rl; + cap_rights_init(&r_rl, CAP_READ, CAP_LOOKUP); + + int cap_dir = dup(dir); + EXPECT_OK(cap_dir); + EXPECT_OK(cap_rights_limit(cap_dir, &r_rl)); + + const char *filename = "cap_openat_write_testfile"; + int file = openat(dir, filename, O_WRONLY|O_CREAT, 0644); + EXPECT_OK(file); + EXPECT_EQ(5, write(file, "TEST\n", 5)); + if (file >= 0) close(file); + + EXPECT_OK(cap_enter()); + file = openat(cap_dir, filename, O_RDONLY); + EXPECT_OK(file); + + cap_rights_t rights; + cap_rights_init(&rights, 0); + EXPECT_OK(cap_rights_get(file, &rights)); + EXPECT_RIGHTS_EQ(&r_rl, &rights); + if (file >= 0) close(file); + + file = openat(cap_dir, filename, O_WRONLY|O_APPEND); + EXPECT_NOTCAPABLE(file); + if (file > 0) close(file); + + if (dir > 0) close(dir); + if (cap_dir > 0) close(cap_dir); +} + + +// Ensure that, if the capability had enough rights for the system call to +// pass, then it did. Otherwise, ensure that the errno is ENOTCAPABLE; +// capability restrictions should kick in before any other error logic. +#define CHECK_RIGHT_RESULT(result, rights, ...) 
do { \ + cap_rights_t rights_needed; \ + cap_rights_init(&rights_needed, __VA_ARGS__); \ + if (cap_rights_contains(&rights, &rights_needed)) { \ + EXPECT_OK(result) << std::endl \ + << " need: " << rights_needed \ + << std::endl \ + << " got: " << rights; \ + } else { \ + EXPECT_EQ(-1, result) << " need: " << rights_needed \ + << std::endl \ + << " got: "<< rights; \ + EXPECT_EQ(ENOTCAPABLE, errno); \ + } \ +} while (0) + +#define EXPECT_MMAP_NOTCAPABLE(result) do { \ + void *rv = result; \ + EXPECT_EQ(MAP_FAILED, rv); \ + EXPECT_EQ(ENOTCAPABLE, errno); \ + if (rv != MAP_FAILED) munmap(rv, getpagesize()); \ +} while (0) + +#define EXPECT_MMAP_OK(result) do { \ + void *rv = result; \ + EXPECT_NE(MAP_FAILED, rv) << " with errno " << errno; \ + if (rv != MAP_FAILED) munmap(rv, getpagesize()); \ +} while (0) + + +// As above, but for the special mmap() case: unmap after successful mmap(). +#define CHECK_RIGHT_MMAP_RESULT(result, rights, ...) do { \ + cap_rights_t rights_needed; \ + cap_rights_init(&rights_needed, __VA_ARGS__); \ + if (cap_rights_contains(&rights, &rights_needed)) { \ + EXPECT_MMAP_OK(result); \ + } else { \ + EXPECT_MMAP_NOTCAPABLE(result); \ + } \ +} while (0) + +FORK_TEST_ON(Capability, Mmap, TmpFile("cap_mmap_operations")) { + int fd = open(TmpFile("cap_mmap_operations"), O_RDWR | O_CREAT, 0644); + EXPECT_OK(fd); + if (fd < 0) return; + + cap_rights_t r_0; + cap_rights_init(&r_0, 0); + cap_rights_t r_mmap; + cap_rights_init(&r_mmap, CAP_MMAP); + cap_rights_t r_r; + cap_rights_init(&r_r, CAP_PREAD); + cap_rights_t r_rmmap; + cap_rights_init(&r_rmmap, CAP_PREAD, CAP_MMAP); + + // If we're missing a capability, it will fail. 
+ int cap_none = dup(fd); + EXPECT_OK(cap_none); + EXPECT_OK(cap_rights_limit(cap_none, &r_0)); + int cap_mmap = dup(fd); + EXPECT_OK(cap_mmap); + EXPECT_OK(cap_rights_limit(cap_mmap, &r_mmap)); + int cap_read = dup(fd); + EXPECT_OK(cap_read); + EXPECT_OK(cap_rights_limit(cap_read, &r_r)); + int cap_both = dup(fd); + EXPECT_OK(cap_both); + EXPECT_OK(cap_rights_limit(cap_both, &r_rmmap)); + + EXPECT_OK(cap_enter()); // Enter capability mode. + + EXPECT_MMAP_NOTCAPABLE(mmap(NULL, getpagesize(), PROT_READ, MAP_PRIVATE, cap_none, 0)); + EXPECT_MMAP_NOTCAPABLE(mmap(NULL, getpagesize(), PROT_READ, MAP_PRIVATE, cap_mmap, 0)); + EXPECT_MMAP_NOTCAPABLE(mmap(NULL, getpagesize(), PROT_READ, MAP_PRIVATE, cap_read, 0)); + + EXPECT_MMAP_OK(mmap(NULL, getpagesize(), PROT_READ, MAP_PRIVATE, cap_both, 0)); + + // A call with MAP_ANONYMOUS should succeed without any capability requirements. + EXPECT_MMAP_OK(mmap(NULL, getpagesize(), PROT_READ, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0)); + + EXPECT_OK(close(cap_both)); + EXPECT_OK(close(cap_read)); + EXPECT_OK(close(cap_mmap)); + EXPECT_OK(close(cap_none)); + EXPECT_OK(close(fd)); +} + +// Given a file descriptor, create a capability with specific rights and +// make sure only those rights work. +#define TRY_FILE_OPS(fd, ...) do { \ + cap_rights_t rights; \ + cap_rights_init(&rights, __VA_ARGS__); \ + TryFileOps((fd), rights); \ +} while (0) + +static void TryFileOps(int fd, cap_rights_t rights) { + int cap_fd = dup(fd); + EXPECT_OK(cap_fd); + EXPECT_OK(cap_rights_limit(cap_fd, &rights)); + if (cap_fd < 0) return; + cap_rights_t erights; + EXPECT_OK(cap_rights_get(cap_fd, &erights)); + EXPECT_RIGHTS_EQ(&rights, &erights); + + // Check creation of a capability from a capability. 
+ int cap_cap_fd = dup(cap_fd); + EXPECT_OK(cap_cap_fd); + EXPECT_OK(cap_rights_limit(cap_cap_fd, &rights)); + EXPECT_NE(cap_fd, cap_cap_fd); + EXPECT_OK(cap_rights_get(cap_cap_fd, &erights)); + EXPECT_RIGHTS_EQ(&rights, &erights); + close(cap_cap_fd); + + char ch; + CHECK_RIGHT_RESULT(read(cap_fd, &ch, sizeof(ch)), rights, CAP_READ, CAP_SEEK_ASWAS); + + ssize_t len1 = pread(cap_fd, &ch, sizeof(ch), 0); + CHECK_RIGHT_RESULT(len1, rights, CAP_PREAD); + ssize_t len2 = pread(cap_fd, &ch, sizeof(ch), 0); + CHECK_RIGHT_RESULT(len2, rights, CAP_PREAD); + EXPECT_EQ(len1, len2); + + CHECK_RIGHT_RESULT(write(cap_fd, &ch, sizeof(ch)), rights, CAP_WRITE, CAP_SEEK_ASWAS); + CHECK_RIGHT_RESULT(pwrite(cap_fd, &ch, sizeof(ch), 0), rights, CAP_PWRITE); + CHECK_RIGHT_RESULT(lseek(cap_fd, 0, SEEK_SET), rights, CAP_SEEK); + +#ifdef HAVE_CHFLAGS + // Note: this is not expected to work over NFS. + struct statfs sf; + EXPECT_OK(fstatfs(fd, &sf)); + bool is_nfs = (strncmp("nfs", sf.f_fstypename, sizeof(sf.f_fstypename)) == 0); + if (!is_nfs) { + CHECK_RIGHT_RESULT(fchflags(cap_fd, UF_NODUMP), rights, CAP_FCHFLAGS); + } +#endif + + CHECK_RIGHT_MMAP_RESULT(mmap(NULL, getpagesize(), PROT_NONE, MAP_SHARED, cap_fd, 0), + rights, CAP_MMAP); + CHECK_RIGHT_MMAP_RESULT(mmap(NULL, getpagesize(), PROT_READ, MAP_SHARED, cap_fd, 0), + rights, CAP_MMAP_R); + CHECK_RIGHT_MMAP_RESULT(mmap(NULL, getpagesize(), PROT_WRITE, MAP_SHARED, cap_fd, 0), + rights, CAP_MMAP_W); + CHECK_RIGHT_MMAP_RESULT(mmap(NULL, getpagesize(), PROT_EXEC, MAP_SHARED, cap_fd, 0), + rights, CAP_MMAP_X); + CHECK_RIGHT_MMAP_RESULT(mmap(NULL, getpagesize(), PROT_READ | PROT_WRITE, MAP_SHARED, cap_fd, 0), + rights, CAP_MMAP_RW); + CHECK_RIGHT_MMAP_RESULT(mmap(NULL, getpagesize(), PROT_READ | PROT_EXEC, MAP_SHARED, cap_fd, 0), + rights, CAP_MMAP_RX); + CHECK_RIGHT_MMAP_RESULT(mmap(NULL, getpagesize(), PROT_EXEC | PROT_WRITE, MAP_SHARED, cap_fd, 0), + rights, CAP_MMAP_WX); + CHECK_RIGHT_MMAP_RESULT(mmap(NULL, getpagesize(), PROT_READ | 
PROT_WRITE | PROT_EXEC, MAP_SHARED, cap_fd, 0), + rights, CAP_MMAP_RWX); + + CHECK_RIGHT_RESULT(fsync(cap_fd), rights, CAP_FSYNC); +#ifdef HAVE_SYNC_FILE_RANGE + CHECK_RIGHT_RESULT(sync_file_range(cap_fd, 0, 1, 0), rights, CAP_FSYNC, CAP_SEEK); +#endif + + int rc = fcntl(cap_fd, F_GETFL); + CHECK_RIGHT_RESULT(rc, rights, CAP_FCNTL); + rc = fcntl(cap_fd, F_SETFL, rc); + CHECK_RIGHT_RESULT(rc, rights, CAP_FCNTL); + + CHECK_RIGHT_RESULT(fchown(cap_fd, -1, -1), rights, CAP_FCHOWN); + + CHECK_RIGHT_RESULT(fchmod(cap_fd, 0644), rights, CAP_FCHMOD); + + CHECK_RIGHT_RESULT(flock(cap_fd, LOCK_SH), rights, CAP_FLOCK); + CHECK_RIGHT_RESULT(flock(cap_fd, LOCK_UN), rights, CAP_FLOCK); + + CHECK_RIGHT_RESULT(ftruncate(cap_fd, 0), rights, CAP_FTRUNCATE); + + struct stat sb; + CHECK_RIGHT_RESULT(fstat(cap_fd, &sb), rights, CAP_FSTAT); + + struct statfs cap_sf; + CHECK_RIGHT_RESULT(fstatfs(cap_fd, &cap_sf), rights, CAP_FSTATFS); + +#ifdef HAVE_FPATHCONF + CHECK_RIGHT_RESULT(fpathconf(cap_fd, _PC_NAME_MAX), rights, CAP_FPATHCONF); +#endif + + CHECK_RIGHT_RESULT(futimes(cap_fd, NULL), rights, CAP_FUTIMES); + + struct pollfd pollfd; + pollfd.fd = cap_fd; + pollfd.events = POLLIN | POLLERR | POLLHUP; + pollfd.revents = 0; + int ret = poll(&pollfd, 1, 0); + if (cap_rights_is_set(&rights, CAP_EVENT)) { + EXPECT_OK(ret); + } else { + EXPECT_NE(0, (pollfd.revents & POLLNVAL)); + } + + struct timeval tv; + tv.tv_sec = 0; + tv.tv_usec = 100; + fd_set rset; + FD_ZERO(&rset); + FD_SET(cap_fd, &rset); + fd_set wset; + FD_ZERO(&wset); + FD_SET(cap_fd, &wset); + ret = select(cap_fd+1, &rset, &wset, NULL, &tv); + if (cap_rights_is_set(&rights, CAP_EVENT)) { + EXPECT_OK(ret); + } else { + EXPECT_NOTCAPABLE(ret); + } + + // TODO(FreeBSD): kqueue + + EXPECT_OK(close(cap_fd)); +} + +FORK_TEST_ON(Capability, Operations, TmpFile("cap_fd_operations")) { + int fd = open(TmpFile("cap_fd_operations"), O_RDWR | O_CREAT, 0644); + EXPECT_OK(fd); + if (fd < 0) return; + + EXPECT_OK(cap_enter()); // Enter 
capability mode. + + // Try a variety of different combinations of rights - a full + // enumeration is too large (2^N with N~30+) to perform. + TRY_FILE_OPS(fd, CAP_READ); + TRY_FILE_OPS(fd, CAP_PREAD); + TRY_FILE_OPS(fd, CAP_WRITE); + TRY_FILE_OPS(fd, CAP_PWRITE); + TRY_FILE_OPS(fd, CAP_READ, CAP_WRITE); + TRY_FILE_OPS(fd, CAP_PREAD, CAP_PWRITE); + TRY_FILE_OPS(fd, CAP_SEEK); + TRY_FILE_OPS(fd, CAP_FCHFLAGS); + TRY_FILE_OPS(fd, CAP_IOCTL); + TRY_FILE_OPS(fd, CAP_FSTAT); + TRY_FILE_OPS(fd, CAP_MMAP); + TRY_FILE_OPS(fd, CAP_MMAP_R); + TRY_FILE_OPS(fd, CAP_MMAP_W); + TRY_FILE_OPS(fd, CAP_MMAP_X); + TRY_FILE_OPS(fd, CAP_MMAP_RW); + TRY_FILE_OPS(fd, CAP_MMAP_RX); + TRY_FILE_OPS(fd, CAP_MMAP_WX); + TRY_FILE_OPS(fd, CAP_MMAP_RWX); + TRY_FILE_OPS(fd, CAP_FCNTL); + TRY_FILE_OPS(fd, CAP_EVENT); + TRY_FILE_OPS(fd, CAP_FSYNC); + TRY_FILE_OPS(fd, CAP_FCHOWN); + TRY_FILE_OPS(fd, CAP_FCHMOD); + TRY_FILE_OPS(fd, CAP_FTRUNCATE); + TRY_FILE_OPS(fd, CAP_FLOCK); + TRY_FILE_OPS(fd, CAP_FSTATFS); + TRY_FILE_OPS(fd, CAP_FPATHCONF); + TRY_FILE_OPS(fd, CAP_FUTIMES); + TRY_FILE_OPS(fd, CAP_ACL_GET); + TRY_FILE_OPS(fd, CAP_ACL_SET); + TRY_FILE_OPS(fd, CAP_ACL_DELETE); + TRY_FILE_OPS(fd, CAP_ACL_CHECK); + TRY_FILE_OPS(fd, CAP_EXTATTR_GET); + TRY_FILE_OPS(fd, CAP_EXTATTR_SET); + TRY_FILE_OPS(fd, CAP_EXTATTR_DELETE); + TRY_FILE_OPS(fd, CAP_EXTATTR_LIST); + TRY_FILE_OPS(fd, CAP_MAC_GET); + TRY_FILE_OPS(fd, CAP_MAC_SET); + + // Socket-specific. + TRY_FILE_OPS(fd, CAP_GETPEERNAME); + TRY_FILE_OPS(fd, CAP_GETSOCKNAME); + TRY_FILE_OPS(fd, CAP_ACCEPT); + + close(fd); +} + +#define TRY_DIR_OPS(dfd, ...) 
do { \ + cap_rights_t rights; \ + cap_rights_init(&rights, __VA_ARGS__); \ + TryDirOps((dfd), rights); \ +} while (0) + +static void TryDirOps(int dirfd, cap_rights_t rights) { + cap_rights_t erights; + int dfd_cap = dup(dirfd); + EXPECT_OK(dfd_cap); + EXPECT_OK(cap_rights_limit(dfd_cap, &rights)); + EXPECT_OK(cap_rights_get(dfd_cap, &erights)); + EXPECT_RIGHTS_EQ(&rights, &erights); + + int rc = openat(dfd_cap, "cap_create", O_CREAT | O_RDONLY, 0600); + CHECK_RIGHT_RESULT(rc, rights, CAP_CREATE, CAP_READ, CAP_LOOKUP); + if (rc >= 0) { + EXPECT_OK(close(rc)); + EXPECT_OK(unlinkat(dirfd, "cap_create", 0)); + } + rc = openat(dfd_cap, "cap_create", O_CREAT | O_WRONLY | O_APPEND, 0600); + CHECK_RIGHT_RESULT(rc, rights, CAP_CREATE, CAP_WRITE, CAP_LOOKUP); + if (rc >= 0) { + EXPECT_OK(close(rc)); + EXPECT_OK(unlinkat(dirfd, "cap_create", 0)); + } + rc = openat(dfd_cap, "cap_create", O_CREAT | O_RDWR | O_APPEND, 0600); + CHECK_RIGHT_RESULT(rc, rights, CAP_CREATE, CAP_READ, CAP_WRITE, CAP_LOOKUP); + if (rc >= 0) { + EXPECT_OK(close(rc)); + EXPECT_OK(unlinkat(dirfd, "cap_create", 0)); + } + + rc = openat(dirfd, "cap_faccess", O_CREAT, 0600); + EXPECT_OK(rc); + EXPECT_OK(close(rc)); + rc = faccessat(dfd_cap, "cap_faccess", F_OK, 0); + CHECK_RIGHT_RESULT(rc, rights, CAP_FSTAT, CAP_LOOKUP); + EXPECT_OK(unlinkat(dirfd, "cap_faccess", 0)); + + rc = openat(dirfd, "cap_fsync", O_CREAT, 0600); + EXPECT_OK(rc); + EXPECT_OK(close(rc)); + rc = openat(dfd_cap, "cap_fsync", O_FSYNC | O_RDONLY); + CHECK_RIGHT_RESULT(rc, rights, CAP_FSYNC, CAP_READ, CAP_LOOKUP); + if (rc >= 0) { + EXPECT_OK(close(rc)); + } + rc = openat(dfd_cap, "cap_fsync", O_FSYNC | O_WRONLY | O_APPEND); + CHECK_RIGHT_RESULT(rc, rights, CAP_FSYNC, CAP_WRITE, CAP_LOOKUP); + if (rc >= 0) { + EXPECT_OK(close(rc)); + } + rc = openat(dfd_cap, "cap_fsync", O_FSYNC | O_RDWR | O_APPEND); + CHECK_RIGHT_RESULT(rc, rights, CAP_FSYNC, CAP_READ, CAP_WRITE, CAP_LOOKUP); + if (rc >= 0) { + EXPECT_OK(close(rc)); + } + rc = 
openat(dfd_cap, "cap_fsync", O_SYNC | O_RDONLY); + CHECK_RIGHT_RESULT(rc, rights, CAP_FSYNC, CAP_READ, CAP_LOOKUP); + if (rc >= 0) { + EXPECT_OK(close(rc)); + } + rc = openat(dfd_cap, "cap_fsync", O_SYNC | O_WRONLY | O_APPEND); + CHECK_RIGHT_RESULT(rc, rights, CAP_FSYNC, CAP_WRITE, CAP_LOOKUP); + if (rc >= 0) { + EXPECT_OK(close(rc)); + } + rc = openat(dfd_cap, "cap_fsync", O_SYNC | O_RDWR | O_APPEND); + CHECK_RIGHT_RESULT(rc, rights, CAP_FSYNC, CAP_READ, CAP_WRITE, CAP_LOOKUP); + if (rc >= 0) { + EXPECT_OK(close(rc)); + } + EXPECT_OK(unlinkat(dirfd, "cap_fsync", 0)); + + rc = openat(dirfd, "cap_ftruncate", O_CREAT, 0600); + EXPECT_OK(rc); + EXPECT_OK(close(rc)); + rc = openat(dfd_cap, "cap_ftruncate", O_TRUNC | O_RDONLY); + CHECK_RIGHT_RESULT(rc, rights, CAP_FTRUNCATE, CAP_READ, CAP_LOOKUP); + if (rc >= 0) { + EXPECT_OK(close(rc)); + } + rc = openat(dfd_cap, "cap_ftruncate", O_TRUNC | O_WRONLY); + CHECK_RIGHT_RESULT(rc, rights, CAP_FTRUNCATE, CAP_WRITE, CAP_LOOKUP); + if (rc >= 0) { + EXPECT_OK(close(rc)); + } + rc = openat(dfd_cap, "cap_ftruncate", O_TRUNC | O_RDWR); + CHECK_RIGHT_RESULT(rc, rights, CAP_FTRUNCATE, CAP_READ, CAP_WRITE, CAP_LOOKUP); + if (rc >= 0) { + EXPECT_OK(close(rc)); + } + EXPECT_OK(unlinkat(dirfd, "cap_ftruncate", 0)); + + rc = openat(dfd_cap, "cap_create", O_CREAT | O_WRONLY, 0600); + CHECK_RIGHT_RESULT(rc, rights, CAP_CREATE, CAP_WRITE, CAP_SEEK, CAP_LOOKUP); + if (rc >= 0) { + EXPECT_OK(close(rc)); + EXPECT_OK(unlinkat(dirfd, "cap_create", 0)); + } + rc = openat(dfd_cap, "cap_create", O_CREAT | O_RDWR, 0600); + CHECK_RIGHT_RESULT(rc, rights, CAP_CREATE, CAP_READ, CAP_WRITE, CAP_SEEK, CAP_LOOKUP); + if (rc >= 0) { + EXPECT_OK(close(rc)); + EXPECT_OK(unlinkat(dirfd, "cap_create", 0)); + } + + rc = openat(dirfd, "cap_fsync", O_CREAT, 0600); + EXPECT_OK(rc); + EXPECT_OK(close(rc)); + rc = openat(dfd_cap, "cap_fsync", O_FSYNC | O_WRONLY); + CHECK_RIGHT_RESULT(rc, + rights, CAP_FSYNC, CAP_WRITE, CAP_SEEK, CAP_LOOKUP); + if (rc >= 0) { + 
EXPECT_OK(close(rc)); + } + rc = openat(dfd_cap, "cap_fsync", O_FSYNC | O_RDWR); + CHECK_RIGHT_RESULT(rc, + rights, CAP_FSYNC, CAP_READ, CAP_WRITE, CAP_SEEK, CAP_LOOKUP); + if (rc >= 0) { + EXPECT_OK(close(rc)); + } + rc = openat(dfd_cap, "cap_fsync", O_SYNC | O_WRONLY); + CHECK_RIGHT_RESULT(rc, + rights, CAP_FSYNC, CAP_WRITE, CAP_SEEK, CAP_LOOKUP); + if (rc >= 0) { + EXPECT_OK(close(rc)); + } + rc = openat(dfd_cap, "cap_fsync", O_SYNC | O_RDWR); + CHECK_RIGHT_RESULT(rc, + rights, CAP_FSYNC, CAP_READ, CAP_WRITE, CAP_SEEK, CAP_LOOKUP); + if (rc >= 0) { + EXPECT_OK(close(rc)); + } + EXPECT_OK(unlinkat(dirfd, "cap_fsync", 0)); + +#ifdef HAVE_CHFLAGSAT + rc = openat(dirfd, "cap_chflagsat", O_CREAT, 0600); + EXPECT_OK(rc); + EXPECT_OK(close(rc)); + rc = chflagsat(dfd_cap, "cap_chflagsat", UF_NODUMP, 0); + CHECK_RIGHT_RESULT(rc, rights, CAP_CHFLAGSAT, CAP_LOOKUP); + EXPECT_OK(unlinkat(dirfd, "cap_chflagsat", 0)); +#endif + + rc = openat(dirfd, "cap_fchownat", O_CREAT, 0600); + EXPECT_OK(rc); + EXPECT_OK(close(rc)); + rc = fchownat(dfd_cap, "cap_fchownat", -1, -1, 0); + CHECK_RIGHT_RESULT(rc, rights, CAP_FCHOWN, CAP_LOOKUP); + EXPECT_OK(unlinkat(dirfd, "cap_fchownat", 0)); + + rc = openat(dirfd, "cap_fchmodat", O_CREAT, 0600); + EXPECT_OK(rc); + EXPECT_OK(close(rc)); + rc = fchmodat(dfd_cap, "cap_fchmodat", 0600, 0); + CHECK_RIGHT_RESULT(rc, rights, CAP_FCHMOD, CAP_LOOKUP); + EXPECT_OK(unlinkat(dirfd, "cap_fchmodat", 0)); + + rc = openat(dirfd, "cap_fstatat", O_CREAT, 0600); + EXPECT_OK(rc); + EXPECT_OK(close(rc)); + struct stat sb; + rc = fstatat(dfd_cap, "cap_fstatat", &sb, 0); + CHECK_RIGHT_RESULT(rc, rights, CAP_FSTAT, CAP_LOOKUP); + EXPECT_OK(unlinkat(dirfd, "cap_fstatat", 0)); + + rc = openat(dirfd, "cap_futimesat", O_CREAT, 0600); + EXPECT_OK(rc); + EXPECT_OK(close(rc)); + rc = futimesat(dfd_cap, "cap_futimesat", NULL); + CHECK_RIGHT_RESULT(rc, rights, CAP_FUTIMES, CAP_LOOKUP); + EXPECT_OK(unlinkat(dirfd, "cap_futimesat", 0)); + + // For linkat(2), need: + // - 
CAP_LINKAT_SOURCE on source + // - CAP_LINKAT_TARGET on destination. + rc = openat(dirfd, "cap_linkat_src", O_CREAT, 0600); + EXPECT_OK(rc); + EXPECT_OK(close(rc)); + + rc = linkat(dirfd, "cap_linkat_src", dfd_cap, "cap_linkat_dst", 0); + CHECK_RIGHT_RESULT(rc, rights, CAP_LINKAT_TARGET); + if (rc >= 0) { + EXPECT_OK(unlinkat(dirfd, "cap_linkat_dst", 0)); + } + + rc = linkat(dfd_cap, "cap_linkat_src", dirfd, "cap_linkat_dst", 0); + CHECK_RIGHT_RESULT(rc, rights, CAP_LINKAT_SOURCE); + if (rc >= 0) { + EXPECT_OK(unlinkat(dirfd, "cap_linkat_dst", 0)); + } + + EXPECT_OK(unlinkat(dirfd, "cap_linkat_src", 0)); + + rc = mkdirat(dfd_cap, "cap_mkdirat", 0700); + CHECK_RIGHT_RESULT(rc, rights, CAP_MKDIRAT, CAP_LOOKUP); + if (rc >= 0) { + EXPECT_OK(unlinkat(dirfd, "cap_mkdirat", AT_REMOVEDIR)); + } + +#ifdef HAVE_MKFIFOAT + rc = mkfifoat(dfd_cap, "cap_mkfifoat", 0600); + CHECK_RIGHT_RESULT(rc, rights, CAP_MKFIFOAT, CAP_LOOKUP); + if (rc >= 0) { + EXPECT_OK(unlinkat(dirfd, "cap_mkfifoat", 0)); + } +#endif + + if (getuid() == 0) { + rc = mknodat(dfd_cap, "cap_mknodat", S_IFCHR | 0600, 0); + CHECK_RIGHT_RESULT(rc, rights, CAP_MKNODAT, CAP_LOOKUP); + if (rc >= 0) { + EXPECT_OK(unlinkat(dirfd, "cap_mknodat", 0)); + } + } + + // For renameat(2), need: + // - CAP_RENAMEAT_SOURCE on source + // - CAP_RENAMEAT_TARGET on destination. 
+ rc = openat(dirfd, "cap_renameat_src", O_CREAT, 0600); + EXPECT_OK(rc); + EXPECT_OK(close(rc)); + + rc = renameat(dirfd, "cap_renameat_src", dfd_cap, "cap_renameat_dst"); + CHECK_RIGHT_RESULT(rc, rights, CAP_RENAMEAT_TARGET); + if (rc >= 0) { + EXPECT_OK(unlinkat(dirfd, "cap_renameat_dst", 0)); + } else { + EXPECT_OK(unlinkat(dirfd, "cap_renameat_src", 0)); + } + + rc = openat(dirfd, "cap_renameat_src", O_CREAT, 0600); + EXPECT_OK(rc); + EXPECT_OK(close(rc)); + + rc = renameat(dfd_cap, "cap_renameat_src", dirfd, "cap_renameat_dst"); + CHECK_RIGHT_RESULT(rc, rights, CAP_RENAMEAT_SOURCE); + + if (rc >= 0) { + EXPECT_OK(unlinkat(dirfd, "cap_renameat_dst", 0)); + } else { + EXPECT_OK(unlinkat(dirfd, "cap_renameat_src", 0)); + } + + rc = symlinkat("test", dfd_cap, "cap_symlinkat"); + CHECK_RIGHT_RESULT(rc, rights, CAP_SYMLINKAT, CAP_LOOKUP); + if (rc >= 0) { + EXPECT_OK(unlinkat(dirfd, "cap_symlinkat", 0)); + } + + rc = openat(dirfd, "cap_unlinkat", O_CREAT, 0600); + EXPECT_OK(rc); + EXPECT_OK(close(rc)); + rc = unlinkat(dfd_cap, "cap_unlinkat", 0); + CHECK_RIGHT_RESULT(rc, rights, CAP_UNLINKAT, CAP_LOOKUP); + unlinkat(dirfd, "cap_unlinkat", 0); + EXPECT_OK(mkdirat(dirfd, "cap_unlinkat", 0700)); + rc = unlinkat(dfd_cap, "cap_unlinkat", AT_REMOVEDIR); + CHECK_RIGHT_RESULT(rc, rights, CAP_UNLINKAT, CAP_LOOKUP); + unlinkat(dirfd, "cap_unlinkat", AT_REMOVEDIR); + + EXPECT_OK(close(dfd_cap)); +} + +void DirOperationsTest(int extra) { + int rc = mkdir(TmpFile("cap_dirops"), 0755); + EXPECT_OK(rc); + if (rc < 0 && errno != EEXIST) return; + int dfd = open(TmpFile("cap_dirops"), O_RDONLY | O_DIRECTORY | extra); + EXPECT_OK(dfd); + int tmpfd = open(tmpdir.c_str(), O_RDONLY | O_DIRECTORY); + EXPECT_OK(tmpfd); + + EXPECT_OK(cap_enter()); // Enter capability mode. 
+ + TRY_DIR_OPS(dfd, CAP_LINKAT_SOURCE); + TRY_DIR_OPS(dfd, CAP_LINKAT_TARGET); + TRY_DIR_OPS(dfd, CAP_CREATE, CAP_READ, CAP_LOOKUP); + TRY_DIR_OPS(dfd, CAP_CREATE, CAP_WRITE, CAP_LOOKUP); + TRY_DIR_OPS(dfd, CAP_CREATE, CAP_READ, CAP_WRITE, CAP_LOOKUP); + TRY_DIR_OPS(dfd, CAP_FSYNC, CAP_READ, CAP_LOOKUP); + TRY_DIR_OPS(dfd, CAP_FSYNC, CAP_WRITE, CAP_LOOKUP); + TRY_DIR_OPS(dfd, CAP_FSYNC, CAP_READ, CAP_WRITE, CAP_LOOKUP); + TRY_DIR_OPS(dfd, CAP_FTRUNCATE, CAP_READ, CAP_LOOKUP); + TRY_DIR_OPS(dfd, CAP_FTRUNCATE, CAP_WRITE, CAP_LOOKUP); + TRY_DIR_OPS(dfd, CAP_FTRUNCATE, CAP_READ, CAP_WRITE, CAP_LOOKUP); + TRY_DIR_OPS(dfd, CAP_FCHOWN, CAP_LOOKUP); + TRY_DIR_OPS(dfd, CAP_FCHMOD, CAP_LOOKUP); + TRY_DIR_OPS(dfd, CAP_FSTAT, CAP_LOOKUP); + TRY_DIR_OPS(dfd, CAP_FUTIMES, CAP_LOOKUP); + TRY_DIR_OPS(dfd, CAP_MKDIRAT, CAP_LOOKUP); + TRY_DIR_OPS(dfd, CAP_MKFIFOAT, CAP_LOOKUP); + TRY_DIR_OPS(dfd, CAP_MKNODAT, CAP_LOOKUP); + TRY_DIR_OPS(dfd, CAP_SYMLINKAT, CAP_LOOKUP); + TRY_DIR_OPS(dfd, CAP_UNLINKAT, CAP_LOOKUP); + // Rename needs CAP_RENAMEAT_SOURCE on source directory and + // CAP_RENAMEAT_TARGET on destination directory. + TRY_DIR_OPS(dfd, CAP_RENAMEAT_SOURCE, CAP_UNLINKAT, CAP_LOOKUP); + TRY_DIR_OPS(dfd, CAP_RENAMEAT_TARGET, CAP_UNLINKAT, CAP_LOOKUP); + + EXPECT_OK(unlinkat(tmpfd, "cap_dirops", AT_REMOVEDIR)); + EXPECT_OK(close(tmpfd)); + EXPECT_OK(close(dfd)); +} + +FORK_TEST(Capability, DirOperations) { + DirOperationsTest(0); +} + +#ifdef O_PATH +FORK_TEST(Capability, PathDirOperations) { + // Make the dfd in the test a path-only file descriptor. 
+ DirOperationsTest(O_PATH); +} +#endif + +static void TryReadWrite(int cap_fd) { + char buffer[64]; + EXPECT_OK(read(cap_fd, buffer, sizeof(buffer))); + int rc = write(cap_fd, "", 0); + EXPECT_EQ(-1, rc); + EXPECT_EQ(ENOTCAPABLE, errno); +} + +FORK_TEST_ON(Capability, SocketTransfer, TmpFile("cap_fd_transfer")) { + int sock_fds[2]; + EXPECT_OK(socketpair(AF_UNIX, SOCK_STREAM, 0, sock_fds)); + + struct msghdr mh; + mh.msg_name = NULL; // No address needed + mh.msg_namelen = 0; + char buffer1[1024]; + struct iovec iov[1]; + iov[0].iov_base = buffer1; + iov[0].iov_len = sizeof(buffer1); + mh.msg_iov = iov; + mh.msg_iovlen = 1; + char buffer2[1024]; + mh.msg_control = buffer2; + mh.msg_controllen = sizeof(buffer2); + struct cmsghdr *cmptr; + + cap_rights_t r_rs; + cap_rights_init(&r_rs, CAP_READ, CAP_SEEK); + + pid_t child = fork(); + if (child == 0) { + // Child: enter cap mode + EXPECT_OK(cap_enter()); + + // Child: wait to receive FD over socket + int rc = recvmsg(sock_fds[0], &mh, 0); + EXPECT_OK(rc); + EXPECT_LE(CMSG_LEN(sizeof(int)), mh.msg_controllen); + cmptr = CMSG_FIRSTHDR(&mh); + int cap_fd = *(int*)CMSG_DATA(cmptr); + EXPECT_EQ(CMSG_LEN(sizeof(int)), cmptr->cmsg_len); + cmptr = CMSG_NXTHDR(&mh, cmptr); + EXPECT_TRUE(cmptr == NULL); + + // Child: confirm we can do the right operations on the capability + cap_rights_t rights; + EXPECT_OK(cap_rights_get(cap_fd, &rights)); + EXPECT_RIGHTS_EQ(&r_rs, &rights); + TryReadWrite(cap_fd); + + // Child: wait for a normal read + int val; + read(sock_fds[0], &val, sizeof(val)); + exit(0); + } + + int fd = open(TmpFile("cap_fd_transfer"), O_RDWR | O_CREAT, 0644); + EXPECT_OK(fd); + if (fd < 0) return; + int cap_fd = dup(fd); + EXPECT_OK(cap_fd); + EXPECT_OK(cap_rights_limit(cap_fd, &r_rs)); + + EXPECT_OK(cap_enter()); // Enter capability mode. 
+ + // Confirm we can do the right operations on the capability + TryReadWrite(cap_fd); + + // Send the file descriptor over the pipe to the sub-process + mh.msg_controllen = CMSG_LEN(sizeof(int)); + cmptr = CMSG_FIRSTHDR(&mh); + cmptr->cmsg_level = SOL_SOCKET; + cmptr->cmsg_type = SCM_RIGHTS; + cmptr->cmsg_len = CMSG_LEN(sizeof(int)); + *(int *)CMSG_DATA(cmptr) = cap_fd; + buffer1[0] = 0; + iov[0].iov_len = 1; + sleep(3); + int rc = sendmsg(sock_fds[1], &mh, 0); + EXPECT_OK(rc); + + sleep(1); // Ensure subprocess runs + int zero = 0; + write(sock_fds[1], &zero, sizeof(zero)); +} + +TEST(Capability, SyscallAt) { + int rc = mkdir(TmpFile("cap_at_topdir"), 0755); + EXPECT_OK(rc); + if (rc < 0 && errno != EEXIST) return; + + cap_rights_t r_all; + cap_rights_init(&r_all, CAP_READ, CAP_LOOKUP, CAP_MKNODAT, CAP_UNLINKAT, CAP_MKDIRAT, CAP_MKFIFOAT); + cap_rights_t r_no_unlink; + cap_rights_init(&r_no_unlink, CAP_READ, CAP_LOOKUP, CAP_MKDIRAT, CAP_MKFIFOAT); + cap_rights_t r_no_mkdir; + cap_rights_init(&r_no_mkdir, CAP_READ, CAP_LOOKUP, CAP_UNLINKAT, CAP_MKFIFOAT); + cap_rights_t r_no_mkfifo; + cap_rights_init(&r_no_mkfifo, CAP_READ, CAP_LOOKUP, CAP_UNLINKAT, CAP_MKDIRAT); + cap_rights_t r_no_mknod; + cap_rights_init(&r_no_mknod, CAP_READ, CAP_LOOKUP, CAP_UNLINKAT, CAP_MKDIRAT); + cap_rights_t r_create; + cap_rights_init(&r_create, CAP_READ, CAP_LOOKUP, CAP_CREATE); + cap_rights_t r_bind; + cap_rights_init(&r_bind, CAP_READ, CAP_LOOKUP, CAP_BIND); + + int dfd = open(TmpFile("cap_at_topdir"), O_RDONLY); + EXPECT_OK(dfd); + int cap_dfd_all = dup(dfd); + EXPECT_OK(cap_dfd_all); + EXPECT_OK(cap_rights_limit(cap_dfd_all, &r_all)); + int cap_dfd_no_unlink = dup(dfd); + EXPECT_OK(cap_dfd_no_unlink); + EXPECT_OK(cap_rights_limit(cap_dfd_no_unlink, &r_no_unlink)); + int cap_dfd_no_mkdir = dup(dfd); + EXPECT_OK(cap_dfd_no_mkdir); + EXPECT_OK(cap_rights_limit(cap_dfd_no_mkdir, &r_no_mkdir)); + int cap_dfd_no_mkfifo = dup(dfd); + EXPECT_OK(cap_dfd_no_mkfifo); + 
EXPECT_OK(cap_rights_limit(cap_dfd_no_mkfifo, &r_no_mkfifo)); + int cap_dfd_no_mknod = dup(dfd); + EXPECT_OK(cap_dfd_no_mknod); + EXPECT_OK(cap_rights_limit(cap_dfd_no_mknod, &r_no_mknod)); + int cap_dfd_create = dup(dfd); + EXPECT_OK(cap_dfd_create); + EXPECT_OK(cap_rights_limit(cap_dfd_create, &r_create)); + int cap_dfd_bind = dup(dfd); + EXPECT_OK(cap_dfd_bind); + EXPECT_OK(cap_rights_limit(cap_dfd_bind, &r_bind)); + + // Need CAP_MKDIRAT to mkdirat(2). + EXPECT_NOTCAPABLE(mkdirat(cap_dfd_no_mkdir, "cap_subdir", 0755)); + rmdir(TmpFile("cap_at_topdir/cap_subdir")); + EXPECT_OK(mkdirat(cap_dfd_all, "cap_subdir", 0755)); + + // Need CAP_UNLINKAT to unlinkat(dfd, name, AT_REMOVEDIR). + EXPECT_NOTCAPABLE(unlinkat(cap_dfd_no_unlink, "cap_subdir", AT_REMOVEDIR)); + EXPECT_OK(unlinkat(cap_dfd_all, "cap_subdir", AT_REMOVEDIR)); + rmdir(TmpFile("cap_at_topdir/cap_subdir")); + + // Need CAP_MKFIFOAT to mkfifoat(2). + EXPECT_NOTCAPABLE(mkfifoat(cap_dfd_no_mkfifo, "cap_fifo", 0755)); + unlink(TmpFile("cap_at_topdir/cap_fifo")); + EXPECT_OK(mkfifoat(cap_dfd_all, "cap_fifo", 0755)); + unlink(TmpFile("cap_at_topdir/cap_fifo")); + +#ifdef HAVE_MKNOD_REG + // Need CAP_CREATE to create a regular file with mknodat(2). + EXPECT_NOTCAPABLE(mknodat(cap_dfd_all, "cap_regular", S_IFREG|0755, 0)); + unlink(TmpFile("cap_at_topdir/cap_regular")); + EXPECT_OK(mknodat(cap_dfd_create, "cap_regular", S_IFREG|0755, 0)); + unlink(TmpFile("cap_at_topdir/cap_regular")); +#endif + +#ifdef HAVE_MKNOD_SOCKET + // Need CAP_BIND to create a UNIX domain socket with mknodat(2). 
+ EXPECT_NOTCAPABLE(mknodat(cap_dfd_all, "cap_socket", S_IFSOCK|0755, 0)); + unlink(TmpFile("cap_at_topdir/cap_socket")); + EXPECT_OK(mknodat(cap_dfd_bind, "cap_socket", S_IFSOCK|0755, 0)); + unlink(TmpFile("cap_at_topdir/cap_socket")); +#endif + + if (getuid() == 0) { + // Need CAP_MKNODAT to mknodat(2) a device + EXPECT_NOTCAPABLE(mknodat(cap_dfd_no_mknod, "cap_device", S_IFCHR|0755, makedev(99, 123))); + unlink(TmpFile("cap_at_topdir/cap_device")); + EXPECT_OK(mknodat(cap_dfd_all, "cap_device", S_IFCHR|0755, makedev(99, 123))); + unlink(TmpFile("cap_at_topdir/cap_device")); + + // Need CAP_MKFIFOAT to mknodat(2) for a FIFO. + EXPECT_NOTCAPABLE(mknodat(cap_dfd_no_mkfifo, "cap_fifo", S_IFIFO|0755, 0)); + unlink(TmpFile("cap_at_topdir/cap_fifo")); + EXPECT_OK(mknodat(cap_dfd_all, "cap_fifo", S_IFIFO|0755, 0)); + unlink(TmpFile("cap_at_topdir/cap_fifo")); + } else { + TEST_SKIPPED("requires root (partial)"); + } + + close(cap_dfd_all); + close(cap_dfd_no_mknod); + close(cap_dfd_no_mkfifo); + close(cap_dfd_no_mkdir); + close(cap_dfd_no_unlink); + close(cap_dfd_create); + close(cap_dfd_bind); + close(dfd); + + // Tidy up. 
+ rmdir(TmpFile("cap_at_topdir")); +} + +FORK_TEST_ON(Capability, ExtendedAttributes, TmpFile("cap_extattr")) { + int fd = open(TmpFile("cap_extattr"), O_RDONLY|O_CREAT, 0644); + EXPECT_OK(fd); + + char buffer[1024]; + int rc = fgetxattr_(fd, "user.capsicumtest", buffer, sizeof(buffer)); + if (rc < 0 && errno == ENOTSUP) { + // Need user_xattr mount option for non-root users on Linux + TEST_SKIPPED("/tmp doesn't support extended attributes"); + close(fd); + return; + } + + cap_rights_t r_rws; + cap_rights_init(&r_rws, CAP_READ, CAP_WRITE, CAP_SEEK); + cap_rights_t r_xlist; + cap_rights_init(&r_xlist, CAP_EXTATTR_LIST); + cap_rights_t r_xget; + cap_rights_init(&r_xget, CAP_EXTATTR_GET); + cap_rights_t r_xset; + cap_rights_init(&r_xset, CAP_EXTATTR_SET); + cap_rights_t r_xdel; + cap_rights_init(&r_xdel, CAP_EXTATTR_DELETE); + + int cap = dup(fd); + EXPECT_OK(cap); + EXPECT_OK(cap_rights_limit(cap, &r_rws)); + int cap_xlist = dup(fd); + EXPECT_OK(cap_xlist); + EXPECT_OK(cap_rights_limit(cap_xlist, &r_xlist)); + int cap_xget = dup(fd); + EXPECT_OK(cap_xget); + EXPECT_OK(cap_rights_limit(cap_xget, &r_xget)); + int cap_xset = dup(fd); + EXPECT_OK(cap_xset); + EXPECT_OK(cap_rights_limit(cap_xset, &r_xset)); + int cap_xdel = dup(fd); + EXPECT_OK(cap_xdel); + EXPECT_OK(cap_rights_limit(cap_xdel, &r_xdel)); + + const char* value = "capsicum"; + int len = strlen(value) + 1; + EXPECT_NOTCAPABLE(fsetxattr_(cap, "user.capsicumtest", value, len, 0)); + EXPECT_NOTCAPABLE(fsetxattr_(cap_xlist, "user.capsicumtest", value, len, 0)); + EXPECT_NOTCAPABLE(fsetxattr_(cap_xget, "user.capsicumtest", value, len, 0)); + EXPECT_NOTCAPABLE(fsetxattr_(cap_xdel, "user.capsicumtest", value, len, 0)); + EXPECT_OK(fsetxattr_(cap_xset, "user.capsicumtest", value, len, 0)); + + EXPECT_NOTCAPABLE(flistxattr_(cap, buffer, sizeof(buffer))); + EXPECT_NOTCAPABLE(flistxattr_(cap_xget, buffer, sizeof(buffer))); + EXPECT_NOTCAPABLE(flistxattr_(cap_xset, buffer, sizeof(buffer))); + 
EXPECT_NOTCAPABLE(flistxattr_(cap_xdel, buffer, sizeof(buffer))); + EXPECT_OK(flistxattr_(cap_xlist, buffer, sizeof(buffer))); + + EXPECT_NOTCAPABLE(fgetxattr_(cap, "user.capsicumtest", buffer, sizeof(buffer))); + EXPECT_NOTCAPABLE(fgetxattr_(cap_xlist, "user.capsicumtest", buffer, sizeof(buffer))); + EXPECT_NOTCAPABLE(fgetxattr_(cap_xset, "user.capsicumtest", buffer, sizeof(buffer))); + EXPECT_NOTCAPABLE(fgetxattr_(cap_xdel, "user.capsicumtest", buffer, sizeof(buffer))); + EXPECT_OK(fgetxattr_(cap_xget, "user.capsicumtest", buffer, sizeof(buffer))); + + EXPECT_NOTCAPABLE(fremovexattr_(cap, "user.capsicumtest")); + EXPECT_NOTCAPABLE(fremovexattr_(cap_xlist, "user.capsicumtest")); + EXPECT_NOTCAPABLE(fremovexattr_(cap_xget, "user.capsicumtest")); + EXPECT_NOTCAPABLE(fremovexattr_(cap_xset, "user.capsicumtest")); + EXPECT_OK(fremovexattr_(cap_xdel, "user.capsicumtest")); + + close(cap_xdel); + close(cap_xset); + close(cap_xget); + close(cap_xlist); + close(cap); + close(fd); +} + +TEST(Capability, PipeUnseekable) { + int fds[2]; + EXPECT_OK(pipe(fds)); + + // Some programs detect pipes by calling seek() and getting ESPIPE. + EXPECT_EQ(-1, lseek(fds[0], 0, SEEK_SET)); + EXPECT_EQ(ESPIPE, errno); + + cap_rights_t rights; + cap_rights_init(&rights, CAP_READ, CAP_WRITE, CAP_SEEK); + EXPECT_OK(cap_rights_limit(fds[0], &rights)); + + EXPECT_EQ(-1, lseek(fds[0], 0, SEEK_SET)); + EXPECT_EQ(ESPIPE, errno); + + // Remove CAP_SEEK and see if ENOTCAPABLE trumps ESPIPE. + cap_rights_init(&rights, CAP_READ, CAP_WRITE); + EXPECT_OK(cap_rights_limit(fds[0], &rights)); + EXPECT_EQ(-1, lseek(fds[0], 0, SEEK_SET)); + EXPECT_EQ(ENOTCAPABLE, errno); + // TODO(drysdale): in practical terms it might be nice if ESPIPE trumped ENOTCAPABLE. 
+ // EXPECT_EQ(ESPIPE, errno); + + close(fds[0]); + close(fds[1]); +} + +TEST(Capability, NoBypassDAC) { + REQUIRE_ROOT(); + int fd = open(TmpFile("cap_root_owned"), O_RDONLY|O_CREAT, 0644); + EXPECT_OK(fd); + cap_rights_t rights; + cap_rights_init(&rights, CAP_READ, CAP_WRITE, CAP_FCHMOD, CAP_FSTAT); + EXPECT_OK(cap_rights_limit(fd, &rights)); + + pid_t child = fork(); + if (child == 0) { + // Child: change uid to a lesser being + setuid(other_uid); + // Attempt to fchmod the file, and fail. + // Having CAP_FCHMOD doesn't bypass the need to comply with DAC policy. + int rc = fchmod(fd, 0666); + EXPECT_EQ(-1, rc); + EXPECT_EQ(EPERM, errno); + exit(HasFailure()); + } + int status; + EXPECT_EQ(child, waitpid(child, &status, 0)); + EXPECT_TRUE(WIFEXITED(status)) << "0x" << std::hex << status; + EXPECT_EQ(0, WEXITSTATUS(status)); + struct stat info; + EXPECT_OK(fstat(fd, &info)); + EXPECT_EQ((mode_t)(S_IFREG|0644), info.st_mode); + close(fd); + unlink(TmpFile("cap_root_owned")); +} Index: head/contrib/capsicum-test/capmode.cc =================================================================== --- head/contrib/capsicum-test/capmode.cc +++ head/contrib/capsicum-test/capmode.cc @@ -0,0 +1,654 @@ +// Test routines to make sure a variety of system calls are or are not +// available in capability mode. The goal is not to see if they work, just +// whether or not they return the expected ECAPMODE. +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "capsicum.h" +#include "syscalls.h" +#include "capsicum-test.h" + +// Test fixture that opens (and closes) a bunch of files. 
+class WithFiles : public ::testing::Test { + public: + WithFiles() : + fd_file_(open(TmpFile("cap_capmode"), O_RDWR|O_CREAT, 0644)), + fd_close_(open("/dev/null", O_RDWR)), + fd_dir_(open(tmpdir.c_str(), O_RDONLY)), + fd_socket_(socket(PF_INET, SOCK_DGRAM, 0)), + fd_tcp_socket_(socket(PF_INET, SOCK_STREAM, 0)) { + EXPECT_OK(fd_file_); + EXPECT_OK(fd_close_); + EXPECT_OK(fd_dir_); + EXPECT_OK(fd_socket_); + EXPECT_OK(fd_tcp_socket_); + } + ~WithFiles() { + if (fd_tcp_socket_ >= 0) close(fd_tcp_socket_); + if (fd_socket_ >= 0) close(fd_socket_); + if (fd_dir_ >= 0) close(fd_dir_); + if (fd_close_ >= 0) close(fd_close_); + if (fd_file_ >= 0) close(fd_file_); + unlink(TmpFile("cap_capmode")); + } + protected: + int fd_file_; + int fd_close_; + int fd_dir_; + int fd_socket_; + int fd_tcp_socket_; +}; + +FORK_TEST_F(WithFiles, DisallowedFileSyscalls) { + unsigned int mode = -1; + EXPECT_OK(cap_getmode(&mode)); + EXPECT_EQ(0, (int)mode); + EXPECT_OK(cap_enter()); // Enter capability mode. + EXPECT_OK(cap_getmode(&mode)); + EXPECT_EQ(1, (int)mode); + + // System calls that are not permitted in capability mode. 
+ EXPECT_CAPMODE(access(TmpFile("cap_capmode_access"), F_OK)); + EXPECT_CAPMODE(acct(TmpFile("cap_capmode_acct"))); + EXPECT_CAPMODE(chdir(TmpFile("cap_capmode_chdir"))); +#ifdef HAVE_CHFLAGS + EXPECT_CAPMODE(chflags(TmpFile("cap_capmode_chflags"), UF_NODUMP)); +#endif + EXPECT_CAPMODE(chmod(TmpFile("cap_capmode_chmod"), 0644)); + EXPECT_CAPMODE(chown(TmpFile("cap_capmode_chown"), -1, -1)); + EXPECT_CAPMODE(chroot(TmpFile("cap_capmode_chroot"))); + EXPECT_CAPMODE(creat(TmpFile("cap_capmode_creat"), 0644)); + EXPECT_CAPMODE(fchdir(fd_dir_)); +#ifdef HAVE_GETFSSTAT + struct statfs statfs; + EXPECT_CAPMODE(getfsstat(&statfs, sizeof(statfs), MNT_NOWAIT)); +#endif + EXPECT_CAPMODE(link(TmpFile("foo"), TmpFile("bar"))); + struct stat sb; + EXPECT_CAPMODE(lstat(TmpFile("cap_capmode_lstat"), &sb)); + EXPECT_CAPMODE(mknod(TmpFile("capmode_mknod"), 0644 | S_IFIFO, 0)); + EXPECT_CAPMODE(bogus_mount_()); + EXPECT_CAPMODE(open("/dev/null", O_RDWR)); + char buf[64]; + EXPECT_CAPMODE(readlink(TmpFile("cap_capmode_readlink"), buf, sizeof(buf))); +#ifdef HAVE_REVOKE + EXPECT_CAPMODE(revoke(TmpFile("cap_capmode_revoke"))); +#endif + EXPECT_CAPMODE(stat(TmpFile("cap_capmode_stat"), &sb)); + EXPECT_CAPMODE(symlink(TmpFile("cap_capmode_symlink_from"), TmpFile("cap_capmode_symlink_to"))); + EXPECT_CAPMODE(unlink(TmpFile("cap_capmode_unlink"))); + EXPECT_CAPMODE(umount2("/not_mounted", 0)); +} + +FORK_TEST_F(WithFiles, DisallowedSocketSyscalls) { + EXPECT_OK(cap_enter()); // Enter capability mode. + + // System calls that are not permitted in capability mode. 
+ struct sockaddr_in addr; + addr.sin_family = AF_INET; + addr.sin_port = 0; + addr.sin_addr.s_addr = htonl(INADDR_ANY); + EXPECT_CAPMODE(bind_(fd_socket_, (sockaddr*)&addr, sizeof(addr))); + addr.sin_family = AF_INET; + addr.sin_port = 53; + addr.sin_addr.s_addr = htonl(0x08080808); + EXPECT_CAPMODE(connect_(fd_tcp_socket_, (sockaddr*)&addr, sizeof(addr))); +} + +FORK_TEST_F(WithFiles, AllowedFileSyscalls) { + int rc; + EXPECT_OK(cap_enter()); // Enter capability mode. + + EXPECT_OK(close(fd_close_)); + fd_close_ = -1; + int fd_dup = dup(fd_file_); + EXPECT_OK(fd_dup); + EXPECT_OK(dup2(fd_file_, fd_dup)); +#ifdef HAVE_DUP3 + EXPECT_OK(dup3(fd_file_, fd_dup, 0)); +#endif + if (fd_dup >= 0) close(fd_dup); + + struct stat sb; + EXPECT_OK(fstat(fd_file_, &sb)); + EXPECT_OK(lseek(fd_file_, 0, SEEK_SET)); + char ch; + EXPECT_OK(read(fd_file_, &ch, sizeof(ch))); + EXPECT_OK(write(fd_file_, &ch, sizeof(ch))); + +#ifdef HAVE_CHFLAGS + rc = fchflags(fd_file_, UF_NODUMP); + if (rc < 0) { + EXPECT_NE(ECAPMODE, errno); + } +#endif + + char buf[1024]; + rc = getdents_(fd_dir_, (void*)buf, sizeof(buf)); + EXPECT_OK(rc); + + char data[] = "123"; + EXPECT_OK(pwrite(fd_file_, data, 1, 0)); + EXPECT_OK(pread(fd_file_, data, 1, 0)); + + struct iovec io; + io.iov_base = data; + io.iov_len = 2; +#if !defined(__i386__) && !defined(__linux__) + // TODO(drysdale): reinstate these tests for 32-bit runs when possible + // libc bug is fixed. 
+ EXPECT_OK(pwritev(fd_file_, &io, 1, 0)); + EXPECT_OK(preadv(fd_file_, &io, 1, 0)); +#endif + EXPECT_OK(writev(fd_file_, &io, 1)); + EXPECT_OK(readv(fd_file_, &io, 1)); + +#ifdef HAVE_SYNCFS + EXPECT_OK(syncfs(fd_file_)); +#endif +#ifdef HAVE_SYNC_FILE_RANGE + EXPECT_OK(sync_file_range(fd_file_, 0, 1, 0)); +#endif +#ifdef HAVE_READAHEAD + if (!tmpdir_on_tmpfs) { // tmpfs doesn't support readahead(2) + EXPECT_OK(readahead(fd_file_, 0, 1)); + } +#endif +} + +FORK_TEST_F(WithFiles, AllowedSocketSyscalls) { + EXPECT_OK(cap_enter()); // Enter capability mode. + + // recvfrom() either returns -1 with EAGAIN, or 0. + int rc = recvfrom(fd_socket_, NULL, 0, MSG_DONTWAIT, NULL, NULL); + if (rc < 0) { + EXPECT_EQ(EAGAIN, errno); + } + char ch; + EXPECT_OK(write(fd_file_, &ch, sizeof(ch))); + + // These calls will fail for lack of e.g. a proper name to send to, + // but they are allowed in capability mode, so errno != ECAPMODE. + EXPECT_FAIL_NOT_CAPMODE(accept(fd_socket_, NULL, NULL)); + EXPECT_FAIL_NOT_CAPMODE(getpeername(fd_socket_, NULL, NULL)); + EXPECT_FAIL_NOT_CAPMODE(getsockname(fd_socket_, NULL, NULL)); + EXPECT_FAIL_NOT_CAPMODE(recvmsg(fd_socket_, NULL, 0)); + EXPECT_FAIL_NOT_CAPMODE(sendmsg(fd_socket_, NULL, 0)); + EXPECT_FAIL_NOT_CAPMODE(sendto(fd_socket_, NULL, 0, 0, NULL, 0)); + off_t offset = 0; + EXPECT_FAIL_NOT_CAPMODE(sendfile_(fd_socket_, fd_file_, &offset, 1)); + + // The socket/socketpair syscalls are allowed, but they don't give + // anything externally useful (can't call bind/connect on them). 
+ int fd_socket2 = socket(PF_INET, SOCK_DGRAM, 0); + EXPECT_OK(fd_socket2); + if (fd_socket2 >= 0) close(fd_socket2); + int fd_pair[2] = {-1, -1}; + EXPECT_OK(socketpair(AF_UNIX, SOCK_STREAM, 0, fd_pair)); + if (fd_pair[0] >= 0) close(fd_pair[0]); + if (fd_pair[1] >= 0) close(fd_pair[1]); +} + +#ifdef HAVE_SEND_RECV_MMSG +FORK_TEST(Capmode, AllowedMmsgSendRecv) { + int fd_socket = socket(PF_INET, SOCK_DGRAM, 0); + + struct sockaddr_in addr; + addr.sin_family = AF_INET; + addr.sin_port = htons(0); + addr.sin_addr.s_addr = htonl(INADDR_ANY); + EXPECT_OK(bind(fd_socket, (sockaddr*)&addr, sizeof(addr))); + + EXPECT_OK(cap_enter()); // Enter capability mode. + + char buffer[256] = {0}; + struct iovec iov; + iov.iov_base = buffer; + iov.iov_len = sizeof(buffer); + struct mmsghdr mm; + memset(&mm, 0, sizeof(mm)); + mm.msg_hdr.msg_iov = &iov; + mm.msg_hdr.msg_iovlen = 1; + struct timespec ts; + ts.tv_sec = 1; + ts.tv_nsec = 100; + EXPECT_FAIL_NOT_CAPMODE(recvmmsg(fd_socket, &mm, 1, MSG_DONTWAIT, &ts)); + EXPECT_FAIL_NOT_CAPMODE(sendmmsg(fd_socket, &mm, 1, 0)); + close(fd_socket); +} +#endif + +FORK_TEST(Capmode, AllowedIdentifierSyscalls) { + // Record some identifiers + gid_t my_gid = getgid(); + pid_t my_pid = getpid(); + pid_t my_ppid = getppid(); + uid_t my_uid = getuid(); + pid_t my_sid = getsid(my_pid); + + EXPECT_OK(cap_enter()); // Enter capability mode. + + EXPECT_EQ(my_gid, getegid_()); + EXPECT_EQ(my_uid, geteuid_()); + EXPECT_EQ(my_gid, getgid_()); + EXPECT_EQ(my_pid, getpid()); + EXPECT_EQ(my_ppid, getppid()); + EXPECT_EQ(my_uid, getuid_()); + EXPECT_EQ(my_sid, getsid(my_pid)); + gid_t grps[128]; + EXPECT_OK(getgroups_(128, grps)); + uid_t ruid; + uid_t euid; + uid_t suid; + EXPECT_OK(getresuid(&ruid, &euid, &suid)); + gid_t rgid; + gid_t egid; + gid_t sgid; + EXPECT_OK(getresgid(&rgid, &egid, &sgid)); +#ifdef HAVE_GETLOGIN + EXPECT_TRUE(getlogin() != NULL); +#endif + + // Set various identifiers (to their existing values). 
+ EXPECT_OK(setgid(my_gid)); +#ifdef HAVE_SETFSGID + EXPECT_OK(setfsgid(my_gid)); +#endif + EXPECT_OK(setuid(my_uid)); +#ifdef HAVE_SETFSUID + EXPECT_OK(setfsuid(my_uid)); +#endif + EXPECT_OK(setregid(my_gid, my_gid)); + EXPECT_OK(setresgid(my_gid, my_gid, my_gid)); + EXPECT_OK(setreuid(my_uid, my_uid)); + EXPECT_OK(setresuid(my_uid, my_uid, my_uid)); + EXPECT_OK(setsid()); +} + +FORK_TEST(Capmode, AllowedSchedSyscalls) { + EXPECT_OK(cap_enter()); // Enter capability mode. + int policy = sched_getscheduler(0); + EXPECT_OK(policy); + struct sched_param sp; + EXPECT_OK(sched_getparam(0, &sp)); + if (policy >= 0 && (!SCHED_SETSCHEDULER_REQUIRES_ROOT || getuid() == 0)) { + EXPECT_OK(sched_setscheduler(0, policy, &sp)); + } + EXPECT_OK(sched_setparam(0, &sp)); + EXPECT_OK(sched_get_priority_max(policy)); + EXPECT_OK(sched_get_priority_min(policy)); + struct timespec ts; + EXPECT_OK(sched_rr_get_interval(0, &ts)); + EXPECT_OK(sched_yield()); +} + + +FORK_TEST(Capmode, AllowedTimerSyscalls) { + EXPECT_OK(cap_enter()); // Enter capability mode. + struct timespec ts; + EXPECT_OK(clock_getres(CLOCK_REALTIME, &ts)); + EXPECT_OK(clock_gettime(CLOCK_REALTIME, &ts)); + struct itimerval itv; + EXPECT_OK(getitimer(ITIMER_REAL, &itv)); + EXPECT_OK(setitimer(ITIMER_REAL, &itv, NULL)); + struct timeval tv; + struct timezone tz; + EXPECT_OK(gettimeofday(&tv, &tz)); + ts.tv_sec = 0; + ts.tv_nsec = 1; + EXPECT_OK(nanosleep(&ts, NULL)); +} + + +FORK_TEST(Capmode, AllowedProfilSyscall) { + EXPECT_OK(cap_enter()); // Enter capability mode. + char sbuf[32]; + EXPECT_OK(profil((profil_arg1_t*)sbuf, sizeof(sbuf), 0, 1)); +} + + +FORK_TEST(Capmode, AllowedResourceSyscalls) { + EXPECT_OK(cap_enter()); // Enter capability mode. 
+  // getpriority() can legitimately return a negative value, so success is
+  // detected through errno rather than through the return value.
+  errno = 0;
+  int rc = getpriority(PRIO_PROCESS, 0);
+  EXPECT_EQ(0, errno);
+  EXPECT_OK(setpriority(PRIO_PROCESS, 0, rc));
+  struct rlimit rlim;
+  EXPECT_OK(getrlimit_(RLIMIT_CORE, &rlim));
+  EXPECT_OK(setrlimit(RLIMIT_CORE, &rlim));
+  struct rusage ruse;
+  EXPECT_OK(getrusage(RUSAGE_SELF, &ruse));
+}
+
+// Memory-management calls on a mapping created before cap_enter() are
+// expected to succeed in capability mode.
+FORK_TEST(CapMode, AllowedMmapSyscalls) {
+  // mmap() some memory.
+  size_t mem_size = getpagesize();
+  void *mem = mmap(NULL, mem_size, PROT_READ|PROT_WRITE, MAP_SHARED|MAP_ANONYMOUS, -1, 0);
+  // mmap() reports failure with MAP_FAILED, not NULL.
+  EXPECT_TRUE(mem != MAP_FAILED);
+  EXPECT_OK(cap_enter());  // Enter capability mode.
+
+  EXPECT_OK(msync(mem, mem_size, MS_ASYNC));
+  EXPECT_OK(madvise(mem, mem_size, MADV_NORMAL));
+  unsigned char vec[2];
+  EXPECT_OK(mincore_(mem, mem_size, vec));
+  EXPECT_OK(mprotect(mem, mem_size, PROT_READ|PROT_WRITE));
+
+  // Locking is only attempted when the platform does not require root for
+  // it, or when running as root.
+  if (!MLOCK_REQUIRES_ROOT || getuid() == 0) {
+    EXPECT_OK(mlock(mem, mem_size));
+    EXPECT_OK(munlock(mem, mem_size));
+    int rc = mlockall(MCL_CURRENT);
+    if (rc != 0) {
+      // mlockall may well fail with ENOMEM for non-root users, as the
+      // default RLIMIT_MEMLOCK value isn't that big.
+      EXPECT_NE(ECAPMODE, errno);
+    }
+    EXPECT_OK(munlockall());
+  }
+  // Unmap the memory.
+  EXPECT_OK(munmap(mem, mem_size));
+}
+
+// pipe() (and pipe2() where available) are expected to succeed after
+// cap_enter(); vmsplice() on the pipe's first descriptor is expected to fail
+// with an errno other than ECAPMODE.
+FORK_TEST(Capmode, AllowedPipeSyscalls) {
+  EXPECT_OK(cap_enter());  // Enter capability mode
+  int fd2[2];
+  int rc = pipe(fd2);
+  EXPECT_EQ(0, rc);
+
+#ifdef HAVE_VMSPLICE
+  char buf[11] = "0123456789";
+  struct iovec iov;
+  iov.iov_base = buf;
+  iov.iov_len = sizeof(buf);
+  EXPECT_FAIL_NOT_CAPMODE(vmsplice(fd2[0], &iov, 1, SPLICE_F_NONBLOCK));
+#endif
+
+  // Only close the descriptors if pipe() actually produced them.
+  if (rc == 0) {
+    close(fd2[0]);
+    close(fd2[1]);
+  }
+#ifdef HAVE_PIPE2
+  rc = pipe2(fd2, 0);
+  EXPECT_EQ(0, rc);
+  if (rc == 0) {
+    close(fd2[0]);
+    close(fd2[1]);
+  }
+#endif
+}
+
+// *at() calls relative to a directory descriptor opened before cap_enter()
+// are expected to succeed in a capability-mode child process.
+TEST(Capmode, AllowedAtSyscalls) {
+  int rc = mkdir(TmpFile("cap_at_syscalls"), 0755);
+  EXPECT_OK(rc);
+  if (rc < 0 && errno != EEXIST) return;
+  int dfd = open(TmpFile("cap_at_syscalls"), O_RDONLY);
+  EXPECT_OK(dfd);
+
+  // Create the file the child operates on.
+  int file = openat(dfd, "testfile", O_RDONLY|O_CREAT, 0644);
+  EXPECT_OK(file);
+  EXPECT_OK(close(file));
+
+
+  pid_t child = fork();
+  if (child == 0) {
+    // Child: enter cap mode and run tests
+    EXPECT_OK(cap_enter());  // Enter capability mode
+
+    struct stat fs;
+    EXPECT_OK(fstatat(dfd, "testfile", &fs, 0));
+    EXPECT_OK(mkdirat(dfd, "subdir", 0600));
+    EXPECT_OK(fchmodat(dfd, "subdir", 0644, 0));
+    EXPECT_OK(faccessat(dfd, "subdir", F_OK, 0));
+    EXPECT_OK(renameat(dfd, "subdir", dfd, "subdir2"));
+    EXPECT_OK(renameat(dfd, "subdir2", dfd, "subdir"));
+    struct timeval tv[2];
+    struct timezone tz;
+    EXPECT_OK(gettimeofday(&tv[0], &tz));
+    EXPECT_OK(gettimeofday(&tv[1], &tz));
+    EXPECT_OK(futimesat(dfd, "testfile", tv));
+
+    // Re-apply the owner and group that fstatat() reported above.
+    EXPECT_OK(fchownat(dfd, "testfile", fs.st_uid, fs.st_gid, 0));
+    EXPECT_OK(linkat(dfd, "testfile", dfd, "linky", 0));
+    EXPECT_OK(symlinkat("testfile", dfd, "symlink"));
+    char buffer[256];
+    EXPECT_OK(readlinkat(dfd, "symlink", buffer, sizeof(buffer)));
+    EXPECT_OK(unlinkat(dfd, "linky", 0));
+    EXPECT_OK(unlinkat(dfd, "subdir", AT_REMOVEDIR));
+
+    // Check that invalid requests get a non-Capsicum errno.
+    errno = 0;
+    rc = readlinkat(-1, "symlink", buffer, sizeof(buffer));
+    EXPECT_GE(0, rc);
+    EXPECT_NE(ECAPMODE, errno);
+
+    // Report any expectation failure to the parent through the exit status.
+    exit(HasFailure());
+  }
+
+  // Wait for the child.
+  int status;
+  EXPECT_EQ(child, waitpid(child, &status, 0));
+  rc = WIFEXITED(status) ? WEXITSTATUS(status) : -1;
+  EXPECT_EQ(0, rc);
+
+  // Tidy up.
+  close(dfd);
+  rmdir(TmpFile("cap_at_syscalls/subdir"));
+  unlink(TmpFile("cap_at_syscalls/symlink"));
+  unlink(TmpFile("cap_at_syscalls/linky"));
+  unlink(TmpFile("cap_at_syscalls/testfile"));
+  rmdir(TmpFile("cap_at_syscalls"));
+}
+
+// *at() calls that name AT_FDCWD instead of a directory descriptor are
+// expected to fail with ECAPMODE in a capability-mode child process.
+TEST(Capmode, AllowedAtSyscallsCwd) {
+  int rc = mkdir(TmpFile("cap_at_syscalls_cwd"), 0755);
+  EXPECT_OK(rc);
+  if (rc < 0 && errno != EEXIST) return;
+  int dfd = open(TmpFile("cap_at_syscalls_cwd"), O_RDONLY);
+  EXPECT_OK(dfd);
+
+  // Create the file the child refers to.
+  int file = openat(dfd, "testfile", O_RDONLY|O_CREAT, 0644);
+  EXPECT_OK(file);
+  EXPECT_OK(close(file));
+
+  pid_t child = fork();
+  if (child == 0) {
+    // Child: move into temp dir, enter cap mode and run tests
+    EXPECT_OK(fchdir(dfd));
+    EXPECT_OK(cap_enter());  // Enter capability mode
+
+    // Test that *at(AT_FDCWD, path,...) is policed with ECAPMODE.
+    EXPECT_CAPMODE(openat(AT_FDCWD, "testfile", O_RDONLY));
+    // fstatat() is expected to fail below, so it never fills in `fs`.
+    // Zero-initialise it so the fchownat() call further down does not read
+    // indeterminate uid/gid values.
+    struct stat fs = {};
+    EXPECT_CAPMODE(fstatat(AT_FDCWD, "testfile", &fs, 0));
+    EXPECT_CAPMODE(mkdirat(AT_FDCWD, "subdir", 0600));
+    EXPECT_CAPMODE(fchmodat(AT_FDCWD, "subdir", 0644, 0));
+    EXPECT_CAPMODE(faccessat(AT_FDCWD, "subdir", F_OK, 0));
+    EXPECT_CAPMODE(renameat(AT_FDCWD, "subdir", AT_FDCWD, "subdir2"));
+    EXPECT_CAPMODE(renameat(AT_FDCWD, "subdir2", AT_FDCWD, "subdir"));
+    struct timeval tv[2];
+    struct timezone tz;
+    EXPECT_OK(gettimeofday(&tv[0], &tz));
+    EXPECT_OK(gettimeofday(&tv[1], &tz));
+    EXPECT_CAPMODE(futimesat(AT_FDCWD, "testfile", tv));
+
+    EXPECT_CAPMODE(fchownat(AT_FDCWD, "testfile", fs.st_uid, fs.st_gid, 0));
+    EXPECT_CAPMODE(linkat(AT_FDCWD, "testfile", AT_FDCWD, "linky", 0));
+    EXPECT_CAPMODE(symlinkat("testfile", AT_FDCWD, "symlink"));
+    char buffer[256];
+    EXPECT_CAPMODE(readlinkat(AT_FDCWD, "symlink", buffer, sizeof(buffer)));
+    EXPECT_CAPMODE(unlinkat(AT_FDCWD, "linky", 0));
+
+    // Report any expectation failure to the parent through the exit status.
+    exit(HasFailure());
+  }
+
+  // Wait for the child.
+  int status;
+  EXPECT_EQ(child, waitpid(child, &status, 0));
+  rc = WIFEXITED(status) ? WEXITSTATUS(status) : -1;
+  EXPECT_EQ(0, rc);
+
+  // Tidy up.
+  close(dfd);
+  rmdir(TmpFile("cap_at_syscalls_cwd/subdir"));
+  unlink(TmpFile("cap_at_syscalls_cwd/symlink"));
+  unlink(TmpFile("cap_at_syscalls_cwd/linky"));
+  unlink(TmpFile("cap_at_syscalls_cwd/testfile"));
+  rmdir(TmpFile("cap_at_syscalls_cwd"));
+}
+
+// The parent checks that a capability-mode child calling abort(3) is
+// terminated by SIGABRT.
+TEST(Capmode, Abort) {
+  // Check that abort(3) works even in capability mode.
+  pid_t child = fork();
+  if (child == 0) {
+    // Child: enter capability mode and call abort(3).
+    // Triggers something like kill(getpid(), SIGABRT).
+    cap_enter();  // Enter capability mode.
+    abort();
+    // Only reached if abort() returns, which the parent would see as a
+    // normal exit rather than a signal.
+    exit(99);
+  }
+  int status;
+  EXPECT_EQ(child, waitpid(child, &status, 0));
+  EXPECT_TRUE(WIFSIGNALED(status)) << " status = " << std::hex << status;
+  EXPECT_EQ(SIGABRT, WTERMSIG(status)) << " status = " << std::hex << status;
+}
+
+// Assorted calls checked after cap_enter(): umask() and sigaltstack() are
+// expected to work, ptrace and waitpid() on a forked child are expected to
+// fail with ECAPMODE, and sync() is expected to leave errno untouched.
+FORK_TEST_F(WithFiles, AllowedMiscSyscalls) {
+  // Set the mask twice so the second call returns the value just installed.
+  umask(022);
+  mode_t um_before = umask(022);
+  EXPECT_OK(cap_enter());  // Enter capability mode.
+
+  mode_t um = umask(022);
+  EXPECT_NE(-ECAPMODE, (int)um);
+  EXPECT_EQ(um_before, um);
+  stack_t ss;
+  EXPECT_OK(sigaltstack(NULL, &ss));
+
+  // Finally, tests for system calls that don't fit the pattern very well.
+  pid_t pid = fork();
+  EXPECT_OK(pid);
+  if (pid == 0) {
+    // Child: almost immediately exit.
+    sleep(1);
+    exit(0);
+  } else if (pid > 0) {
+    errno = 0;
+    EXPECT_CAPMODE(ptrace_(PTRACE_PEEKDATA_, pid, &pid, NULL));
+    EXPECT_CAPMODE(waitpid(pid, NULL, 0));
+  }
+
+  // No error return from sync(2) to test, but check errno remains unset.
+  errno = 0;
+  sync();
+  EXPECT_EQ(0, errno);
+
+  // TODO(FreeBSD): ktrace
+
+#ifdef HAVE_SYSARCH
+  // sysarch() is, by definition, architecture-dependent
+#if defined (__amd64__) || defined (__i386__)
+  long sysarch_arg = 0;
+  EXPECT_CAPMODE(sysarch(I386_SET_IOPERM, &sysarch_arg));
+#else
+  // TODO(jra): write a test for other architectures, like arm
+#endif
+#endif
+}
+
+// Thread body: sleep for the number of seconds pointed to by `p`, then check
+// that an allowed call succeeds and open() of /dev/null fails with ECAPMODE.
+void *thread_fn(void *p) {
+  int delay = *(int *)p;
+  sleep(delay);
+  EXPECT_OK(getpid_());
+  EXPECT_CAPMODE(open("/dev/null", O_RDWR));
+  return NULL;
+}
+
+// Check that restrictions are the same in subprocesses and threads
+FORK_TEST(Capmode, NewThread) {
+  // pthread_create()/pthread_join() return 0 on success and a positive error
+  // number on failure, so they are compared against 0; a ">= 0" check such as
+  // EXPECT_OK() could never fail for them.
+
+  // Fire off a new thread before entering capability mode
+  pthread_t early_thread;
+  int one = 1;  // second
+  EXPECT_EQ(0, pthread_create(&early_thread, NULL, thread_fn, &one));
+
+  // Fire off a new process before entering capability mode.
+  int early_child = fork();
+  EXPECT_OK(early_child);
+  if (early_child == 0) {
+    // Child: wait and then confirm this process is unaffected by capability mode in the parent.
+    sleep(1);
+    int fd = open("/dev/null", O_RDWR);
+    EXPECT_OK(fd);
+    close(fd);
+    exit(0);
+  }
+
+  EXPECT_OK(cap_enter());  // Enter capability mode.
+  // Do an allowed syscall.
+  EXPECT_OK(getpid_());
+  int child = fork();
+  EXPECT_OK(child);
+  if (child == 0) {
+    // Child: do an allowed and a disallowed syscall.
+    EXPECT_OK(getpid_());
+    EXPECT_CAPMODE(open("/dev/null", O_RDWR));
+    exit(0);
+  }
+  // Don't (can't) wait for either child.
+
+  // Wait for the early-started thread.
+  EXPECT_EQ(0, pthread_join(early_thread, NULL));
+
+  // Fire off a new thread.
+  pthread_t child_thread;
+  int zero = 0;  // seconds
+  EXPECT_EQ(0, pthread_create(&child_thread, NULL, thread_fn, &zero));
+  EXPECT_EQ(0, pthread_join(child_thread, NULL));
+
+  // Fork a subprocess which fires off a new thread.
+  child = fork();
+  EXPECT_OK(child);
+  if (child == 0) {
+    pthread_t child_thread2;
+    EXPECT_EQ(0, pthread_create(&child_thread2, NULL, thread_fn, &zero));
+    EXPECT_EQ(0, pthread_join(child_thread2, NULL));
+    exit(0);
+  }
+  // Sleep for a bit to allow the subprocess to finish.
+  sleep(2);
+}
+
+// Flag set by handle_signal() so SelfKill can tell that the signal arrived.
+// NOTE(review): written from a signal handler; `volatile sig_atomic_t` would
+// be the conventional type -- confirm it is compatible with the EXPECT_EQ use.
+static int had_signal = 0;
+static void handle_signal(int) { had_signal = 1; }
+
+// In capability mode, kill(2) aimed at another process is expected to fail
+// with ECAPMODE while kill(2) aimed at the caller itself succeeds.
+FORK_TEST(Capmode, SelfKill) {
+  pid_t me = getpid();
+  sighandler_t original = signal(SIGUSR1, handle_signal);
+
+  pid_t child = fork();
+  if (child == 0) {
+    // Child: sleep and exit
+    sleep(1);
+    exit(0);
+  }
+
+  EXPECT_OK(cap_enter());  // Enter capability mode.
+
+  // Can only kill(2) to own pid.
+  EXPECT_CAPMODE(kill(child, SIGUSR1));
+  EXPECT_OK(kill(me, SIGUSR1));
+  EXPECT_EQ(1, had_signal);
+
+  // Put back the SIGUSR1 disposition that was in place before the test.
+  signal(SIGUSR1, original);
+}
Index: head/contrib/capsicum-test/capsicum-freebsd.h
===================================================================
--- head/contrib/capsicum-test/capsicum-freebsd.h
+++ head/contrib/capsicum-test/capsicum-freebsd.h
@@ -0,0 +1,73 @@
+#ifndef __CAPSICUM_FREEBSD_H__
+#define __CAPSICUM_FREEBSD_H__
+#ifdef __FreeBSD__
+/************************************************************
+ * FreeBSD Capsicum Functionality.
+ ************************************************************/
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* FreeBSD definitions. */
+#include
+#include
+/* A different header is included depending on the FreeBSD version. */
+#if __FreeBSD_version >= 1100014 || \
+    (__FreeBSD_version >= 1001511 && __FreeBSD_version < 1100000)
+#include
+#else
+#include
+#endif
+#include
+
+/* Feature switches enabled when __FreeBSD_version >= 1000000. */
+#if __FreeBSD_version >= 1000000
+#define AT_SYSCALLS_IN_CAPMODE
+#define HAVE_CAP_RIGHTS_GET
+#define HAVE_CAP_RIGHTS_LIMIT
+#define HAVE_PROCDESC_FSTAT
+#define HAVE_CAP_FCNTLS_LIMIT
+// fcntl(2) takes int, cap_fcntls_limit(2) takes uint32_t.
+typedef uint32_t cap_fcntl_t;
+#define HAVE_CAP_IOCTLS_LIMIT
+// ioctl(2) and cap_ioctls_limit(2) take unsigned long.
+typedef unsigned long cap_ioctl_t;
+
+/* Only defined when __FreeBSD_version >= 1101000. */
+#if __FreeBSD_version >= 1101000
+#define HAVE_OPENAT_INTERMEDIATE_DOTDOT
+#endif
+
+#endif
+
+#ifdef __cplusplus
+}
+#endif
+
+// Use fexecve_() in tests to allow Linux variant to bypass glibc version.
+#define fexecve_(F, A, E) fexecve(F, A, E)
+
+// Values for the E_NO_TRAVERSE_* codes: ENOTBENEATH when the platform defines
+// it, otherwise ENOTCAPABLE.
+#ifdef ENOTBENEATH
+#define E_NO_TRAVERSE_CAPABILITY ENOTBENEATH
+#define E_NO_TRAVERSE_O_BENEATH ENOTBENEATH
+#else
+#define E_NO_TRAVERSE_CAPABILITY ENOTCAPABLE
+#define E_NO_TRAVERSE_O_BENEATH ENOTCAPABLE
+#endif
+
+// FreeBSD limits the number of ioctls in cap_ioctls_limit to 256
+#define CAP_IOCTLS_LIMIT_MAX 256
+
+// Too many links
+#define E_TOO_MANY_LINKS EMLINK
+
+// TODO(FreeBSD): uncomment if/when FreeBSD propagates rights on accept.
+// FreeBSD does not generate a capability from accept(cap_fd,...).
+// https://bugs.freebsd.org/201052
+// #define CAP_FROM_ACCEPT
+// TODO(FreeBSD): uncomment if/when FreeBSD propagates rights on sctp_peeloff.
+// FreeBSD does not generate a capability from sctp_peeloff(cap_fd,...).
+// https://bugs.freebsd.org/201052
+// #define CAP_FROM_PEELOFF
+
+#endif /* __FreeBSD__ */
+
+#endif /*__CAPSICUM_FREEBSD_H__*/
Index: head/contrib/capsicum-test/capsicum-linux.h
===================================================================
--- head/contrib/capsicum-test/capsicum-linux.h
+++ head/contrib/capsicum-test/capsicum-linux.h
@@ -0,0 +1,40 @@
+#ifndef __CAPSICUM_LINUX_H__
+#define __CAPSICUM_LINUX_H__
+
+#ifdef __linux__
+/************************************************************
+ * Linux Capsicum Functionality.
+ ************************************************************/
+#include
+#include
+#include
+
+// Feature switches defined unconditionally for Linux builds.
+#define HAVE_CAP_RIGHTS_LIMIT
+#define HAVE_CAP_RIGHTS_GET
+#define HAVE_CAP_FCNTLS_LIMIT
+#define HAVE_CAP_IOCTLS_LIMIT
+#define HAVE_PROC_FDINFO
+#define HAVE_PDWAIT4
+#define CAP_FROM_ACCEPT
+// TODO(drysdale): uncomment if/when Linux propagates rights on sctp_peeloff.
+// Linux does not generate a capability from sctp_peeloff(cap_fd,...).
+// #define CAP_FROM_PEELOFF
+// TODO(drysdale): uncomment if/when Linux allows intermediate .. path segments
+// for openat()-like operations.
+// #define HAVE_OPENAT_INTERMEDIATE_DOTDOT
+
+// Failure to open file due to path traversal generates EPERM
+#ifdef ENOTBENEATH
+#define E_NO_TRAVERSE_CAPABILITY ENOTBENEATH
+#define E_NO_TRAVERSE_O_BENEATH ENOTBENEATH
+#else
+#define E_NO_TRAVERSE_CAPABILITY EPERM
+#define E_NO_TRAVERSE_O_BENEATH EPERM
+#endif
+
+// Too many links
+#define E_TOO_MANY_LINKS ELOOP
+
+#endif /* __linux__ */
+
+#endif /*__CAPSICUM_LINUX_H__*/
Index: head/contrib/capsicum-test/capsicum-rights.h
===================================================================
--- head/contrib/capsicum-test/capsicum-rights.h
+++ head/contrib/capsicum-test/capsicum-rights.h
@@ -0,0 +1,118 @@
+#ifndef __CAPSICUM_RIGHTS_H__
+#define __CAPSICUM_RIGHTS_H__
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#ifdef __FreeBSD__
+#include
+#if __FreeBSD_version >= 1100014 || \
+    (__FreeBSD_version >= 1001511 && __FreeBSD_version < 1100000)
+#include
+#else
+#include
+#endif
+#endif
+
+#ifdef __linux__
+#include
+#endif
+
+#ifdef __cplusplus
+}
+#endif
+
+#ifndef CAP_RIGHTS_VERSION
+/************************************************************
+ * Capsicum compatibility layer: implement new (FreeBSD10.x)
+ * rights manipulation API in terms of original (FreeBSD9.x)
+ * functionality.
+ ************************************************************/
+#include
+#include
+
+/* Rights manipulation macros/functions.
+ * Note that these use variadic macros, available in C99 / C++11 (and
+ * also in earlier gcc versions).
+ * Each macro appends a 0ULL terminator to the caller's argument list.
+ */
+#define cap_rights_init(rights, ...) _cap_rights_init((rights), __VA_ARGS__, 0ULL)
+#define cap_rights_set(rights, ...) _cap_rights_set((rights), __VA_ARGS__, 0ULL)
+#define cap_rights_clear(rights, ...) _cap_rights_clear((rights), __VA_ARGS__, 0ULL)
+#define cap_rights_is_set(rights, ...) _cap_rights_is_set((rights), __VA_ARGS__, 0ULL)
+
+/* Reset *rights to 0, then OR in every argument up to the 0 terminator. */
+inline cap_rights_t* _cap_rights_init(cap_rights_t *rights, ...) {
+  va_list args;
+  cap_rights_t bit;
+  *rights = 0;
+  va_start(args, rights);
+  do {
+    bit = va_arg(args, cap_rights_t);
+    *rights |= bit;
+  } while (bit != 0);
+  va_end(args);
+  return rights;
+}
+
+/* OR every argument up to the 0 terminator into *rights. */
+inline cap_rights_t* _cap_rights_set(cap_rights_t *rights, ...) {
+  va_list args;
+  cap_rights_t bit;
+  va_start(args, rights);
+  do {
+    bit = va_arg(args, cap_rights_t);
+    *rights |= bit;
+  } while (bit != 0);
+  va_end(args);
+  return rights;
+}
+
+/* Clear from *rights every bit named by the arguments up to the 0 terminator. */
+inline cap_rights_t* _cap_rights_clear(cap_rights_t *rights, ...) {
+  va_list args;
+  cap_rights_t bit;
+  va_start(args, rights);
+  do {
+    bit = va_arg(args, cap_rights_t);
+    *rights &= ~bit;
+  } while (bit != 0);
+  va_end(args);
+  return rights;
+}
+
+/* Return true when all bits named by the arguments are present in *rights. */
+inline bool _cap_rights_is_set(const cap_rights_t *rights, ...) {
+  va_list args;
+  cap_rights_t bit;
+  cap_rights_t wanted = 0;
+  va_start(args, rights);
+  do {
+    bit = va_arg(args, cap_rights_t);
+    wanted |= bit;
+  } while (bit != 0);
+  va_end(args);
+  return (wanted & *rights) == wanted;
+}
+
+/* Always reports the rights as valid; the argument is not inspected. */
+inline bool _cap_rights_is_valid(const cap_rights_t *rights) {
+  return true;
+}
+
+/* Add every bit of *src to *dst. */
+inline cap_rights_t* cap_rights_merge(cap_rights_t *dst, const cap_rights_t *src) {
+  *dst = *dst | *src;
+  return dst;
+}
+
+/* Remove every bit of *src from *dst. */
+inline cap_rights_t* cap_rights_remove(cap_rights_t *dst, const cap_rights_t *src) {
+  *dst = *dst & ~(*src);
+  return dst;
+}
+
+/* Return true when every bit of *little is also set in *big. */
+inline bool cap_rights_contains(const cap_rights_t *big, const cap_rights_t *little) {
+  const cap_rights_t shared = (*big) & (*little);
+  return shared == (*little);
+}
+
+#endif /* old/new style rights manipulation */
+
+#endif /*__CAPSICUM_RIGHTS_H__*/
Index: head/contrib/capsicum-test/capsicum-test-main.cc
===================================================================
--- head/contrib/capsicum-test/capsicum-test-main.cc
+++ head/contrib/capsicum-test/capsicum-test-main.cc
@@ -0,0 +1,156 @@
+#include
+#ifdef __linux__
+#include
+#include
+#elif defined(__FreeBSD__)
+#include
+#endif
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include "gtest/gtest.h"
+#include "capsicum-test.h"
+
+// For versions of googletest that lack GTEST_SKIP.
+#ifndef GTEST_SKIP
+#define GTEST_SKIP GTEST_FAIL
+#endif
+
+// Root directory for test files: either supplied with -T or created by
+// SetupEnvironment::SetUp().
+std::string tmpdir;
+
+// Global test environment: checks Capsicum availability and provides the
+// temporary directory root, removing it again if it was created here.
+class SetupEnvironment : public ::testing::Environment
+{
+public:
+  SetupEnvironment() : teardown_tmpdir_(false) {}
+  void SetUp() override {
+    CheckCapsicumSupport();
+    if (tmpdir.empty()) {
+      std::cerr << "Generating temporary directory root: ";
+      CreateTemporaryRoot();
+    } else {
+      std::cerr << "User provided temporary directory root: ";
+    }
+    std::cerr << tmpdir << std::endl;
+  }
+  // On FreeBSD, skip when the kernel lacks Capsicum support or when the
+  // kern.trap_enotcap debug sysctl is enabled; a no-op elsewhere.
+  void CheckCapsicumSupport() {
+#ifdef __FreeBSD__
+    int rc;
+    bool trap_enotcap_enabled;
+    size_t trap_enotcap_enabled_len = sizeof(trap_enotcap_enabled);
+
+    if (feature_present("security_capabilities") == 0) {
+      GTEST_SKIP() << "Skipping tests because capsicum support is not "
+                   << "enabled in the kernel.";
+    }
+    // If this OID is enabled, it will send SIGTRAP to the process when
+    // `ENOTCAPABLE` is returned.
+    const char *oid = "kern.trap_enotcap";
+    rc = sysctlbyname(oid, &trap_enotcap_enabled, &trap_enotcap_enabled_len,
+                      nullptr, 0);
+    if (rc != 0) {
+      GTEST_FAIL() << "sysctlbyname failed: " << strerror(errno);
+    }
+    if (trap_enotcap_enabled) {
+      GTEST_SKIP() << "Debug sysctl, " << oid << ", enabled. "
+                   << "Skipping tests because its enablement invalidates the "
+                   << "test results.";
+    }
+#endif /* FreeBSD */
+  }
+  // Create a fresh directory (mode 0700) and record it in `tmpdir`.
+  void CreateTemporaryRoot() {
+    char *tmpdir_name = tempnam(nullptr, "cptst");
+
+    ASSERT_NE(tmpdir_name, nullptr);
+    ASSERT_EQ(mkdir(tmpdir_name, 0700), 0) <<
+        "Could not create temp directory, " << tmpdir_name << ": " <<
+        strerror(errno);
+    tmpdir = std::string(tmpdir_name);
+    free(tmpdir_name);
+    teardown_tmpdir_ = true;
+  }
+  void TearDown() override {
+    // Only remove the directory if CreateTemporaryRoot() made it.
+    if (teardown_tmpdir_) {
+      rmdir(tmpdir.c_str());
+    }
+  }
+private:
+  bool teardown_tmpdir_;
+};
+
+// Directory containing the test binary, derived from argv[0] in main().
+std::string capsicum_test_bindir;
+
+// Entry point: extends PATH with the binary's directory, parses the
+// suite-specific options (-v, -T dir, -t, -F, -u user) and runs all tests.
+// Returns the result of RUN_ALL_TESTS().
+int main(int argc, char* argv[]) {
+  // Set up the test program path, so capsicum-test can find programs, like
+  // mini-me* when executed from an absolute path.
+  {
+    // dirname() is given a private copy of argv[0].
+    char *program_name = strdup(argv[0]);
+    if (program_name == NULL) {
+      std::cerr << "Failed to copy program name, errno=" << errno << std::endl;
+      exit(1);
+    }
+    capsicum_test_bindir = std::string(dirname(program_name));
+    free(program_name);
+
+    // Prepend the binary's directory to PATH.  The calls with side effects
+    // are deliberately kept out of assert() so that they still happen when
+    // NDEBUG is defined.
+    std::string new_path = capsicum_test_bindir;
+    const char *old_path = getenv("PATH");
+    if (old_path != NULL) {
+      new_path += ":";
+      new_path += old_path;
+    }
+    if (setenv("PATH", new_path.c_str(), 1) != 0) {
+      std::cerr << "Failed to update PATH, errno=" << errno << std::endl;
+      exit(1);
+    }
+  }
+
+  ::testing::InitGoogleTest(&argc, argv);
+  for (int ii = 1; ii < argc; ii++) {
+    if (strcmp(argv[ii], "-v") == 0) {
+      verbose = true;
+    } else if (strcmp(argv[ii], "-T") == 0) {
+      if (++ii >= argc) {
+        std::cerr << "-T needs argument" << std::endl;
+        exit(1);
+      }
+      tmpdir = argv[ii];
+      // Reject anything that cannot be stat()ed or is not a directory.
+      struct stat info;
+      if (stat(tmpdir.c_str(), &info) != 0 || !S_ISDIR(info.st_mode)) {
+        std::cerr << "-T argument " << tmpdir << " is not a directory" << std::endl;
+        exit(1);
+      }
+    } else if (strcmp(argv[ii], "-t") == 0) {
+      force_mt = true;
+    } else if (strcmp(argv[ii], "-F") == 0) {
+      force_nofork = true;
+    } else if (strcmp(argv[ii], "-u") == 0) {
+      if (++ii >= argc) {
+        std::cerr << "-u needs argument" << std::endl;
+        exit(1);
+      }
+      // A leading digit means a numeric uid; otherwise look the name up.
+      // isdigit() requires a value representable as unsigned char.
+      if (isdigit((unsigned char)argv[ii][0])) {
+        other_uid = atoi(argv[ii]);
+      } else {
+        struct passwd *p = getpwnam(argv[ii]);
+        if (!p) {
+          std::cerr << "Failed to get entry for " << argv[ii] << ", errno=" << errno << std::endl;
+          exit(1);
+        }
+        other_uid = p->pw_uid;
+      }
+    }
+  }
+  // If other_uid is still 0, fall back to the owner of the test binary.
+  if (other_uid == 0) {
+    struct stat info;
+    if (stat(argv[0], &info) == 0) {
+      other_uid = info.st_uid;
+    }
+  }
+
+#ifdef __linux__
+  // Check whether our temporary directory is on a tmpfs volume.
+  // Without -T, tmpdir is still empty at this point (it is only generated in
+  // SetupEnvironment::SetUp()), so statfs() can fail; in that case fsinfo is
+  // not read and tmpdir_on_tmpfs keeps its current value.
+  struct statfs fsinfo;
+  if (statfs(tmpdir.c_str(), &fsinfo) == 0) {
+    tmpdir_on_tmpfs = (fsinfo.f_type == TMPFS_MAGIC);
+  }
+#endif
+
+  testing::AddGlobalTestEnvironment(new SetupEnvironment());
+  int rc = RUN_ALL_TESTS();
+  ShowSkippedTests(std::cerr);
+  return rc;
+}
Index: head/contrib/capsicum-test/capsicum-test.h
===================================================================
--- head/contrib/capsicum-test/capsicum-test.h
+++ head/contrib/capsicum-test/capsicum-test.h
@@ -0,0 +1,260 @@
+/* -*- C++ -*- */
+#ifndef CAPSICUM_TEST_H
+#define CAPSICUM_TEST_H
+
+#include
+#include
+#include
+#include
+#include
+
+#include
+#include
+
+#include "gtest/gtest.h"
+
+// Run-time options and shared state, set up by main().
+extern bool verbose;
+extern std::string tmpdir;
+extern bool tmpdir_on_tmpfs;
+extern bool force_mt;
+extern bool force_nofork;
+extern uid_t other_uid;
+
+// Thread body that sleeps in 10ms steps and offers a cancellation point on
+// each iteration; it only ends when the thread is cancelled.
+static inline void *WaitingThreadFn(void *) {
+  // Loop until cancelled
+  while (true) {
+    usleep(10000);
+    pthread_testcancel();
+  }
+  return NULL;
+}
+
+// If force_mt is set, run another thread in parallel with the test. This forces
+// the kernel into multi-threaded mode.
+template
+void MaybeRunWithThread(T *self, Function fn) {
+  pthread_t subthread;
+  if (force_mt) {
+    pthread_create(&subthread, NULL, WaitingThreadFn, NULL);
+  }
+  (self->*fn)();
+  if (force_mt) {
+    pthread_cancel(subthread);
+    pthread_join(subthread, NULL);
+  }
+}
+// Same as above, for a plain callable instead of a member function.
+template
+void MaybeRunWithThread(Function fn) {
+  pthread_t subthread;
+  if (force_mt) {
+    pthread_create(&subthread, NULL, WaitingThreadFn, NULL);
+  }
+  (fn)();
+  if (force_mt) {
+    pthread_cancel(subthread);
+    pthread_join(subthread, NULL);
+  }
+}
+
+// Return the absolute path of a filename in the temp directory, `tmpdir`,
+// with the given pathname, e.g., "/tmp/", if `tmpdir` was set to
+// "/tmp".
+const char *TmpFile(const char *pathname);
+
+// Run the given test function in a forked process, so that trapdoor
+// entry doesn't affect other tests, and watch out for hung processes.
+// Implemented as a macro to allow access to the test case instance's
+// HasFailure() method, which is reported as the forked process's
+// exit status.
+// The parent polls the child every 10ms for up to 10 seconds and kills it
+// with SIGKILL if it has not finished by then.  With force_nofork set, the
+// code runs in the current process instead.  A failed fork() is reported as
+// a test failure rather than being silently ignored.
+#define _RUN_FORKED(INNERCODE, TESTCASENAME, TESTNAME)         \
+    pid_t pid = force_nofork ? 0 : fork();                     \
+    if (pid == 0) {                                            \
+      INNERCODE;                                               \
+      if (!force_nofork) {                                     \
+        exit(HasFailure());                                    \
+      }                                                        \
+    } else if (pid > 0) {                                      \
+      int rc, status;                                          \
+      int remaining_us = 10000000;                             \
+      while (remaining_us > 0) {                               \
+        status = 0;                                            \
+        rc = waitpid(pid, &status, WNOHANG);                   \
+        if (rc != 0) break;                                    \
+        remaining_us -= 10000;                                 \
+        usleep(10000);                                         \
+      }                                                        \
+      if (remaining_us <= 0) {                                 \
+        fprintf(stderr, "Warning: killing unresponsive test "  \
+                        "%s.%s (pid %d)\n",                    \
+                TESTCASENAME, TESTNAME, pid);                  \
+        kill(pid, SIGKILL);                                    \
+        ADD_FAILURE() << "Test hung";                          \
+      } else if (rc < 0) {                                     \
+        fprintf(stderr, "Warning: waitpid error %s (%d)\n",    \
+                strerror(errno), errno);                       \
+        ADD_FAILURE() << "Failed to wait for child";           \
+      } else {                                                 \
+        int rc = WIFEXITED(status) ? WEXITSTATUS(status) : -1; \
+        EXPECT_EQ(0, rc);                                      \
+      }                                                        \
+    } else {                                                   \
+      ADD_FAILURE() << "fork() failed: " << strerror(errno);   \
+    }
+#define _RUN_FORKED_MEM(THIS, TESTFN, TESTCASENAME, TESTNAME)  \
+    _RUN_FORKED(MaybeRunWithThread(THIS, &TESTFN), TESTCASENAME, TESTNAME);
+#define _RUN_FORKED_FN(TESTFN, TESTCASENAME, TESTNAME)         \
+    _RUN_FORKED(MaybeRunWithThread(&TESTFN), TESTCASENAME, TESTNAME);
+
+// Run a test case in a forked process, possibly cleaning up a
+// test file after completion
+#define FORK_TEST_ON(test_case_name, test_name, test_file)    \
+    static void test_case_name##_##test_name##_ForkTest();    \
+    TEST(test_case_name, test_name ## Forked) {               \
+      _RUN_FORKED_FN(test_case_name##_##test_name##_ForkTest, \
+                     #test_case_name, #test_name);            \
+      const char *filename = test_file;                       \
+      if (filename) unlink(filename);                         \
+    }                                                         \
+    static void test_case_name##_##test_name##_ForkTest()
+
+#define FORK_TEST(test_case_name, test_name) FORK_TEST_ON(test_case_name, test_name, NULL)
+
+// Run a test case fixture in a forked process, so that trapdoors don't
+// affect other tests.
+#define ICLASS_NAME(test_case_name, test_name) Forked##test_case_name##_##test_name
+#define FORK_TEST_F(test_case_name, test_name)                \
+  class ICLASS_NAME(test_case_name, test_name) : public test_case_name { \
+    public:                                                   \
+      ICLASS_NAME(test_case_name, test_name)() {}             \
+    void InnerTestBody();                                     \
+  };                                                          \
+  TEST_F(ICLASS_NAME(test_case_name, test_name), _) {         \
+    _RUN_FORKED_MEM(this,                                     \
+                    ICLASS_NAME(test_case_name, test_name)::InnerTestBody, \
+                    #test_case_name, #test_name);             \
+  }                                                           \
+  void ICLASS_NAME(test_case_name, test_name)::InnerTestBody()
+
+// Emit errno information on failure
+#define EXPECT_OK(v) EXPECT_LE(0, v) << " errno " << errno << " " << strerror(errno)
+
+// Expect a syscall to fail with the given error.
+#define EXPECT_SYSCALL_FAIL(E, C) \
+    do { \
+      EXPECT_GT(0, C); \
+      EXPECT_EQ(E, errno); \
+    } while (0)
+
+// Expect a syscall to fail with anything other than the given error.
+#define EXPECT_SYSCALL_FAIL_NOT(E, C) \
+    do { \
+      EXPECT_GT(0, C); \
+      EXPECT_NE(E, errno); \
+    } while (0)
+
+// Expect a void syscall to fail with anything other than the given error.
+// errno is cleared before the call because there is no return value to test.
+// NOTE(review): the failure message names ECAPMODE regardless of E.
+#define EXPECT_VOID_SYSCALL_FAIL_NOT(E, C) \
+    do { \
+      errno = 0; \
+      C; \
+      EXPECT_NE(E, errno) << #C << " failed with ECAPMODE"; \
+    } while (0)
+
+// Expect a system call to fail due to path traversal; exact error
+// code is OS-specific.
+// When O_BENEATH exists, the expected code depends on whether the flag was
+// passed in `flags`.
+#ifdef O_BENEATH
+#define EXPECT_OPENAT_FAIL_TRAVERSAL(fd, path, flags) \
+    do { \
+      const int result = openat((fd), (path), (flags)); \
+      if (((flags) & O_BENEATH) == O_BENEATH) { \
+        EXPECT_SYSCALL_FAIL(E_NO_TRAVERSE_O_BENEATH, result); \
+      } else { \
+        EXPECT_SYSCALL_FAIL(E_NO_TRAVERSE_CAPABILITY, result); \
+      } \
+    } while (0)
+#else
+#define EXPECT_OPENAT_FAIL_TRAVERSAL(fd, path, flags) \
+    do { \
+      const int result = openat((fd), (path), (flags)); \
+      EXPECT_SYSCALL_FAIL(E_NO_TRAVERSE_CAPABILITY, result); \
+    } while (0)
+#endif
+
+// Expect a system call to fail with ECAPMODE.
+#define EXPECT_CAPMODE(C) EXPECT_SYSCALL_FAIL(ECAPMODE, C)
+
+// Expect a system call to fail, but not with ECAPMODE.
+#define EXPECT_FAIL_NOT_CAPMODE(C) EXPECT_SYSCALL_FAIL_NOT(ECAPMODE, C)
+#define EXPECT_FAIL_VOID_NOT_CAPMODE(C) EXPECT_VOID_SYSCALL_FAIL_NOT(ECAPMODE, C)
+
+// Expect a system call to fail with ENOTCAPABLE.
+#define EXPECT_NOTCAPABLE(C) EXPECT_SYSCALL_FAIL(ENOTCAPABLE, C)
+
+// Expect a system call to fail, but not with ENOTCAPABLE.
+#define EXPECT_FAIL_NOT_NOTCAPABLE(C) EXPECT_SYSCALL_FAIL_NOT(ENOTCAPABLE, C)
+
+// Expect a system call to fail with either ENOTCAPABLE or ECAPMODE.
+#define EXPECT_CAPFAIL(C) \
+    do { \
+      int rc = C; \
+      EXPECT_GT(0, rc); \
+      EXPECT_TRUE(errno == ECAPMODE || errno == ENOTCAPABLE) \
+        << #C << " did not fail with ECAPMODE/ENOTCAPABLE but " << errno; \
+    } while (0)
+
+// Ensure that 'rights' are a subset of 'max'.
+#define EXPECT_RIGHTS_IN(rights, max) \
+    EXPECT_TRUE(cap_rights_contains((max), (rights))) \
+    << "rights " << std::hex << *(rights) \
+    << " not a subset of " << std::hex << *(max)
+
+// Ensure rights are identical
+// (checked as containment in both directions).
+#define EXPECT_RIGHTS_EQ(a, b) \
+    do { \
+      EXPECT_RIGHTS_IN((a), (b)); \
+      EXPECT_RIGHTS_IN((b), (a)); \
+    } while (0)
+
+// Get the state of a process as a single character.
+// - 'D': disk wait
+// - 'R': runnable
+// - 'S': sleeping/idle
+// - 'T': stopped
+// - 'Z': zombie
+// On error, return either '?' or '\0'.
+char ProcessState(int pid);
+
+// Check process state reaches a particular expected state (or two).
+// Retries a few times to allow for timing issues.
+// The state is sampled up to 5 times with a 100ms pause after each miss.
+// The macro expands to a bare { } block, and `pid` is evaluated more than
+// once.
+#define EXPECT_PID_REACHES_STATES(pid, expected1, expected2) { \
+    int counter = 5; \
+    char state; \
+    do { \
+      state = ProcessState(pid); \
+      if (state == expected1 || state == expected2) break; \
+      usleep(100000); \
+    } while (--counter > 0); \
+    EXPECT_TRUE(state == expected1 || state == expected2) \
+      << " pid " << pid << " in state " << state; \
+}
+
+// Convenience wrappers; note that the ZOMBIE and GONE variants already end
+// in a semicolon.
+#define EXPECT_PID_ALIVE(pid) EXPECT_PID_REACHES_STATES(pid, 'R', 'S')
+#define EXPECT_PID_DEAD(pid) EXPECT_PID_REACHES_STATES(pid, 'Z', '\0')
+#define EXPECT_PID_ZOMBIE(pid) EXPECT_PID_REACHES_STATES(pid, 'Z', 'Z');
+#define EXPECT_PID_GONE(pid) EXPECT_PID_REACHES_STATES(pid, '\0', '\0');
+
+// Skipped-test bookkeeping: TEST_SKIPPED logs the current test to stderr and
+// records it with TestSkipped().
+void ShowSkippedTests(std::ostream& os);
+void TestSkipped(const char *testcase, const char *test, const std::string& reason);
+#define TEST_SKIPPED(reason) \
+    do { \
+      const ::testing::TestInfo* const info = ::testing::UnitTest::GetInstance()->current_test_info(); \
+      std::cerr << "Skipping " << info->test_case_name() << "::" << info->name() << " because: " << reason << std::endl; \
+      TestSkipped(info->test_case_name(), info->name(), reason); \
+    } while (0)
+
+// Mark a test that can only be run as root.
+#define REQUIRE_ROOT() \
+  if (getuid() != 0) { \
+    TEST_SKIPPED("requires root"); \
+    return; \
+  }
+
+#endif  // CAPSICUM_TEST_H
Index: head/contrib/capsicum-test/capsicum-test.cc
===================================================================
--- head/contrib/capsicum-test/capsicum-test.cc
+++ head/contrib/capsicum-test/capsicum-test.cc
@@ -0,0 +1,102 @@
+#include "capsicum-test.h"
+
+#include
+#include
+#include
+
+#include
+#include
+#include
+
+// Defaults for the run-time options declared in capsicum-test.h.
+bool verbose = false;
+bool tmpdir_on_tmpfs = false;
+bool force_mt = false;
+bool force_nofork = false;
+uid_t other_uid = 0;
+
+namespace {
+// Cache of the full paths already handed out by TmpFile(), keyed by the
+// relative name.
+std::map tmp_paths;
+}
+
+// Return `tmpdir` + "/" + `p`.  The result is stored in tmp_paths and the
+// returned pointer refers to that stored entry.
+const char *TmpFile(const char *p) {
+  std::string pathname(p);
+  if (tmp_paths.find(pathname) == tmp_paths.end()) {
+    std::string fullname = tmpdir + "/" + pathname;
+    tmp_paths[pathname] = fullname;
+  }
+  return tmp_paths[pathname].c_str();
+}
+
+// Return a one-character state code for process `pid`.
+// Linux: the character after "State:\t" in /proc/<pid>/status; '\0' if the
+// file cannot be opened, '?' if no such line is found.
+// FreeBSD: the first character printed by ps(1) for the process, mapped onto
+// the Linux codes; '\0' if ps printed nothing, '?' for unmapped codes or if
+// the command could not be started.
+char ProcessState(int pid) {
+#ifdef __linux__
+  // Open the process status file.
+  char s[1024];
+  snprintf(s, sizeof(s), "/proc/%d/status", pid);
+  FILE *f = fopen(s, "r");
+  if (f == NULL) return '\0';
+
+  // Read the file line by line looking for the state line.
+  // fgets() returns NULL at end of file or on a read error; either ends the
+  // scan, so a stale buffer is never re-examined.
+  const char *prompt = "State:\t";
+  while (fgets(s, sizeof(s), f) != NULL) {
+    if (!strncmp(s, prompt, strlen(prompt))) {
+      fclose(f);
+      return s[strlen(prompt)];
+    }
+  }
+  fclose(f);
+  return '?';
+#endif
+#ifdef __FreeBSD__
+  char buffer[1024];
+  snprintf(buffer, sizeof(buffer), "ps -p %d -o state | grep -v STAT", pid);
+  // SIGCHLD is ignored while the helper command is started.
+  sig_t original = signal(SIGCHLD, SIG_IGN);
+  FILE* cmd = popen(buffer, "r");
+  usleep(50000);  // allow any pending SIGCHLD signals to arrive
+  signal(SIGCHLD, original);
+  // Report "unknown" rather than "gone" if the command could not be run.
+  if (cmd == NULL) return '?';
+  int result = fgetc(cmd);
+  // A stream opened with popen() must be closed with pclose().
+  pclose(cmd);
+  // Map FreeBSD codes to Linux codes.
+  switch (result) {
+    case EOF:
+      return '\0';
+    case 'D':  // disk wait
+    case 'R':  // runnable
+    case 'S':  // sleeping
+    case 'T':  // stopped
+    case 'Z':  // zombie
+      return result;
+    case 'W':  // idle interrupt thread
+      return 'S';
+    case 'I':  // idle
+      return 'S';
+    case 'L':  // waiting to acquire lock
+    default:
+      return '?';
+  }
+#endif
+}
+
+// Skipped tests are grouped by the reason they were skipped.
+typedef std::vector TestList;
+typedef std::map SkippedTestMap;
+static SkippedTestMap skipped_tests;
+// Record "<testcase>.<test>" under `reason`, creating the list on first use.
+void TestSkipped(const char *testcase, const char *test, const std::string& reason) {
+  if (skipped_tests.find(reason) == skipped_tests.end()) {
+    skipped_tests[reason] = new TestList;
+  }
+  std::string testname(testcase);
+  testname += ".";
+  testname += test;
+  skipped_tests[reason]->push_back(testname);
+}
+
+// Write every recorded reason, followed by the tests skipped for it, to `os`.
+void ShowSkippedTests(std::ostream& os) {
+  for (SkippedTestMap::iterator skiplist = skipped_tests.begin();
+       skiplist != skipped_tests.end(); ++skiplist) {
+    os << "Following tests were skipped because: " << skiplist->first << std::endl;
+    for (size_t ii = 0; ii < skiplist->second->size(); ++ii) {
+      const std::string& testname((*skiplist->second)[ii]);
+      os << " " << testname << std::endl;
+    }
+  }
+}
Index: head/contrib/capsicum-test/capsicum.h
===================================================================
--- head/contrib/capsicum-test/capsicum.h
+++ head/contrib/capsicum-test/capsicum.h
@@ -0,0 +1,175 @@
+/*
+ * Minimal portability layer for Capsicum-related features.
+ */
+#ifndef __CAPSICUM_H__
+#define __CAPSICUM_H__
+
+#ifdef __FreeBSD__
+#include "capsicum-freebsd.h"
+#endif
+
+#ifdef __linux__
+#include "capsicum-linux.h"
+#endif
+
+/*
+ * CAP_ALL/CAP_NONE is a value in FreeBSD9.x Capsicum, but a functional macro
+ * in FreeBSD10.x Capsicum. Always use CAP_SET_ALL/CAP_SET_NONE instead.
+ */
+#ifndef CAP_SET_ALL
+#ifdef CAP_RIGHTS_VERSION
+#define CAP_SET_ALL(rights) CAP_ALL(rights)
+#else
+#define CAP_SET_ALL(rights) *(rights) = CAP_MASK_VALID
+#endif
+#endif
+
+#ifndef CAP_SET_NONE
+#ifdef CAP_RIGHTS_VERSION
+#define CAP_SET_NONE(rights) CAP_NONE(rights)
+#else
+#define CAP_SET_NONE(rights) *(rights) = 0
+#endif
+#endif
+
+
+/************************************************************
+ * Define new-style rights in terms of old-style rights if
+ * absent.
+ ************************************************************/
+#include "capsicum-rights.h"
+
+/*
+ * Cope with systems (e.g. FreeBSD 10.x) where CAP_RENAMEAT hasn't been split out.
+ * (src, dest): RENAMEAT, LINKAT => RENAMEAT_SOURCE, RENAMEAT_TARGET
+ */
+#ifndef CAP_RENAMEAT_SOURCE
+#define CAP_RENAMEAT_SOURCE CAP_RENAMEAT
+#endif
+#ifndef CAP_RENAMEAT_TARGET
+#define CAP_RENAMEAT_TARGET CAP_LINKAT
+#endif
+/*
+ * Cope with systems (e.g. FreeBSD 10.x) where CAP_LINKAT hasn't been split out.
+ * (src, dest): 0, LINKAT => LINKAT_SOURCE, LINKAT_TARGET
+ */
+#ifndef CAP_LINKAT_SOURCE
+#define CAP_LINKAT_SOURCE CAP_LOOKUP
+#endif
+#ifndef CAP_LINKAT_TARGET
+#define CAP_LINKAT_TARGET CAP_LINKAT
+#endif
+
+#ifdef CAP_PREAD
+/* Existence of CAP_PREAD implies new-style CAP_SEEK semantics */
+#define CAP_SEEK_ASWAS 0
+#else
+/* Old-style CAP_SEEK semantics */
+#define CAP_SEEK_ASWAS CAP_SEEK
+#define CAP_PREAD CAP_READ
+#define CAP_PWRITE CAP_WRITE
+#endif
+
+#ifndef CAP_MMAP_R
+#define CAP_MMAP_R (CAP_READ|CAP_MMAP)
+#define CAP_MMAP_W (CAP_WRITE|CAP_MMAP)
+#define CAP_MMAP_X (CAP_MAPEXEC|CAP_MMAP)
+#define CAP_MMAP_RW (CAP_MMAP_R|CAP_MMAP_W)
+#define CAP_MMAP_RX (CAP_MMAP_R|CAP_MMAP_X)
+#define CAP_MMAP_WX (CAP_MMAP_W|CAP_MMAP_X)
+#define CAP_MMAP_RWX (CAP_MMAP_R|CAP_MMAP_W|CAP_MMAP_X)
+#endif
+
+#ifndef CAP_MKFIFOAT
+#define CAP_MKFIFOAT CAP_MKFIFO
+#endif
+
+#ifndef CAP_MKNODAT
+#define CAP_MKNODAT CAP_MKFIFOAT
+#endif
+
+#ifndef CAP_MKDIRAT
+#define CAP_MKDIRAT CAP_MKDIR
+#endif
+
+#ifndef CAP_UNLINKAT
+#define CAP_UNLINKAT CAP_RMDIR
+#endif
+
+#ifndef CAP_SOCK_CLIENT
+#define CAP_SOCK_CLIENT \
+  (CAP_CONNECT | CAP_GETPEERNAME | CAP_GETSOCKNAME | CAP_GETSOCKOPT | \
+   CAP_PEELOFF | CAP_READ | CAP_WRITE | CAP_SETSOCKOPT | CAP_SHUTDOWN)
+#endif
+
+#ifndef CAP_SOCK_SERVER
+#define CAP_SOCK_SERVER \
+  (CAP_ACCEPT | CAP_BIND | CAP_GETPEERNAME | CAP_GETSOCKNAME | \
+   CAP_GETSOCKOPT | CAP_LISTEN | CAP_PEELOFF | CAP_READ | CAP_WRITE | \
+   CAP_SETSOCKOPT | CAP_SHUTDOWN)
+#endif
+
+#ifndef CAP_EVENT
+#define CAP_EVENT CAP_POLL_EVENT
+#endif
+
+/************************************************************
+ * Define new-style API functions in terms of old-style API
+ * functions if absent.
+ ************************************************************/
+#ifndef HAVE_CAP_RIGHTS_GET
+/* Define cap_rights_get() in terms of old-style cap_getrights() */
+inline int cap_rights_get(int fd, cap_rights_t *rights) {
+  return cap_getrights(fd, rights);
+}
+#endif
+
+#ifndef HAVE_CAP_RIGHTS_LIMIT
+/* Define cap_rights_limit() in terms of old-style cap_new() and dup2() */
+#include
+inline int cap_rights_limit(int fd, const cap_rights_t *rights) {
+  int cap = cap_new(fd, *rights);
+  if (cap < 0) return cap;
+  int rc = dup2(cap, fd);
+  /* Release the temporary capability whether or not dup2() succeeded. */
+  close(cap);
+  return rc;
+}
+#endif
+
+#include
+#ifdef CAP_RIGHTS_VERSION
+/* New-style Capsicum API extras for debugging */
+static inline void cap_rights_describe(const cap_rights_t *rights, char *buffer) {
+  int ii;
+  for (ii = 0; ii < (CAP_RIGHTS_VERSION+2); ii++) {
+    int len = sprintf(buffer, "0x%016llx ", (unsigned long long)rights->cr_rights[ii]);
+    buffer += len;
+  }
+}
+
+#ifdef __cplusplus
+#include
+#include
+inline std::ostream& operator<<(std::ostream& os, cap_rights_t rights) {
+  for (int ii = 0; ii < (CAP_RIGHTS_VERSION+2); ii++) {
+    os << std::hex << std::setw(16) << std::setfill('0') << (unsigned long long)rights.cr_rights[ii] << " ";
+  }
+  return os;
+}
+#endif
+
+#else
+
+static inline void cap_rights_describe(const cap_rights_t *rights, char *buffer) {
+  sprintf(buffer, "0x%016llx", (unsigned long long)(*rights));
+}
+
+#endif /* new/old style rights manipulation */
+
+#ifdef __cplusplus
+#include
+extern std::string capsicum_test_bindir;
+#endif
+
+#endif /*__CAPSICUM_H__*/
Index: head/contrib/capsicum-test/fcntl.cc
===================================================================
--- head/contrib/capsicum-test/fcntl.cc
+++ head/contrib/capsicum-test/fcntl.cc
@@ -0,0 +1,411 @@
+// Test that fcntl works in capability mode.
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include
+#include
+
+#include "capsicum.h"
+#include "capsicum-test.h"
+#include "syscalls.h"
+
+// Ensure that fcntl() works consistently for both regular file descriptors and
+// capability-wrapped ones.
+FORK_TEST(Fcntl, Basic) {
+  cap_rights_t rights;
+  cap_rights_init(&rights, CAP_READ, CAP_FCNTL);
+
+  typedef std::map FileMap;
+
+  // Open some files of different types, and wrap them in capabilities.
+  FileMap files;
+  files["file"] = open("/etc/passwd", O_RDONLY);
+  EXPECT_OK(files["file"]);
+  files["socket"] = socket(PF_LOCAL, SOCK_STREAM, 0);
+  EXPECT_OK(files["socket"]);
+  char shm_name[128];
+  snprintf(shm_name, sizeof(shm_name), "/capsicum-test-%d", getuid());
+  files["SHM"] = shm_open(shm_name, (O_CREAT|O_RDWR), 0600);
+  if ((files["SHM"] == -1) && errno == ENOSYS) {
+    // shm_open() is not implemented in user-mode Linux.
+    files.erase("SHM");
+  } else {
+    EXPECT_OK(files["SHM"]);
+  }
+
+  FileMap caps;
+  for (FileMap::iterator ii = files.begin(); ii != files.end(); ++ii) {
+    std::string key = ii->first + " cap";
+    caps[key] = dup(ii->second);
+    EXPECT_OK(caps[key]) << " on " << ii->first; // check dup() before limiting it
+    EXPECT_OK(cap_rights_limit(caps[key], &rights));
+  }
+
+  FileMap all(files);
+  all.insert(caps.begin(), caps.end()); // also cover the capability-wrapped FDs
+
+  EXPECT_OK(cap_enter()); // Enter capability mode.
+
+  // Ensure that we can fcntl() all the files that we opened above.
+  cap_rights_t r_ro;
+  cap_rights_init(&r_ro, CAP_READ);
+  for (FileMap::iterator ii = all.begin(); ii != all.end(); ++ii) {
+    EXPECT_OK(fcntl(ii->second, F_GETFL, 0)) << " on " << ii->first;
+    int cap = dup(ii->second);
+    EXPECT_OK(cap) << " on " << ii->first;
+    EXPECT_OK(cap_rights_limit(cap, &r_ro)) << " on " << ii->first;
+    EXPECT_EQ(-1, fcntl(cap, F_GETFL, 0)) << " on " << ii->first;
+    EXPECT_EQ(ENOTCAPABLE, errno) << " on " << ii->first;
+    close(cap);
+  }
+  for (FileMap::iterator ii = all.begin(); ii != all.end(); ++ii) {
+    close(ii->second);
+  }
+  shm_unlink(shm_name);
+}
+
+// Supported fcntl(2) operations:
+// FreeBSD10 FreeBSD9.1: Linux: Rights: Summary:
+// F_DUPFD F_DUPFD F_DUPFD NONE as dup(2)
+// F_DUPFD_CLOEXEC F_DUPFD_CLOEXEC NONE as dup(2) with close-on-exec
+// F_DUP2FD F_DUP2FD NONE as dup2(2)
+// F_DUP2FD_CLOEXEC NONE as dup2(2) with close-on-exec
+// F_GETFD F_GETFD F_GETFD NONE get close-on-exec flag
+// F_SETFD F_SETFD F_SETFD NONE set close-on-exec flag
+// * F_GETFL F_GETFL F_GETFL FCNTL get file status flag
+// * F_SETFL F_SETFL F_SETFL FCNTL set file status flag
+// * F_GETOWN F_GETOWN F_GETOWN FCNTL get pid receiving SIGIO/SIGURG
+// * F_SETOWN F_SETOWN F_SETOWN FCNTL set pid receiving SIGIO/SIGURG
+// * F_GETOWN_EX FCNTL get pid/thread receiving SIGIO/SIGURG
+// * F_SETOWN_EX FCNTL set pid/thread receiving SIGIO/SIGURG
+// F_GETLK F_GETLK F_GETLK FLOCK get lock info
+// F_SETLK F_SETLK F_SETLK FLOCK set lock info
+// F_SETLK_REMOTE FLOCK set lock info
+// F_SETLKW F_SETLKW F_SETLKW FLOCK set lock info (blocking)
+// F_READAHEAD F_READAHEAD NONE set or clear readahead amount
+// F_RDAHEAD F_RDAHEAD NONE set or clear readahead amount to 128KB
+// F_GETSIG POLL_EVENT+FSIGNAL get signal sent when I/O possible
+// F_SETSIG POLL_EVENT+FSIGNAL set signal sent when I/O possible
+// F_GETLEASE FLOCK+FSIGNAL get lease on file descriptor
+// F_SETLEASE FLOCK+FSIGNAL set new lease on file descriptor
+// F_NOTIFY NOTIFY generate signal on changes
(dnotify)
+// F_GETPIPE_SZ GETSOCKOPT get pipe size
+// F_SETPIPE_SZ SETSOCKOPT set pipe size
+// F_GET_SEAL FSTAT get memfd seals
+// F_ADD_SEAL FCHMOD set memfd seal
+// If HAVE_CAP_FCNTLS_LIMIT is defined, then fcntl(2) operations that require
+// CAP_FCNTL (marked with * above) can be further limited with cap_fcntls_limit(2).
+namespace {
+#define FCNTL_NUM_RIGHTS 9
+cap_rights_t fcntl_rights[FCNTL_NUM_RIGHTS];
+void InitRights() {
+  cap_rights_init(&(fcntl_rights[0]), 0); // Later code assumes this is at [0]
+  cap_rights_init(&(fcntl_rights[1]), CAP_READ, CAP_WRITE);
+  cap_rights_init(&(fcntl_rights[2]), CAP_FCNTL);
+  cap_rights_init(&(fcntl_rights[3]), CAP_FLOCK);
+#ifdef CAP_FSIGNAL
+  cap_rights_init(&(fcntl_rights[4]), CAP_EVENT, CAP_FSIGNAL);
+  cap_rights_init(&(fcntl_rights[5]), CAP_FLOCK, CAP_FSIGNAL);
+#else
+  cap_rights_init(&(fcntl_rights[4]), 0);
+  cap_rights_init(&(fcntl_rights[5]), 0);
+#endif
+#ifdef CAP_NOTIFY
+  cap_rights_init(&(fcntl_rights[6]), CAP_NOTIFY);
+#else
+  cap_rights_init(&(fcntl_rights[6]), 0);
+#endif
+  cap_rights_init(&(fcntl_rights[7]), CAP_SETSOCKOPT);
+  cap_rights_init(&(fcntl_rights[8]), CAP_GETSOCKOPT);
+}
+
+int CheckFcntl(unsigned long long right, int caps[FCNTL_NUM_RIGHTS], int cmd, long arg, const char* context) {
+  SCOPED_TRACE(context);
+  cap_rights_t rights;
+  cap_rights_init(&rights, right);
+  int ok_index = -1;
+  for (int ii = 0; ii < FCNTL_NUM_RIGHTS; ++ii) {
+    if (cap_rights_contains(&(fcntl_rights[ii]), &rights)) {
+      if (ok_index == -1) ok_index = ii;
+      continue;
+    }
+    EXPECT_NOTCAPABLE(fcntl(caps[ii], cmd, arg));
+  }
+  EXPECT_NE(-1, ok_index);
+  int rc = (ok_index == -1) ? -1 : fcntl(caps[ok_index], cmd, arg); // never index caps[-1]
+  EXPECT_OK(rc);
+  return rc;
+}
+} // namespace
+
+#define CHECK_FCNTL(right, caps, cmd, arg) \
+    CheckFcntl(right, caps, cmd, arg, "fcntl(" #cmd ") expect " #right)
+
+TEST(Fcntl, Commands) {
+  InitRights();
+  int fd = open(TmpFile("cap_fcntl_cmds"), O_RDWR|O_CREAT, 0644);
+  EXPECT_OK(fd);
+  EXPECT_OK(write(fd, "TEST", 4));
+  int sock = socket(PF_LOCAL, SOCK_STREAM, 0);
+  EXPECT_OK(sock);
+  int caps[FCNTL_NUM_RIGHTS];
+  int sock_caps[FCNTL_NUM_RIGHTS];
+  for (int ii = 0; ii < FCNTL_NUM_RIGHTS; ++ii) {
+    caps[ii] = dup(fd);
+    EXPECT_OK(caps[ii]);
+    EXPECT_OK(cap_rights_limit(caps[ii], &(fcntl_rights[ii])));
+    sock_caps[ii] = dup(sock);
+    EXPECT_OK(sock_caps[ii]);
+    EXPECT_OK(cap_rights_limit(sock_caps[ii], &(fcntl_rights[ii])));
+  }
+
+  // Check the things that need no rights against caps[0].
+  int newfd = fcntl(caps[0], F_DUPFD, 0);
+  EXPECT_OK(newfd);
+  // dup()'ed FD should have same rights.
+  cap_rights_t rights;
+  cap_rights_init(&rights, 0);
+  EXPECT_OK(cap_rights_get(newfd, &rights));
+  EXPECT_RIGHTS_EQ(&(fcntl_rights[0]), &rights);
+  close(newfd);
+#ifdef HAVE_F_DUP2FD
+  EXPECT_OK(fcntl(caps[0], F_DUP2FD, newfd));
+  // dup2()'ed FD should have same rights.
+  EXPECT_OK(cap_rights_get(newfd, &rights));
+  EXPECT_RIGHTS_EQ(&(fcntl_rights[0]), &rights);
+  close(newfd);
+#endif
+
+  EXPECT_OK(fcntl(caps[0], F_GETFD, 0));
+  EXPECT_OK(fcntl(caps[0], F_SETFD, 0));
+
+  // Check operations that need CAP_FCNTL.
+  int fd_flag = CHECK_FCNTL(CAP_FCNTL, caps, F_GETFL, 0);
+  EXPECT_EQ(0, CHECK_FCNTL(CAP_FCNTL, caps, F_SETFL, fd_flag));
+  int owner = CHECK_FCNTL(CAP_FCNTL, sock_caps, F_GETOWN, 0);
+  EXPECT_EQ(0, CHECK_FCNTL(CAP_FCNTL, sock_caps, F_SETOWN, owner));
+
+  // Check an operation needing CAP_FLOCK.
+  struct flock fl;
+  memset(&fl, 0, sizeof(fl));
+  fl.l_type = F_RDLCK;
+  fl.l_whence = SEEK_SET;
+  fl.l_start = 0;
+  fl.l_len = 1;
+  EXPECT_EQ(0, CHECK_FCNTL(CAP_FLOCK, caps, F_GETLK, (long)&fl));
+
+  for (int ii = 0; ii < FCNTL_NUM_RIGHTS; ++ii) {
+    close(sock_caps[ii]);
+    close(caps[ii]);
+  }
+  close(sock);
+  close(fd);
+  unlink(TmpFile("cap_fcntl_cmds"));
+}
+
+TEST(Fcntl, WriteLock) {
+  int fd = open(TmpFile("cap_fcntl_readlock"), O_RDWR|O_CREAT, 0644);
+  EXPECT_OK(fd);
+  EXPECT_OK(write(fd, "TEST", 4));
+
+  int cap = dup(fd);
+  cap_rights_t rights;
+  cap_rights_init(&rights, CAP_FCNTL, CAP_READ, CAP_WRITE, CAP_FLOCK);
+  EXPECT_OK(cap_rights_limit(cap, &rights));
+
+  struct flock fl;
+  memset(&fl, 0, sizeof(fl));
+  fl.l_type = F_WRLCK;
+  fl.l_whence = SEEK_SET;
+  fl.l_start = 0;
+  fl.l_len = 1;
+  // Write-Lock
+  EXPECT_OK(fcntl(cap, F_SETLK, (long)&fl));
+
+  // Check write-locked (from another process).
+  pid_t child = fork();
+  if (child == 0) {
+    fl.l_type = F_WRLCK;
+    fl.l_whence = SEEK_SET;
+    fl.l_start = 0;
+    fl.l_len = 1;
+    EXPECT_OK(fcntl(fd, F_GETLK, (long)&fl));
+    EXPECT_NE(F_UNLCK, fl.l_type);
+    exit(HasFailure());
+  }
+  int status;
+  EXPECT_EQ(child, waitpid(child, &status, 0));
+  int rc = WIFEXITED(status) ? WEXITSTATUS(status) : -1;
+  EXPECT_EQ(0, rc);
+
+  // Unlock
+  fl.l_type = F_UNLCK;
+  fl.l_whence = SEEK_SET;
+  fl.l_start = 0;
+  fl.l_len = 1;
+  EXPECT_OK(fcntl(cap, F_SETLK, (long)&fl));
+
+  close(cap);
+  close(fd);
+  unlink(TmpFile("cap_fcntl_readlock"));
+}
+
+#ifdef HAVE_CAP_FCNTLS_LIMIT
+TEST(Fcntl, SubRightNormalFD) {
+  int fd = open(TmpFile("cap_fcntl_subrightnorm"), O_RDWR|O_CREAT, 0644);
+  EXPECT_OK(fd);
+
+  // Restrict the fcntl(2) subrights of a normal FD.
+  EXPECT_OK(cap_fcntls_limit(fd, CAP_FCNTL_GETFL));
+  int fd_flag = fcntl(fd, F_GETFL, 0);
+  EXPECT_OK(fd_flag);
+  EXPECT_NOTCAPABLE(fcntl(fd, F_SETFL, fd_flag));
+
+  // Expect to have all capabilities.
+  cap_rights_t rights;
+  EXPECT_OK(cap_rights_get(fd, &rights));
+  cap_rights_t all;
+  CAP_SET_ALL(&all);
+  EXPECT_RIGHTS_EQ(&all, &rights);
+  cap_fcntl_t fcntls;
+  EXPECT_OK(cap_fcntls_get(fd, &fcntls));
+  EXPECT_EQ((cap_fcntl_t)CAP_FCNTL_GETFL, fcntls);
+
+  // Can't widen the subrights.
+  EXPECT_NOTCAPABLE(cap_fcntls_limit(fd, CAP_FCNTL_GETFL|CAP_FCNTL_SETFL));
+
+  close(fd);
+  unlink(TmpFile("cap_fcntl_subrightnorm"));
+}
+
+TEST(Fcntl, PreserveSubRights) {
+  int fd = open(TmpFile("cap_fcntl_subrightpreserve"), O_RDWR|O_CREAT, 0644);
+  EXPECT_OK(fd);
+
+  cap_rights_t rights;
+  cap_rights_init(&rights, CAP_READ, CAP_WRITE, CAP_SEEK, CAP_FCNTL);
+  EXPECT_OK(cap_rights_limit(fd, &rights));
+  EXPECT_OK(cap_fcntls_limit(fd, CAP_FCNTL_GETFL));
+
+  cap_rights_t cur_rights;
+  cap_fcntl_t fcntls;
+  EXPECT_OK(cap_rights_get(fd, &cur_rights));
+  EXPECT_RIGHTS_EQ(&rights, &cur_rights);
+  EXPECT_OK(cap_fcntls_get(fd, &fcntls));
+  EXPECT_EQ((cap_fcntl_t)CAP_FCNTL_GETFL, fcntls);
+
+  // Limiting the top-level rights leaves the subrights unaffected...
+  cap_rights_clear(&rights, CAP_READ);
+  EXPECT_OK(cap_rights_limit(fd, &rights));
+  EXPECT_OK(cap_fcntls_get(fd, &fcntls));
+  EXPECT_EQ((cap_fcntl_t)CAP_FCNTL_GETFL, fcntls);
+
+  // ... until we remove CAP_FCNTL.
+  cap_rights_clear(&rights, CAP_FCNTL);
+  EXPECT_OK(cap_rights_limit(fd, &rights));
+  EXPECT_OK(cap_fcntls_get(fd, &fcntls));
+  EXPECT_EQ((cap_fcntl_t)0, fcntls);
+  EXPECT_EQ(-1, cap_fcntls_limit(fd, CAP_FCNTL_GETFL));
+
+  close(fd);
+  unlink(TmpFile("cap_fcntl_subrightpreserve"));
+}
+
+TEST(Fcntl, FLSubRights) {
+  int fd = open(TmpFile("cap_fcntl_subrights"), O_RDWR|O_CREAT, 0644);
+  EXPECT_OK(fd);
+  EXPECT_OK(write(fd, "TEST", 4));
+  cap_rights_t rights;
+  cap_rights_init(&rights, CAP_FCNTL);
+  EXPECT_OK(cap_rights_limit(fd, &rights));
+
+  // Check operations that need CAP_FCNTL with subrights pristine => OK.
+  int fd_flag = fcntl(fd, F_GETFL, 0);
+  EXPECT_OK(fd_flag);
+  EXPECT_OK(fcntl(fd, F_SETFL, fd_flag));
+
+  // Check operations that need CAP_FCNTL with all subrights => OK.
+  EXPECT_OK(cap_fcntls_limit(fd, CAP_FCNTL_ALL));
+  fd_flag = fcntl(fd, F_GETFL, 0);
+  EXPECT_OK(fd_flag);
+  EXPECT_OK(fcntl(fd, F_SETFL, fd_flag));
+
+  // Check operations that need CAP_FCNTL with specific subrights.
+  int fd_get = dup(fd);
+  int fd_set = dup(fd);
+  EXPECT_OK(cap_fcntls_limit(fd_get, CAP_FCNTL_GETFL));
+  EXPECT_OK(cap_fcntls_limit(fd_set, CAP_FCNTL_SETFL));
+
+  fd_flag = fcntl(fd_get, F_GETFL, 0);
+  EXPECT_OK(fd_flag);
+  EXPECT_NOTCAPABLE(fcntl(fd_set, F_GETFL, 0));
+  EXPECT_OK(fcntl(fd_set, F_SETFL, fd_flag));
+  EXPECT_NOTCAPABLE(fcntl(fd_get, F_SETFL, fd_flag));
+  close(fd_get);
+  close(fd_set);
+
+  // Check operations that need CAP_FCNTL with no subrights => ENOTCAPABLE.
+  EXPECT_OK(cap_fcntls_limit(fd, 0));
+  EXPECT_NOTCAPABLE(fcntl(fd, F_GETFL, 0));
+  EXPECT_NOTCAPABLE(fcntl(fd, F_SETFL, fd_flag));
+
+  close(fd);
+  unlink(TmpFile("cap_fcntl_subrights"));
+}
+
+TEST(Fcntl, OWNSubRights) {
+  int sock = socket(PF_LOCAL, SOCK_STREAM, 0);
+  EXPECT_OK(sock);
+  cap_rights_t rights;
+  cap_rights_init(&rights, CAP_FCNTL);
+  EXPECT_OK(cap_rights_limit(sock, &rights));
+
+  // Check operations that need CAP_FCNTL with subrights pristine => OK.
+  int owner = fcntl(sock, F_GETOWN, 0);
+  EXPECT_OK(owner);
+  EXPECT_OK(fcntl(sock, F_SETOWN, owner));
+
+  // Check operations that need CAP_FCNTL with all subrights => OK.
+  EXPECT_OK(cap_fcntls_limit(sock, CAP_FCNTL_ALL));
+  owner = fcntl(sock, F_GETOWN, 0);
+  EXPECT_OK(owner);
+  EXPECT_OK(fcntl(sock, F_SETOWN, owner));
+
+  // Check operations that need CAP_FCNTL with specific subrights.
+  int sock_get = dup(sock);
+  int sock_set = dup(sock);
+  EXPECT_OK(cap_fcntls_limit(sock_get, CAP_FCNTL_GETOWN));
+  EXPECT_OK(cap_fcntls_limit(sock_set, CAP_FCNTL_SETOWN));
+  owner = fcntl(sock_get, F_GETOWN, 0);
+  EXPECT_OK(owner);
+  EXPECT_NOTCAPABLE(fcntl(sock_set, F_GETOWN, 0));
+  EXPECT_OK(fcntl(sock_set, F_SETOWN, owner));
+  EXPECT_NOTCAPABLE(fcntl(sock_get, F_SETOWN, owner));
+  // Also check we can retrieve the subrights.
+  cap_fcntl_t fcntls;
+  EXPECT_OK(cap_fcntls_get(sock_get, &fcntls));
+  EXPECT_EQ((cap_fcntl_t)CAP_FCNTL_GETOWN, fcntls);
+  EXPECT_OK(cap_fcntls_get(sock_set, &fcntls));
+  EXPECT_EQ((cap_fcntl_t)CAP_FCNTL_SETOWN, fcntls);
+  // And that we can't widen the subrights.
+  EXPECT_NOTCAPABLE(cap_fcntls_limit(sock_get, CAP_FCNTL_GETOWN|CAP_FCNTL_SETOWN));
+  EXPECT_NOTCAPABLE(cap_fcntls_limit(sock_set, CAP_FCNTL_GETOWN|CAP_FCNTL_SETOWN));
+  close(sock_get);
+  close(sock_set);
+
+  // Check operations that need CAP_FCNTL with no subrights => ENOTCAPABLE.
+  EXPECT_OK(cap_fcntls_limit(sock, 0));
+  EXPECT_NOTCAPABLE(fcntl(sock, F_GETOWN, 0));
+  EXPECT_NOTCAPABLE(fcntl(sock, F_SETOWN, owner));
+
+  close(sock);
+}
+#endif
Index: head/contrib/capsicum-test/fexecve.cc
===================================================================
--- head/contrib/capsicum-test/fexecve.cc
+++ head/contrib/capsicum-test/fexecve.cc
@@ -0,0 +1,209 @@
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include
+
+#include "syscalls.h"
+#include "capsicum.h"
+#include "capsicum-test.h"
+
+// Arguments to use in execve() calls.
+static char* null_envp[] = {NULL};
+
+// Fixture: opens the mini-me helper in capsicum_test_bindir and sets argv[0] of each argument vector to its path.
+class Execve : public ::testing::Test {
+ public:
+  Execve() : exec_fd_(-1) {
+    // We need a program to exec(), but for fexecve() to work in capability
+    // mode that program needs to be statically linked (otherwise ld.so will
+    // attempt to traverse the filesystem to load (e.g.) /lib/libc.so and
+    // fail).
+    exec_prog_ = capsicum_test_bindir + "/mini-me";
+    exec_prog_noexec_ = capsicum_test_bindir + "/mini-me.noexec";
+    exec_prog_setuid_ = capsicum_test_bindir + "/mini-me.setuid";
+
+    exec_fd_ = open(exec_prog_.c_str(), O_RDONLY);
+    if (exec_fd_ < 0) {
+      fprintf(stderr, "Error! Failed to open %s\n", exec_prog_.c_str());
+    }
+    argv_checkroot_[0] = (char*)exec_prog_.c_str();
+    argv_fail_[0] = (char*)exec_prog_.c_str();
+    argv_pass_[0] = (char*)exec_prog_.c_str();
+  }
+  ~Execve() {
+    if (exec_fd_ >= 0) {
+      close(exec_fd_);
+      exec_fd_ = -1;
+    }
+  }
+protected:
+  char* argv_checkroot_[3] = {nullptr, (char*)"--checkroot", nullptr};
+  char* argv_fail_[3] = {nullptr, (char*)"--fail", nullptr};
+  char* argv_pass_[3] = {nullptr, (char*)"--pass", nullptr};
+  std::string exec_prog_, exec_prog_noexec_, exec_prog_setuid_;
+  int exec_fd_;
+};
+
+class Fexecve : public Execve {
+ public:
+  Fexecve() : Execve() {}
+};
+
+class FexecveWithScript : public Fexecve {
+ public:
+  FexecveWithScript() :
+    Fexecve(), temp_script_filename_(TmpFile("cap_sh_script")) {}
+
+  void SetUp() override {
+    // First, build an executable shell script
+    int fd = open(temp_script_filename_, O_RDWR|O_CREAT, 0755);
+    EXPECT_OK(fd);
+    const char* contents = "#!/bin/sh\nexit 99\n";
+    EXPECT_OK(write(fd, contents, strlen(contents)));
+    close(fd);
+  }
+  void TearDown() override {
+    (void)::unlink(temp_script_filename_);
+  }
+
+  const char *temp_script_filename_;
+};
+
+FORK_TEST_F(Execve, BasicFexecve) {
+  EXPECT_OK(fexecve_(exec_fd_, argv_pass_, null_envp));
+  // Should not reach here, exec() takes over.
+  EXPECT_TRUE(!"fexecve() should never return");
+}
+
+FORK_TEST_F(Execve, InCapMode) {
+  EXPECT_OK(cap_enter());
+  EXPECT_OK(fexecve_(exec_fd_, argv_pass_, null_envp));
+  // Should not reach here, exec() takes over.
+  EXPECT_TRUE(!"fexecve() should never return");
+}
+
+FORK_TEST_F(Execve, FailWithoutCap) {
+  EXPECT_OK(cap_enter());
+  int cap_fd = dup(exec_fd_);
+  EXPECT_OK(cap_fd);
+  cap_rights_t rights;
+  cap_rights_init(&rights, 0);
+  EXPECT_OK(cap_rights_limit(cap_fd, &rights));
+  EXPECT_EQ(-1, fexecve_(cap_fd, argv_fail_, null_envp));
+  EXPECT_EQ(ENOTCAPABLE, errno);
+}
+
+FORK_TEST_F(Execve, SucceedWithCap) {
+  EXPECT_OK(cap_enter());
+  int cap_fd = dup(exec_fd_);
+  EXPECT_OK(cap_fd);
+  cap_rights_t rights;
+  // TODO(drysdale): would prefer that Linux Capsicum not need all of these
+  // rights -- just CAP_FEXECVE|CAP_READ or CAP_FEXECVE would be preferable.
+  cap_rights_init(&rights, CAP_FEXECVE, CAP_LOOKUP, CAP_READ);
+  EXPECT_OK(cap_rights_limit(cap_fd, &rights));
+  EXPECT_OK(fexecve_(cap_fd, argv_pass_, null_envp));
+  // Should not reach here, exec() takes over.
+  EXPECT_TRUE(!"fexecve() should have succeeded");
+}
+
+FORK_TEST_F(Fexecve, ExecutePermissionCheck) {
+  int fd = open(exec_prog_noexec_.c_str(), O_RDONLY);
+  EXPECT_OK(fd);
+  if (fd >= 0) {
+    struct stat data;
+    EXPECT_OK(fstat(fd, &data));
+    EXPECT_EQ((mode_t)0, data.st_mode & (S_IXUSR|S_IXGRP|S_IXOTH));
+    EXPECT_EQ(-1, fexecve_(fd, argv_fail_, null_envp));
+    EXPECT_EQ(EACCES, errno);
+    close(fd);
+  }
+}
+
+FORK_TEST_F(Fexecve, SetuidIgnored) {
+  if (geteuid() == 0) {
+    TEST_SKIPPED("requires non-root");
+    return;
+  }
+  int fd = open(exec_prog_setuid_.c_str(), O_RDONLY);
+  EXPECT_OK(fd);
+  EXPECT_OK(cap_enter());
+  if (fd >= 0) {
+    struct stat data;
+    EXPECT_OK(fstat(fd, &data));
+    EXPECT_EQ((mode_t)S_ISUID, data.st_mode & S_ISUID);
+    EXPECT_OK(fexecve_(fd, argv_checkroot_, null_envp));
+    // Should not reach here, exec() takes over.
+    EXPECT_TRUE(!"fexecve() should have succeeded");
+    close(fd);
+  }
+}
+
+FORK_TEST_F(Fexecve, ExecveFailure) {
+  EXPECT_OK(cap_enter());
+  EXPECT_EQ(-1, execve(argv_fail_[0], argv_fail_, null_envp));
+  EXPECT_EQ(ECAPMODE, errno);
+}
+
+FORK_TEST_F(FexecveWithScript, CapModeScriptFail) {
+  int fd;
+
+  // Open the script file, with CAP_FEXECVE rights.
+  fd = open(temp_script_filename_, O_RDONLY);
+  EXPECT_OK(fd);
+  cap_rights_t rights;
+  cap_rights_init(&rights, CAP_FEXECVE, CAP_READ, CAP_SEEK);
+  EXPECT_OK(cap_rights_limit(fd, &rights));
+
+  EXPECT_OK(cap_enter()); // Enter capability mode
+
+  // Attempt fexecve; should fail, because "/bin/sh" is inaccessible.
+  EXPECT_EQ(-1, fexecve_(fd, argv_pass_, null_envp));
+}
+
+#ifdef HAVE_EXECVEAT
+class Execveat : public Execve {
+ public:
+  Execveat() : Execve() {}
+};
+
+TEST_F(Execveat, NoUpwardTraversal) {
+  char cwd[1024];
+  ASSERT_TRUE(getcwd(cwd, sizeof(cwd)) != NULL);
+  char *abspath = realpath(exec_prog_.c_str(), NULL); // exec_prog_ is a std::string
+  ASSERT_TRUE(abspath != NULL) << "realpath() failed for " << exec_prog_;
+
+  int dfd = open(".", O_DIRECTORY|O_RDONLY);
+  pid_t child = fork();
+  if (child == 0) {
+    EXPECT_OK(cap_enter()); // Enter capability mode.
+    // Can't execveat() an absolute path, even relative to a dfd.
+    EXPECT_SYSCALL_FAIL(ECAPMODE,
+                        execveat(AT_FDCWD, abspath, argv_pass_, null_envp, 0));
+    EXPECT_SYSCALL_FAIL(E_NO_TRAVERSE_CAPABILITY,
+                        execveat(dfd, abspath, argv_pass_, null_envp, 0));
+
+    // Can't execveat() a relative path ("..//./").
+    char *p = cwd + strlen(cwd);
+    while (*p != '/') p--;
+    char buffer[1024];
+    snprintf(buffer, sizeof(buffer), "../%s/%s", ++p, exec_prog_.c_str()); // bounded, unlike strcat()
+    EXPECT_SYSCALL_FAIL(E_NO_TRAVERSE_CAPABILITY,
+                        execveat(dfd, buffer, argv_pass_, null_envp, 0));
+    exit(HasFailure() ? 99 : 123);
+  }
+  int status;
+  EXPECT_EQ(child, waitpid(child, &status, 0));
+  EXPECT_TRUE(WIFEXITED(status)) << "0x" << std::hex << status;
+  EXPECT_EQ(123, WEXITSTATUS(status));
+  free(abspath);
+  close(dfd);
+}
+#endif
Index: head/contrib/capsicum-test/ioctl.cc
===================================================================
--- head/contrib/capsicum-test/ioctl.cc
+++ head/contrib/capsicum-test/ioctl.cc
@@ -0,0 +1,236 @@
+// Test that ioctl works in capability mode.
+#include
+#include
+#include
+#include
+#include
+
+#include "capsicum.h"
+#include "capsicum-test.h"
+
+// Ensure that ioctl() works consistently for both regular file descriptors and
+// capability-wrapped ones.
+TEST(Ioctl, Basic) {
+  cap_rights_t rights_ioctl;
+  cap_rights_init(&rights_ioctl, CAP_IOCTL);
+  cap_rights_t rights_many;
+  cap_rights_init(&rights_many, CAP_READ, CAP_WRITE, CAP_SEEK, CAP_FSTAT, CAP_FSYNC);
+
+  int fd = open("/etc/passwd", O_RDONLY);
+  EXPECT_OK(fd);
+  int fd_no = dup(fd);
+  EXPECT_OK(fd_no);
+  EXPECT_OK(cap_rights_limit(fd, &rights_ioctl));
+  EXPECT_OK(cap_rights_limit(fd_no, &rights_many));
+
+  // Check that CAP_IOCTL is required.
+  int bytes;
+  EXPECT_OK(ioctl(fd, FIONREAD, &bytes));
+  EXPECT_NOTCAPABLE(ioctl(fd_no, FIONREAD, &bytes));
+
+  int one = 1;
+  EXPECT_OK(ioctl(fd, FIOCLEX, &one));
+  EXPECT_NOTCAPABLE(ioctl(fd_no, FIOCLEX, &one));
+
+  close(fd);
+  close(fd_no);
+}
+
+#ifdef HAVE_CAP_IOCTLS_LIMIT
+TEST(Ioctl, SubRightNormalFD) {
+  int fd = open("/etc/passwd", O_RDONLY);
+  EXPECT_OK(fd);
+
+  // Restrict the ioctl(2) subrights of a normal FD.
+  cap_ioctl_t ioctl_nread = FIONREAD;
+  EXPECT_OK(cap_ioctls_limit(fd, &ioctl_nread, 1));
+  int bytes;
+  EXPECT_OK(ioctl(fd, FIONREAD, &bytes));
+  int one = 1;
+  EXPECT_NOTCAPABLE(ioctl(fd, FIOCLEX, &one));
+
+  // Expect to have all primary rights.
+  cap_rights_t rights;
+  EXPECT_OK(cap_rights_get(fd, &rights));
+  cap_rights_t all;
+  CAP_SET_ALL(&all);
+  EXPECT_RIGHTS_EQ(&all, &rights);
+  cap_ioctl_t ioctls[16];
+  memset(ioctls, 0, sizeof(ioctls));
+  ssize_t nioctls = cap_ioctls_get(fd, ioctls, 16);
+  EXPECT_OK(nioctls);
+  EXPECT_EQ(1, nioctls);
+  EXPECT_EQ((cap_ioctl_t)FIONREAD, ioctls[0]);
+
+  // Can't widen the subrights.
+  cap_ioctl_t both_ioctls[2] = {FIONREAD, FIOCLEX};
+  EXPECT_NOTCAPABLE(cap_ioctls_limit(fd, both_ioctls, 2));
+
+  close(fd);
+}
+
+TEST(Ioctl, PreserveSubRights) {
+  int fd = open("/etc/passwd", O_RDONLY);
+  EXPECT_OK(fd);
+  cap_rights_t rights;
+  cap_rights_init(&rights, CAP_READ, CAP_WRITE, CAP_SEEK, CAP_IOCTL);
+  EXPECT_OK(cap_rights_limit(fd, &rights));
+  cap_ioctl_t ioctl_nread = FIONREAD;
+  EXPECT_OK(cap_ioctls_limit(fd, &ioctl_nread, 1));
+
+  cap_rights_t cur_rights;
+  cap_ioctl_t ioctls[16];
+  ssize_t nioctls;
+  EXPECT_OK(cap_rights_get(fd, &cur_rights));
+  EXPECT_RIGHTS_EQ(&rights, &cur_rights);
+  nioctls = cap_ioctls_get(fd, ioctls, 16);
+  EXPECT_OK(nioctls);
+  EXPECT_EQ(1, nioctls);
+  EXPECT_EQ((cap_ioctl_t)FIONREAD, ioctls[0]);
+
+  // Limiting the top-level rights leaves the subrights unaffected...
+  cap_rights_clear(&rights, CAP_READ);
+  EXPECT_OK(cap_rights_limit(fd, &rights));
+  nioctls = cap_ioctls_get(fd, ioctls, 16);
+  EXPECT_OK(nioctls);
+  EXPECT_EQ(1, nioctls);
+  EXPECT_EQ((cap_ioctl_t)FIONREAD, ioctls[0]);
+
+  // ... until we remove CAP_IOCTL
+  cap_rights_clear(&rights, CAP_IOCTL);
+  EXPECT_OK(cap_rights_limit(fd, &rights));
+  nioctls = cap_ioctls_get(fd, ioctls, 16);
+  EXPECT_OK(nioctls);
+  EXPECT_EQ(0, nioctls);
+  EXPECT_EQ(-1, cap_ioctls_limit(fd, &ioctl_nread, 1));
+
+  close(fd);
+}
+
+TEST(Ioctl, SubRights) {
+  int fd = open("/etc/passwd", O_RDONLY);
+  EXPECT_OK(fd);
+
+  cap_ioctl_t ioctls[16];
+  ssize_t nioctls;
+  memset(ioctls, 0, sizeof(ioctls));
+  nioctls = cap_ioctls_get(fd, ioctls, 16);
+  EXPECT_OK(nioctls);
+  EXPECT_EQ(CAP_IOCTLS_ALL, nioctls);
+
+  cap_rights_t rights_ioctl;
+  cap_rights_init(&rights_ioctl, CAP_IOCTL);
+  EXPECT_OK(cap_rights_limit(fd, &rights_ioctl));
+
+  nioctls = cap_ioctls_get(fd, ioctls, 16);
+  EXPECT_OK(nioctls);
+  EXPECT_EQ(CAP_IOCTLS_ALL, nioctls);
+
+  // Check operations that need CAP_IOCTL with subrights pristine => OK.
+  int bytes;
+  EXPECT_OK(ioctl(fd, FIONREAD, &bytes));
+  int one = 1;
+  EXPECT_OK(ioctl(fd, FIOCLEX, &one));
+
+  // Check operations that need CAP_IOCTL with all relevant subrights => OK.
+  cap_ioctl_t both_ioctls[2] = {FIONREAD, FIOCLEX};
+  EXPECT_OK(cap_ioctls_limit(fd, both_ioctls, 2));
+  EXPECT_OK(ioctl(fd, FIONREAD, &bytes));
+  EXPECT_OK(ioctl(fd, FIOCLEX, &one));
+
+
+  // Check what happens if we ask for subrights but don't have the space for them.
+  cap_ioctl_t before = 0xBBBBBBBB;
+  cap_ioctl_t one_ioctl = 0;
+  cap_ioctl_t after = 0xAAAAAAAA;
+  nioctls = cap_ioctls_get(fd, &one_ioctl, 1);
+  EXPECT_EQ(2, nioctls);
+  EXPECT_EQ(0xBBBBBBBB, before);
+  EXPECT_TRUE(one_ioctl == FIONREAD || one_ioctl == FIOCLEX);
+  EXPECT_EQ(0xAAAAAAAA, after);
+
+  // Check operations that need CAP_IOCTL with particular subrights.
+  int fd_nread = dup(fd);
+  int fd_clex = dup(fd);
+  cap_ioctl_t ioctl_nread = FIONREAD;
+  cap_ioctl_t ioctl_clex = FIOCLEX;
+  EXPECT_OK(cap_ioctls_limit(fd_nread, &ioctl_nread, 1));
+  EXPECT_OK(cap_ioctls_limit(fd_clex, &ioctl_clex, 1));
+  EXPECT_OK(ioctl(fd_nread, FIONREAD, &bytes));
+  EXPECT_NOTCAPABLE(ioctl(fd_clex, FIONREAD, &bytes));
+  EXPECT_OK(ioctl(fd_clex, FIOCLEX, &one));
+  EXPECT_NOTCAPABLE(ioctl(fd_nread, FIOCLEX, &one));
+
+  // Also check we can retrieve the subrights.
+  memset(ioctls, 0, sizeof(ioctls));
+  nioctls = cap_ioctls_get(fd_nread, ioctls, 16);
+  EXPECT_OK(nioctls);
+  EXPECT_EQ(1, nioctls);
+  EXPECT_EQ((cap_ioctl_t)FIONREAD, ioctls[0]);
+  memset(ioctls, 0, sizeof(ioctls));
+  nioctls = cap_ioctls_get(fd_clex, ioctls, 16);
+  EXPECT_OK(nioctls);
+  EXPECT_EQ(1, nioctls);
+  EXPECT_EQ((cap_ioctl_t)FIOCLEX, ioctls[0]);
+  // And that we can't widen the subrights.
+  EXPECT_NOTCAPABLE(cap_ioctls_limit(fd_nread, both_ioctls, 2));
+  EXPECT_NOTCAPABLE(cap_ioctls_limit(fd_clex, both_ioctls, 2));
+  close(fd_nread);
+  close(fd_clex);
+
+  // Check operations that need CAP_IOCTL with no subrights => ENOTCAPABLE.
+  EXPECT_OK(cap_ioctls_limit(fd, NULL, 0));
+  EXPECT_NOTCAPABLE(ioctl(fd, FIONREAD, &bytes));
+  EXPECT_NOTCAPABLE(ioctl(fd, FIOCLEX, &one));
+
+  close(fd);
+}
+
+#ifdef CAP_IOCTLS_LIMIT_MAX
+TEST(Ioctl, TooManySubRights) {
+  int fd = open("/etc/passwd", O_RDONLY);
+  EXPECT_OK(fd);
+
+  cap_ioctl_t ioctls[CAP_IOCTLS_LIMIT_MAX + 1];
+  for (int ii = 0; ii <= CAP_IOCTLS_LIMIT_MAX; ii++) {
+    ioctls[ii] = ii + 1;
+  }
+
+  cap_rights_t rights_ioctl;
+  cap_rights_init(&rights_ioctl, CAP_IOCTL);
+  EXPECT_OK(cap_rights_limit(fd, &rights_ioctl));
+
+  // Can only limit to a certain number of ioctls
+  EXPECT_EQ(-1, cap_ioctls_limit(fd, ioctls, CAP_IOCTLS_LIMIT_MAX + 1));
+  EXPECT_EQ(EINVAL, errno);
+  EXPECT_OK(cap_ioctls_limit(fd, ioctls, CAP_IOCTLS_LIMIT_MAX));
+
+  close(fd);
+}
+#else
+TEST(Ioctl, ManySubRights) {
+  int fd = open("/etc/passwd", O_RDONLY);
+  EXPECT_OK(fd);
+
+  const int nioctls = 150000;
+  cap_ioctl_t* ioctls = (cap_ioctl_t*)calloc(nioctls, sizeof(cap_ioctl_t));
+  ASSERT_TRUE(ioctls != NULL);
+  for (int ii = 0; ii < nioctls; ii++) {
+    ioctls[ii] = ii + 1;
+  }
+
+  cap_rights_t rights_ioctl;
+  cap_rights_init(&rights_ioctl, CAP_IOCTL);
+  EXPECT_OK(cap_rights_limit(fd, &rights_ioctl));
+
+  EXPECT_OK(cap_ioctls_limit(fd, ioctls, nioctls));
+  // Limit to a subset; if this takes a long time then there's an
+  // O(N^2) implementation of the ioctl list comparison.
+  EXPECT_OK(cap_ioctls_limit(fd, ioctls, nioctls - 1));
+
+  free(ioctls); // release the calloc()'d list
+  close(fd);
+}
+#endif
+
+#endif
Index: head/contrib/capsicum-test/linux.cc
===================================================================
--- head/contrib/capsicum-test/linux.cc
+++ head/contrib/capsicum-test/linux.cc
@@ -0,0 +1,1503 @@
+// Tests of Linux-specific functionality
+#ifdef __linux__
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include // Requires e.g.
libcap-dev package for POSIX.1e capabilities headers +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#include "capsicum.h" +#include "syscalls.h" +#include "capsicum-test.h" + +TEST(Linux, TimerFD) { + int fd = timerfd_create(CLOCK_MONOTONIC, 0); + + cap_rights_t r_ro; + cap_rights_init(&r_ro, CAP_READ); + cap_rights_t r_wo; + cap_rights_init(&r_wo, CAP_WRITE); + cap_rights_t r_rw; + cap_rights_init(&r_rw, CAP_READ, CAP_WRITE); + cap_rights_t r_rwpoll; + cap_rights_init(&r_rwpoll, CAP_READ, CAP_WRITE, CAP_EVENT); + + int cap_fd_ro = dup(fd); + EXPECT_OK(cap_fd_ro); + EXPECT_OK(cap_rights_limit(cap_fd_ro, &r_ro)); + int cap_fd_wo = dup(fd); + EXPECT_OK(cap_fd_wo); + EXPECT_OK(cap_rights_limit(cap_fd_wo, &r_wo)); + int cap_fd_rw = dup(fd); + EXPECT_OK(cap_fd_rw); + EXPECT_OK(cap_rights_limit(cap_fd_rw, &r_rw)); + int cap_fd_all = dup(fd); + EXPECT_OK(cap_fd_all); + EXPECT_OK(cap_rights_limit(cap_fd_all, &r_rwpoll)); + + struct itimerspec old_ispec; + struct itimerspec ispec; + ispec.it_interval.tv_sec = 0; + ispec.it_interval.tv_nsec = 0; + ispec.it_value.tv_sec = 0; + ispec.it_value.tv_nsec = 100000000; // 100ms + EXPECT_NOTCAPABLE(timerfd_settime(cap_fd_ro, 0, &ispec, NULL)); + EXPECT_NOTCAPABLE(timerfd_settime(cap_fd_wo, 0, &ispec, &old_ispec)); + EXPECT_OK(timerfd_settime(cap_fd_wo, 0, &ispec, NULL)); + EXPECT_OK(timerfd_settime(cap_fd_rw, 0, &ispec, NULL)); + EXPECT_OK(timerfd_settime(cap_fd_all, 0, &ispec, NULL)); + + EXPECT_NOTCAPABLE(timerfd_gettime(cap_fd_wo, &old_ispec)); + EXPECT_OK(timerfd_gettime(cap_fd_ro, &old_ispec)); + EXPECT_OK(timerfd_gettime(cap_fd_rw, &old_ispec)); + EXPECT_OK(timerfd_gettime(cap_fd_all, &old_ispec)); + + // To be able to poll() for the timer pop, still need CAP_EVENT. 
+ struct pollfd poll_fd; + for (int ii = 0; ii < 3; ii++) { + poll_fd.revents = 0; + poll_fd.events = POLLIN; + switch (ii) { + case 0: poll_fd.fd = cap_fd_ro; break; + case 1: poll_fd.fd = cap_fd_wo; break; + case 2: poll_fd.fd = cap_fd_rw; break; + } + // Poll immediately returns with POLLNVAL + EXPECT_OK(poll(&poll_fd, 1, 400)); + EXPECT_EQ(0, (poll_fd.revents & POLLIN)); + EXPECT_NE(0, (poll_fd.revents & POLLNVAL)); + } + + poll_fd.fd = cap_fd_all; + EXPECT_OK(poll(&poll_fd, 1, 400)); + EXPECT_NE(0, (poll_fd.revents & POLLIN)); + EXPECT_EQ(0, (poll_fd.revents & POLLNVAL)); + + EXPECT_OK(timerfd_gettime(cap_fd_all, &old_ispec)); + EXPECT_EQ(0, old_ispec.it_value.tv_sec); + EXPECT_EQ(0, old_ispec.it_value.tv_nsec); + EXPECT_EQ(0, old_ispec.it_interval.tv_sec); + EXPECT_EQ(0, old_ispec.it_interval.tv_nsec); + + close(cap_fd_all); + close(cap_fd_rw); + close(cap_fd_wo); + close(cap_fd_ro); + close(fd); +} + +FORK_TEST(Linux, SignalFD) { + if (force_mt) { + TEST_SKIPPED("multi-threaded run clashes with signals"); + return; + } + pid_t me = getpid(); + sigset_t mask; + sigemptyset(&mask); + sigaddset(&mask, SIGUSR1); + + // Block signals before registering against a new signal FD. + EXPECT_OK(sigprocmask(SIG_BLOCK, &mask, NULL)); + int fd = signalfd(-1, &mask, 0); + EXPECT_OK(fd); + + cap_rights_t r_rs; + cap_rights_init(&r_rs, CAP_READ, CAP_SEEK); + cap_rights_t r_ws; + cap_rights_init(&r_ws, CAP_WRITE, CAP_SEEK); + cap_rights_t r_sig; + cap_rights_init(&r_sig, CAP_FSIGNAL); + cap_rights_t r_rssig; + cap_rights_init(&r_rssig, CAP_FSIGNAL, CAP_READ, CAP_SEEK); + cap_rights_t r_rssig_poll; + cap_rights_init(&r_rssig_poll, CAP_FSIGNAL, CAP_READ, CAP_SEEK, CAP_EVENT); + + // Various capability variants. 
+ int cap_fd_none = dup(fd); + EXPECT_OK(cap_fd_none); + EXPECT_OK(cap_rights_limit(cap_fd_none, &r_ws)); + int cap_fd_read = dup(fd); + EXPECT_OK(cap_fd_read); + EXPECT_OK(cap_rights_limit(cap_fd_read, &r_rs)); + int cap_fd_sig = dup(fd); + EXPECT_OK(cap_fd_sig); + EXPECT_OK(cap_rights_limit(cap_fd_sig, &r_sig)); + int cap_fd_sig_read = dup(fd); + EXPECT_OK(cap_fd_sig_read); + EXPECT_OK(cap_rights_limit(cap_fd_sig_read, &r_rssig)); + int cap_fd_all = dup(fd); + EXPECT_OK(cap_fd_all); + EXPECT_OK(cap_rights_limit(cap_fd_all, &r_rssig_poll)); + + struct signalfd_siginfo fdsi; + + // Need CAP_READ to read the signal information + kill(me, SIGUSR1); + EXPECT_NOTCAPABLE(read(cap_fd_none, &fdsi, sizeof(struct signalfd_siginfo))); + EXPECT_NOTCAPABLE(read(cap_fd_sig, &fdsi, sizeof(struct signalfd_siginfo))); + int len = read(cap_fd_read, &fdsi, sizeof(struct signalfd_siginfo)); + EXPECT_OK(len); + EXPECT_EQ(sizeof(struct signalfd_siginfo), (size_t)len); + EXPECT_EQ(SIGUSR1, (int)fdsi.ssi_signo); + + // Need CAP_FSIGNAL to modify the signal mask. + sigemptyset(&mask); + sigaddset(&mask, SIGUSR1); + sigaddset(&mask, SIGUSR2); + EXPECT_OK(sigprocmask(SIG_BLOCK, &mask, NULL)); + EXPECT_NOTCAPABLE(signalfd(cap_fd_none, &mask, 0)); + EXPECT_NOTCAPABLE(signalfd(cap_fd_read, &mask, 0)); + EXPECT_EQ(cap_fd_sig, signalfd(cap_fd_sig, &mask, 0)); + + // Need CAP_EVENT to get notification of a signal in poll(2). 
+ kill(me, SIGUSR2); + + struct pollfd poll_fd; + poll_fd.revents = 0; + poll_fd.events = POLLIN; + poll_fd.fd = cap_fd_sig_read; + EXPECT_OK(poll(&poll_fd, 1, 400)); + EXPECT_EQ(0, (poll_fd.revents & POLLIN)); + EXPECT_NE(0, (poll_fd.revents & POLLNVAL)); + + poll_fd.fd = cap_fd_all; + EXPECT_OK(poll(&poll_fd, 1, 400)); + EXPECT_NE(0, (poll_fd.revents & POLLIN)); + EXPECT_EQ(0, (poll_fd.revents & POLLNVAL)); +} + +TEST(Linux, EventFD) { + int fd = eventfd(0, 0); + EXPECT_OK(fd); + + cap_rights_t r_rs; + cap_rights_init(&r_rs, CAP_READ, CAP_SEEK); + cap_rights_t r_ws; + cap_rights_init(&r_ws, CAP_WRITE, CAP_SEEK); + cap_rights_t r_rws; + cap_rights_init(&r_rws, CAP_READ, CAP_WRITE, CAP_SEEK); + cap_rights_t r_rwspoll; + cap_rights_init(&r_rwspoll, CAP_READ, CAP_WRITE, CAP_SEEK, CAP_EVENT); + + int cap_ro = dup(fd); + EXPECT_OK(cap_ro); + EXPECT_OK(cap_rights_limit(cap_ro, &r_rs)); + int cap_wo = dup(fd); + EXPECT_OK(cap_wo); + EXPECT_OK(cap_rights_limit(cap_wo, &r_ws)); + int cap_rw = dup(fd); + EXPECT_OK(cap_rw); + EXPECT_OK(cap_rights_limit(cap_rw, &r_rws)); + int cap_all = dup(fd); + EXPECT_OK(cap_all); + EXPECT_OK(cap_rights_limit(cap_all, &r_rwspoll)); + + pid_t child = fork(); + if (child == 0) { + // Child: write counter to eventfd + uint64_t u = 42; + EXPECT_NOTCAPABLE(write(cap_ro, &u, sizeof(u))); + EXPECT_OK(write(cap_wo, &u, sizeof(u))); + exit(HasFailure()); + } + + sleep(1); // Allow child to write + + struct pollfd poll_fd; + poll_fd.revents = 0; + poll_fd.events = POLLIN; + poll_fd.fd = cap_rw; + EXPECT_OK(poll(&poll_fd, 1, 400)); + EXPECT_EQ(0, (poll_fd.revents & POLLIN)); + EXPECT_NE(0, (poll_fd.revents & POLLNVAL)); + + poll_fd.fd = cap_all; + EXPECT_OK(poll(&poll_fd, 1, 400)); + EXPECT_NE(0, (poll_fd.revents & POLLIN)); + EXPECT_EQ(0, (poll_fd.revents & POLLNVAL)); + + uint64_t u; + EXPECT_NOTCAPABLE(read(cap_wo, &u, sizeof(u))); + EXPECT_OK(read(cap_ro, &u, sizeof(u))); + EXPECT_EQ(42, (int)u); + + // Wait for the child. 
+ int status; + EXPECT_EQ(child, waitpid(child, &status, 0)); + int rc = WIFEXITED(status) ? WEXITSTATUS(status) : -1; + EXPECT_EQ(0, rc); + + close(cap_all); + close(cap_rw); + close(cap_wo); + close(cap_ro); + close(fd); +} + +FORK_TEST(Linux, epoll) { + int sock_fds[2]; + EXPECT_OK(socketpair(AF_UNIX, SOCK_STREAM, 0, sock_fds)); + // Queue some data. + char buffer[4] = {1, 2, 3, 4}; + EXPECT_OK(write(sock_fds[1], buffer, sizeof(buffer))); + + EXPECT_OK(cap_enter()); // Enter capability mode. + + int epoll_fd = epoll_create(1); + EXPECT_OK(epoll_fd); + + cap_rights_t r_rs; + cap_rights_init(&r_rs, CAP_READ, CAP_SEEK); + cap_rights_t r_ws; + cap_rights_init(&r_ws, CAP_WRITE, CAP_SEEK); + cap_rights_t r_rws; + cap_rights_init(&r_rws, CAP_READ, CAP_WRITE, CAP_SEEK); + cap_rights_t r_rwspoll; + cap_rights_init(&r_rwspoll, CAP_READ, CAP_WRITE, CAP_SEEK, CAP_EVENT); + cap_rights_t r_epoll; + cap_rights_init(&r_epoll, CAP_EPOLL_CTL); + + int cap_epoll_wo = dup(epoll_fd); + EXPECT_OK(cap_epoll_wo); + EXPECT_OK(cap_rights_limit(cap_epoll_wo, &r_ws)); + int cap_epoll_ro = dup(epoll_fd); + EXPECT_OK(cap_epoll_ro); + EXPECT_OK(cap_rights_limit(cap_epoll_ro, &r_rs)); + int cap_epoll_rw = dup(epoll_fd); + EXPECT_OK(cap_epoll_rw); + EXPECT_OK(cap_rights_limit(cap_epoll_rw, &r_rws)); + int cap_epoll_poll = dup(epoll_fd); + EXPECT_OK(cap_epoll_poll); + EXPECT_OK(cap_rights_limit(cap_epoll_poll, &r_rwspoll)); + int cap_epoll_ctl = dup(epoll_fd); + EXPECT_OK(cap_epoll_ctl); + EXPECT_OK(cap_rights_limit(cap_epoll_ctl, &r_epoll)); + + // Can only modify the FDs being monitored if the CAP_EPOLL_CTL right is present. 
+ struct epoll_event eev; + memset(&eev, 0, sizeof(eev)); + eev.events = EPOLLIN|EPOLLOUT|EPOLLPRI; + EXPECT_NOTCAPABLE(epoll_ctl(cap_epoll_ro, EPOLL_CTL_ADD, sock_fds[0], &eev)); + EXPECT_NOTCAPABLE(epoll_ctl(cap_epoll_wo, EPOLL_CTL_ADD, sock_fds[0], &eev)); + EXPECT_NOTCAPABLE(epoll_ctl(cap_epoll_rw, EPOLL_CTL_ADD, sock_fds[0], &eev)); + EXPECT_OK(epoll_ctl(cap_epoll_ctl, EPOLL_CTL_ADD, sock_fds[0], &eev)); + eev.events = EPOLLIN|EPOLLOUT; + EXPECT_NOTCAPABLE(epoll_ctl(cap_epoll_ro, EPOLL_CTL_MOD, sock_fds[0], &eev)); + EXPECT_NOTCAPABLE(epoll_ctl(cap_epoll_wo, EPOLL_CTL_MOD, sock_fds[0], &eev)); + EXPECT_NOTCAPABLE(epoll_ctl(cap_epoll_rw, EPOLL_CTL_MOD, sock_fds[0], &eev)); + EXPECT_OK(epoll_ctl(cap_epoll_ctl, EPOLL_CTL_MOD, sock_fds[0], &eev)); + + // Running epoll_pwait(2) requires CAP_EVENT. + eev.events = 0; + EXPECT_NOTCAPABLE(epoll_pwait(cap_epoll_ro, &eev, 1, 100, NULL)); + EXPECT_NOTCAPABLE(epoll_pwait(cap_epoll_wo, &eev, 1, 100, NULL)); + EXPECT_NOTCAPABLE(epoll_pwait(cap_epoll_rw, &eev, 1, 100, NULL)); + EXPECT_OK(epoll_pwait(cap_epoll_poll, &eev, 1, 100, NULL)); + EXPECT_EQ(EPOLLIN, eev.events & EPOLLIN); + + EXPECT_NOTCAPABLE(epoll_ctl(cap_epoll_ro, EPOLL_CTL_DEL, sock_fds[0], &eev)); + EXPECT_NOTCAPABLE(epoll_ctl(cap_epoll_wo, EPOLL_CTL_DEL, sock_fds[0], &eev)); + EXPECT_NOTCAPABLE(epoll_ctl(cap_epoll_rw, EPOLL_CTL_DEL, sock_fds[0], &eev)); + EXPECT_OK(epoll_ctl(epoll_fd, EPOLL_CTL_DEL, sock_fds[0], &eev)); + + close(cap_epoll_ctl); + close(cap_epoll_poll); + close(cap_epoll_rw); + close(cap_epoll_ro); + close(cap_epoll_wo); + close(epoll_fd); + close(sock_fds[1]); + close(sock_fds[0]); +} + +TEST(Linux, fstatat) { + int fd = open(TmpFile("cap_fstatat"), O_CREAT|O_RDWR, 0644); + EXPECT_OK(fd); + unsigned char buffer[] = {1, 2, 3, 4}; + EXPECT_OK(write(fd, buffer, sizeof(buffer))); + cap_rights_t rights; + int cap_rf = dup(fd); + EXPECT_OK(cap_rf); + EXPECT_OK(cap_rights_limit(cap_rf, cap_rights_init(&rights, CAP_READ, CAP_FSTAT))); + int cap_ro = 
dup(fd); + EXPECT_OK(cap_ro); + EXPECT_OK(cap_rights_limit(cap_ro, cap_rights_init(&rights, CAP_READ))); + + struct stat info; + EXPECT_OK(fstatat(fd, "", &info, AT_EMPTY_PATH)); + EXPECT_NOTCAPABLE(fstatat(cap_ro, "", &info, AT_EMPTY_PATH)); + EXPECT_OK(fstatat(cap_rf, "", &info, AT_EMPTY_PATH)); + + close(cap_ro); + close(cap_rf); + close(fd); + + int dir = open(tmpdir.c_str(), O_RDONLY); + EXPECT_OK(dir); + int dir_rf = dup(dir); + EXPECT_OK(dir_rf); + EXPECT_OK(cap_rights_limit(dir_rf, cap_rights_init(&rights, CAP_READ, CAP_FSTAT))); + int dir_ro = dup(dir); // dup the directory FD; fd was closed above + EXPECT_OK(dir_ro); + EXPECT_OK(cap_rights_limit(dir_ro, cap_rights_init(&rights, CAP_READ))); + + EXPECT_OK(fstatat(dir, "cap_fstatat", &info, AT_EMPTY_PATH)); + EXPECT_NOTCAPABLE(fstatat(dir_ro, "cap_fstatat", &info, AT_EMPTY_PATH)); + EXPECT_OK(fstatat(dir_rf, "cap_fstatat", &info, AT_EMPTY_PATH)); + + close(dir_ro); + close(dir_rf); + close(dir); + + unlink(TmpFile("cap_fstatat")); +} + +// fanotify support may not be available at compile-time +#ifdef __NR_fanotify_init +TEST(Linux, fanotify) { + REQUIRE_ROOT(); + int fa_fd = fanotify_init(FAN_CLASS_NOTIF, O_RDWR); + EXPECT_OK(fa_fd); + if (fa_fd < 0) return; // May not be enabled + + cap_rights_t r_rs; + cap_rights_init(&r_rs, CAP_READ, CAP_SEEK); + cap_rights_t r_ws; + cap_rights_init(&r_ws, CAP_WRITE, CAP_SEEK); + cap_rights_t r_rws; + cap_rights_init(&r_rws, CAP_READ, CAP_WRITE, CAP_SEEK); + cap_rights_t r_rwspoll; + cap_rights_init(&r_rwspoll, CAP_READ, CAP_WRITE, CAP_SEEK, CAP_EVENT); + cap_rights_t r_rwsnotify; + cap_rights_init(&r_rwsnotify, CAP_READ, CAP_WRITE, CAP_SEEK, CAP_NOTIFY); + cap_rights_t r_rsl; + cap_rights_init(&r_rsl, CAP_READ, CAP_SEEK, CAP_LOOKUP); + cap_rights_t r_rslstat; + cap_rights_init(&r_rslstat, CAP_READ, CAP_SEEK, CAP_LOOKUP, CAP_FSTAT); + cap_rights_t r_rsstat; + cap_rights_init(&r_rsstat, CAP_READ, CAP_SEEK, CAP_FSTAT); + + int cap_fd_ro = dup(fa_fd); + EXPECT_OK(cap_fd_ro); + EXPECT_OK(cap_rights_limit(cap_fd_ro, 
&r_rs)); + int cap_fd_wo = dup(fa_fd); + EXPECT_OK(cap_fd_wo); + EXPECT_OK(cap_rights_limit(cap_fd_wo, &r_ws)); + int cap_fd_rw = dup(fa_fd); + EXPECT_OK(cap_fd_rw); + EXPECT_OK(cap_rights_limit(cap_fd_rw, &r_rws)); + int cap_fd_poll = dup(fa_fd); + EXPECT_OK(cap_fd_poll); + EXPECT_OK(cap_rights_limit(cap_fd_poll, &r_rwspoll)); + int cap_fd_not = dup(fa_fd); + EXPECT_OK(cap_fd_not); + EXPECT_OK(cap_rights_limit(cap_fd_not, &r_rwsnotify)); + + int rc = mkdir(TmpFile("cap_notify"), 0755); + EXPECT_TRUE(rc == 0 || errno == EEXIST); + int dfd = open(TmpFile("cap_notify"), O_RDONLY); + EXPECT_OK(dfd); + int fd = open(TmpFile("cap_notify/file"), O_CREAT|O_RDWR, 0644); + close(fd); + int cap_dfd = dup(dfd); + EXPECT_OK(cap_dfd); + EXPECT_OK(cap_rights_limit(cap_dfd, &r_rslstat)); + EXPECT_OK(cap_dfd); + int cap_dfd_rs = dup(dfd); + EXPECT_OK(cap_dfd_rs); + EXPECT_OK(cap_rights_limit(cap_dfd_rs, &r_rs)); + EXPECT_OK(cap_dfd_rs); + int cap_dfd_rsstat = dup(dfd); + EXPECT_OK(cap_dfd_rsstat); + EXPECT_OK(cap_rights_limit(cap_dfd_rsstat, &r_rsstat)); + EXPECT_OK(cap_dfd_rsstat); + int cap_dfd_rsl = dup(dfd); + EXPECT_OK(cap_dfd_rsl); + EXPECT_OK(cap_rights_limit(cap_dfd_rsl, &r_rsl)); + EXPECT_OK(cap_dfd_rsl); + + // Need CAP_NOTIFY to change what's monitored. + EXPECT_NOTCAPABLE(fanotify_mark(cap_fd_ro, FAN_MARK_ADD, FAN_OPEN|FAN_MODIFY|FAN_EVENT_ON_CHILD, cap_dfd, NULL)); + EXPECT_NOTCAPABLE(fanotify_mark(cap_fd_wo, FAN_MARK_ADD, FAN_OPEN|FAN_MODIFY|FAN_EVENT_ON_CHILD, cap_dfd, NULL)); + EXPECT_NOTCAPABLE(fanotify_mark(cap_fd_rw, FAN_MARK_ADD, FAN_OPEN|FAN_MODIFY|FAN_EVENT_ON_CHILD, cap_dfd, NULL)); + EXPECT_OK(fanotify_mark(cap_fd_not, FAN_MARK_ADD, FAN_OPEN|FAN_MODIFY|FAN_EVENT_ON_CHILD, cap_dfd, NULL)); + + // Need CAP_FSTAT on the thing monitored. 
+ EXPECT_NOTCAPABLE(fanotify_mark(cap_fd_not, FAN_MARK_ADD, FAN_OPEN|FAN_MODIFY|FAN_EVENT_ON_CHILD, cap_dfd_rs, NULL)); + EXPECT_OK(fanotify_mark(cap_fd_not, FAN_MARK_ADD, FAN_OPEN|FAN_MODIFY|FAN_EVENT_ON_CHILD, cap_dfd_rsstat, NULL)); + + // To add monitoring of a file under a dfd, need CAP_LOOKUP|CAP_FSTAT on the dfd. + EXPECT_NOTCAPABLE(fanotify_mark(cap_fd_not, FAN_MARK_ADD, FAN_OPEN|FAN_MODIFY, cap_dfd_rsstat, "file")); + EXPECT_NOTCAPABLE(fanotify_mark(cap_fd_not, FAN_MARK_ADD, FAN_OPEN|FAN_MODIFY, cap_dfd_rsl, "file")); + EXPECT_OK(fanotify_mark(cap_fd_not, FAN_MARK_ADD, FAN_OPEN|FAN_MODIFY, cap_dfd, "file")); + + pid_t child = fork(); + if (child == 0) { + // Child: Perform activity in the directory under notify. + sleep(1); + unlink(TmpFile("cap_notify/temp")); + int fd = open(TmpFile("cap_notify/temp"), O_CREAT|O_RDWR, 0644); + close(fd); + exit(0); + } + + // Need CAP_EVENT to poll. + struct pollfd poll_fd; + poll_fd.revents = 0; + poll_fd.events = POLLIN; + poll_fd.fd = cap_fd_rw; + EXPECT_OK(poll(&poll_fd, 1, 1400)); + EXPECT_EQ(0, (poll_fd.revents & POLLIN)); + EXPECT_NE(0, (poll_fd.revents & POLLNVAL)); + + poll_fd.fd = cap_fd_not; + EXPECT_OK(poll(&poll_fd, 1, 1400)); + EXPECT_EQ(0, (poll_fd.revents & POLLIN)); + EXPECT_NE(0, (poll_fd.revents & POLLNVAL)); + + poll_fd.fd = cap_fd_poll; + EXPECT_OK(poll(&poll_fd, 1, 1400)); + EXPECT_NE(0, (poll_fd.revents & POLLIN)); + EXPECT_EQ(0, (poll_fd.revents & POLLNVAL)); + + // Need CAP_READ to read. + struct fanotify_event_metadata ev; + memset(&ev, 0, sizeof(ev)); + EXPECT_NOTCAPABLE(read(cap_fd_wo, &ev, sizeof(ev))); + rc = read(fa_fd, &ev, sizeof(ev)); + EXPECT_OK(rc); + EXPECT_EQ((int)sizeof(struct fanotify_event_metadata), rc); + EXPECT_EQ(child, ev.pid); + EXPECT_NE(0, ev.fd); + + // TODO(drysdale): reinstate if/when capsicum-linux propagates rights + // to fanotify-generated FDs. +#ifdef OMIT + // fanotify(7) gives us a FD for the changed file. 
This should + // only have rights that are a subset of those for the original + // monitored directory file descriptor. + cap_rights_t rights; + CAP_SET_ALL(&rights); + EXPECT_OK(cap_rights_get(ev.fd, &rights)); + EXPECT_RIGHTS_IN(&rights, &r_rslstat); +#endif + + // Wait for the child. + int status; + EXPECT_EQ(child, waitpid(child, &status, 0)); + rc = WIFEXITED(status) ? WEXITSTATUS(status) : -1; + EXPECT_EQ(0, rc); + + close(cap_dfd_rsstat); + close(cap_dfd_rsl); + close(cap_dfd_rs); + close(cap_dfd); + close(dfd); + unlink(TmpFile("cap_notify/file")); + unlink(TmpFile("cap_notify/temp")); + rmdir(TmpFile("cap_notify")); + close(cap_fd_not); + close(cap_fd_poll); + close(cap_fd_rw); + close(cap_fd_wo); + close(cap_fd_ro); + close(fa_fd); +} +#endif + +TEST(Linux, inotify) { + int i_fd = inotify_init(); + EXPECT_OK(i_fd); + + cap_rights_t r_rs; + cap_rights_init(&r_rs, CAP_READ, CAP_SEEK); + cap_rights_t r_ws; + cap_rights_init(&r_ws, CAP_WRITE, CAP_SEEK); + cap_rights_t r_rws; + cap_rights_init(&r_rws, CAP_READ, CAP_WRITE, CAP_SEEK); + cap_rights_t r_rwsnotify; + cap_rights_init(&r_rwsnotify, CAP_READ, CAP_WRITE, CAP_SEEK, CAP_NOTIFY); + + int cap_fd_ro = dup(i_fd); + EXPECT_OK(cap_fd_ro); + EXPECT_OK(cap_rights_limit(cap_fd_ro, &r_rs)); + int cap_fd_wo = dup(i_fd); + EXPECT_OK(cap_fd_wo); + EXPECT_OK(cap_rights_limit(cap_fd_wo, &r_ws)); + int cap_fd_rw = dup(i_fd); + EXPECT_OK(cap_fd_rw); + EXPECT_OK(cap_rights_limit(cap_fd_rw, &r_rws)); + int cap_fd_all = dup(i_fd); + EXPECT_OK(cap_fd_all); + EXPECT_OK(cap_rights_limit(cap_fd_all, &r_rwsnotify)); + + int fd = open(TmpFile("cap_inotify"), O_CREAT|O_RDWR, 0644); + EXPECT_NOTCAPABLE(inotify_add_watch(cap_fd_rw, TmpFile("cap_inotify"), IN_ACCESS|IN_MODIFY)); + int wd = inotify_add_watch(i_fd, TmpFile("cap_inotify"), IN_ACCESS|IN_MODIFY); + EXPECT_OK(wd); + + unsigned char buffer[] = {1, 2, 3, 4}; + EXPECT_OK(write(fd, buffer, sizeof(buffer))); + + struct inotify_event iev; + memset(&iev, 0, sizeof(iev)); + 
EXPECT_NOTCAPABLE(read(cap_fd_wo, &iev, sizeof(iev))); + int rc = read(cap_fd_ro, &iev, sizeof(iev)); + EXPECT_OK(rc); + EXPECT_EQ((int)sizeof(iev), rc); + EXPECT_EQ(wd, iev.wd); + + EXPECT_NOTCAPABLE(inotify_rm_watch(cap_fd_wo, wd)); + EXPECT_OK(inotify_rm_watch(cap_fd_all, wd)); + + close(fd); + close(cap_fd_all); + close(cap_fd_rw); + close(cap_fd_wo); + close(cap_fd_ro); + close(i_fd); + unlink(TmpFile("cap_inotify")); +} + +TEST(Linux, ArchChange) { + const char* prog_candidates[] = {"./mini-me.32", "./mini-me.x32", "./mini-me.64"}; + const char* progs[] = {NULL, NULL, NULL}; + char* argv_pass[] = {(char*)"to-come", (char*)"--capmode", NULL}; + char* null_envp[] = {NULL}; + int fds[3]; + int count = 0; + + for (int ii = 0; ii < 3; ii++) { + fds[count] = open(prog_candidates[ii], O_RDONLY); + if (fds[count] >= 0) { + progs[count] = prog_candidates[ii]; + count++; + } + } + if (count == 0) { + TEST_SKIPPED("no different-architecture programs available"); + return; + } + + for (int ii = 0; ii < count; ii++) { + // Fork-and-exec a binary of this architecture. + pid_t child = fork(); + if (child == 0) { + EXPECT_OK(cap_enter()); // Enter capability mode + if (verbose) fprintf(stderr, "[%d] call fexecve(%s, %s)\n", + getpid_(), progs[ii], argv_pass[1]); + argv_pass[0] = (char *)progs[ii]; + int rc = fexecve_(fds[ii], argv_pass, null_envp); + fprintf(stderr, "fexecve(%s) returned %d errno %d\n", progs[ii], rc, errno); + exit(99); // Should not reach here. + } + int status; + EXPECT_EQ(child, waitpid(child, &status, 0)); + int rc = WIFEXITED(status) ? WEXITSTATUS(status) : -1; + EXPECT_EQ(0, rc); + close(fds[ii]); + } +} + +FORK_TEST(Linux, Namespace) { + REQUIRE_ROOT(); + pid_t me = getpid_(); + + // Create a new UTS namespace. + EXPECT_OK(unshare(CLONE_NEWUTS)); + // Open an FD to its symlink. 
+ char buffer[256]; + sprintf(buffer, "/proc/%d/ns/uts", me); + int ns_fd = open(buffer, O_RDONLY); + + cap_rights_t r_rwlstat; + cap_rights_init(&r_rwlstat, CAP_READ, CAP_WRITE, CAP_LOOKUP, CAP_FSTAT); + cap_rights_t r_rwlstatns; + cap_rights_init(&r_rwlstatns, CAP_READ, CAP_WRITE, CAP_LOOKUP, CAP_FSTAT, CAP_SETNS); + + int cap_fd = dup(ns_fd); + EXPECT_OK(cap_fd); + EXPECT_OK(cap_rights_limit(cap_fd, &r_rwlstat)); + int cap_fd_setns = dup(ns_fd); + EXPECT_OK(cap_fd_setns); + EXPECT_OK(cap_rights_limit(cap_fd_setns, &r_rwlstatns)); + EXPECT_NOTCAPABLE(setns(cap_fd, CLONE_NEWUTS)); + EXPECT_OK(setns(cap_fd_setns, CLONE_NEWUTS)); + + EXPECT_OK(cap_enter()); // Enter capability mode. + + // No setns(2) but unshare(2) is allowed. + EXPECT_CAPMODE(setns(ns_fd, CLONE_NEWUTS)); + EXPECT_OK(unshare(CLONE_NEWUTS)); +} + +static void SendFD(int fd, int over) { + struct msghdr mh; + mh.msg_name = NULL; // No address needed + mh.msg_namelen = 0; + char buffer1[1024]; + struct iovec iov[1]; + iov[0].iov_base = buffer1; + iov[0].iov_len = sizeof(buffer1); + mh.msg_iov = iov; + mh.msg_iovlen = 1; + char buffer2[1024]; + mh.msg_control = buffer2; + mh.msg_controllen = CMSG_LEN(sizeof(int)); + struct cmsghdr *cmptr = CMSG_FIRSTHDR(&mh); + cmptr->cmsg_level = SOL_SOCKET; + cmptr->cmsg_type = SCM_RIGHTS; + cmptr->cmsg_len = CMSG_LEN(sizeof(int)); + *(int *)CMSG_DATA(cmptr) = fd; + buffer1[0] = 0; + iov[0].iov_len = 1; + int rc = sendmsg(over, &mh, 0); + EXPECT_OK(rc); +} + +static int ReceiveFD(int over) { + struct msghdr mh; + mh.msg_name = NULL; // No address needed + mh.msg_namelen = 0; + char buffer1[1024]; + struct iovec iov[1]; + iov[0].iov_base = buffer1; + iov[0].iov_len = sizeof(buffer1); + mh.msg_iov = iov; + mh.msg_iovlen = 1; + char buffer2[1024]; + mh.msg_control = buffer2; + mh.msg_controllen = sizeof(buffer2); + int rc = recvmsg(over, &mh, 0); + EXPECT_OK(rc); + EXPECT_LE(CMSG_LEN(sizeof(int)), mh.msg_controllen); + struct cmsghdr *cmptr = CMSG_FIRSTHDR(&mh); + int fd 
= *(int*)CMSG_DATA(cmptr); + EXPECT_EQ(CMSG_LEN(sizeof(int)), cmptr->cmsg_len); + cmptr = CMSG_NXTHDR(&mh, cmptr); + EXPECT_TRUE(cmptr == NULL); + return fd; +} + +static int shared_pd = -1; +static int shared_sock_fds[2]; + +static int ChildFunc(void *arg) { + // This function is running in a new PID namespace, and so is pid 1. + if (verbose) fprintf(stderr, " ChildFunc: pid=%d, ppid=%d\n", getpid_(), getppid()); + EXPECT_EQ(1, getpid_()); + EXPECT_EQ(0, getppid()); + + // The shared process descriptor is outside our namespace, so we cannot + // get its pid. + if (verbose) fprintf(stderr, " ChildFunc: shared_pd=%d\n", shared_pd); + pid_t shared_child = -1; + EXPECT_OK(pdgetpid(shared_pd, &shared_child)); + if (verbose) fprintf(stderr, " ChildFunc: corresponding pid=%d\n", shared_child); + EXPECT_EQ(0, shared_child); + + // But we can pdkill() it even so. + if (verbose) fprintf(stderr, " ChildFunc: call pdkill(pd=%d)\n", shared_pd); + EXPECT_OK(pdkill(shared_pd, SIGINT)); + + int pd; + pid_t child = pdfork(&pd, 0); + EXPECT_OK(child); + if (child == 0) { + // Child: expect pid 2. + if (verbose) fprintf(stderr, " child of ChildFunc: pid=%d, ppid=%d\n", getpid_(), getppid()); + EXPECT_EQ(2, getpid_()); + EXPECT_EQ(1, getppid()); + while (true) { + if (verbose) fprintf(stderr, " child of ChildFunc: \"I aten't dead\"\n"); + sleep(1); + } + exit(0); + } + EXPECT_EQ(2, child); + EXPECT_PID_ALIVE(child); + if (verbose) fprintf(stderr, " ChildFunc: pdfork() -> pd=%d, corresponding pid=%d state='%c'\n", + pd, child, ProcessState(child)); + + pid_t pid; + EXPECT_OK(pdgetpid(pd, &pid)); + EXPECT_EQ(child, pid); + + sleep(2); + + // Send the process descriptor over UNIX domain socket back to parent. + SendFD(pd, shared_sock_fds[1]); + + // Wait for death of (grand)child, killed by our parent. 
+ if (verbose) fprintf(stderr, " ChildFunc: wait on pid=%d\n", child); + int status; + EXPECT_EQ(child, wait4(child, &status, __WALL, NULL)); + + if (verbose) fprintf(stderr, " ChildFunc: return 0\n"); + return 0; +} + +#define STACK_SIZE (1024 * 1024) +static char child_stack[STACK_SIZE]; + +// TODO(drysdale): fork into a user namespace first so REQUIRE_ROOT can be removed. +TEST(Linux, PidNamespacePdFork) { + REQUIRE_ROOT(); + // Pass process descriptors in both directions across a PID namespace boundary. + // pdfork() off a child before we start, holding its process descriptor in a global + // variable that's accessible to children. + pid_t firstborn = pdfork(&shared_pd, 0); + EXPECT_OK(firstborn); + if (firstborn == 0) { + while (true) { + if (verbose) fprintf(stderr, " Firstborn: \"I aten't dead\"\n"); + sleep(1); + } + exit(0); + } + EXPECT_PID_ALIVE(firstborn); + if (verbose) fprintf(stderr, "Parent: pre-pdfork()ed pd=%d, pid=%d state='%c'\n", + shared_pd, firstborn, ProcessState(firstborn)); + sleep(2); + + // Prepare sockets to communicate with child process. + EXPECT_OK(socketpair(AF_UNIX, SOCK_STREAM, 0, shared_sock_fds)); + + // Clone into a child process with a new pid namespace. + pid_t child = clone(ChildFunc, child_stack + STACK_SIZE, + CLONE_FILES|CLONE_NEWPID|SIGCHLD, NULL); + EXPECT_OK(child); + EXPECT_PID_ALIVE(child); + if (verbose) fprintf(stderr, "Parent: child is %d state='%c'\n", child, ProcessState(child)); + + // Ensure the child runs. First thing it does is to kill our firstborn, using shared_pd. + sleep(1); + EXPECT_PID_DEAD(firstborn); + + // But we can still retrieve firstborn's PID, as it's not been reaped yet. + pid_t child0; + EXPECT_OK(pdgetpid(shared_pd, &child0)); + EXPECT_EQ(firstborn, child0); + if (verbose) fprintf(stderr, "Parent: check on firstborn: pdgetpid(pd=%d) -> child=%d state='%c'\n", + shared_pd, child0, ProcessState(child0)); + + // Now reap it. 
+ int status; + EXPECT_EQ(firstborn, waitpid(firstborn, &status, __WALL)); + + // Get the process descriptor of the child-of-child via socket transfer. + int grandchild_pd = ReceiveFD(shared_sock_fds[0]); + + // Our notion of the pid associated with the grandchild is in the main PID namespace. + pid_t grandchild; + EXPECT_OK(pdgetpid(grandchild_pd, &grandchild)); + EXPECT_NE(2, grandchild); + if (verbose) fprintf(stderr, "Parent: pre-pdkill: pdgetpid(grandchild_pd=%d) -> grandchild=%d state='%c'\n", + grandchild_pd, grandchild, ProcessState(grandchild)); + EXPECT_PID_ALIVE(grandchild); + + // Kill the grandchild via the process descriptor. + EXPECT_OK(pdkill(grandchild_pd, SIGINT)); + usleep(10000); + if (verbose) fprintf(stderr, "Parent: post-pdkill: pdgetpid(grandchild_pd=%d) -> grandchild=%d state='%c'\n", + grandchild_pd, grandchild, ProcessState(grandchild)); + EXPECT_PID_DEAD(grandchild); + + sleep(2); + + // Wait for the child. + EXPECT_EQ(child, waitpid(child, &status, WNOHANG)); + int rc = WIFEXITED(status) ? WEXITSTATUS(status) : -1; + EXPECT_EQ(0, rc); + + close(shared_sock_fds[0]); + close(shared_sock_fds[1]); + close(shared_pd); + close(grandchild_pd); +} + +int NSInit(void *data) { + // This function is running in a new PID namespace, and so is pid 1. + if (verbose) fprintf(stderr, " NSInit: pid=%d, ppid=%d\n", getpid_(), getppid()); + EXPECT_EQ(1, getpid_()); + EXPECT_EQ(0, getppid()); + + int pd; + pid_t child = pdfork(&pd, 0); + EXPECT_OK(child); + if (child == 0) { + // Child: loop forever until terminated. 
+ if (verbose) fprintf(stderr, " child of NSInit: pid=%d, ppid=%d\n", getpid_(), getppid()); + while (true) { + if (verbose) fprintf(stderr, " child of NSInit: \"I aten't dead\"\n"); + usleep(100000); + } + exit(0); + } + EXPECT_EQ(2, child); + EXPECT_PID_ALIVE(child); + if (verbose) fprintf(stderr, " NSInit: pdfork() -> pd=%d, corresponding pid=%d state='%c'\n", + pd, child, ProcessState(child)); + sleep(1); + + // Send the process descriptor over UNIX domain socket back to parent. + SendFD(pd, shared_sock_fds[1]); + close(pd); + + // Wait for a byte back in the other direction. + int value; + if (verbose) fprintf(stderr, " NSInit: block waiting for value\n"); + read(shared_sock_fds[1], &value, sizeof(value)); + + if (verbose) fprintf(stderr, " NSInit: return 0\n"); + return 0; +} + +TEST(Linux, DeadNSInit) { + REQUIRE_ROOT(); + + // Prepare sockets to communicate with child process. + EXPECT_OK(socketpair(AF_UNIX, SOCK_STREAM, 0, shared_sock_fds)); + + // Clone into a child process with a new pid namespace. + pid_t child = clone(NSInit, child_stack + STACK_SIZE, + CLONE_FILES|CLONE_NEWPID|SIGCHLD, NULL); + usleep(10000); + EXPECT_OK(child); + EXPECT_PID_ALIVE(child); + if (verbose) fprintf(stderr, "Parent: child is %d state='%c'\n", child, ProcessState(child)); + + // Get the process descriptor of the child-of-child via socket transfer. + int grandchild_pd = ReceiveFD(shared_sock_fds[0]); + pid_t grandchild; + EXPECT_OK(pdgetpid(grandchild_pd, &grandchild)); + if (verbose) fprintf(stderr, "Parent: grandchild is %d state='%c'\n", grandchild, ProcessState(grandchild)); + + // Send an int to the child to trigger its termination. Grandchild should also + // go, as its init process is gone. + int zero = 0; + if (verbose) fprintf(stderr, "Parent: write 0 to pipe\n"); + write(shared_sock_fds[0], &zero, sizeof(zero)); + EXPECT_PID_ZOMBIE(child); + EXPECT_PID_GONE(grandchild); + + // Wait for the child. 
+ int status; + EXPECT_EQ(child, waitpid(child, &status, WNOHANG)); + int rc = WIFEXITED(status) ? WEXITSTATUS(status) : -1; + EXPECT_EQ(0, rc); + EXPECT_PID_GONE(child); + + close(shared_sock_fds[0]); + close(shared_sock_fds[1]); + close(grandchild_pd); + + if (verbose) { + fprintf(stderr, "Parent: child %d in state='%c'\n", child, ProcessState(child)); + fprintf(stderr, "Parent: grandchild %d in state='%c'\n", grandchild, ProcessState(grandchild)); + } +} + +TEST(Linux, DeadNSInit2) { + REQUIRE_ROOT(); + + // Prepare sockets to communicate with child process. + EXPECT_OK(socketpair(AF_UNIX, SOCK_STREAM, 0, shared_sock_fds)); + + // Clone into a child process with a new pid namespace. + pid_t child = clone(NSInit, child_stack + STACK_SIZE, + CLONE_FILES|CLONE_NEWPID|SIGCHLD, NULL); + usleep(10000); + EXPECT_OK(child); + EXPECT_PID_ALIVE(child); + if (verbose) fprintf(stderr, "Parent: child is %d state='%c'\n", child, ProcessState(child)); + + // Get the process descriptor of the child-of-child via socket transfer. + int grandchild_pd = ReceiveFD(shared_sock_fds[0]); + pid_t grandchild; + EXPECT_OK(pdgetpid(grandchild_pd, &grandchild)); + if (verbose) fprintf(stderr, "Parent: grandchild is %d state='%c'\n", grandchild, ProcessState(grandchild)); + + // Kill the grandchild + EXPECT_OK(pdkill(grandchild_pd, SIGINT)); + usleep(10000); + EXPECT_PID_ZOMBIE(grandchild); + // Close the process descriptor, so there are now no procdesc references to grandchild. + close(grandchild_pd); + + // Send an int to the child to trigger its termination. Grandchild should also + // go, as its init process is gone. + int zero = 0; + if (verbose) fprintf(stderr, "Parent: write 0 to pipe\n"); + write(shared_sock_fds[0], &zero, sizeof(zero)); + EXPECT_PID_ZOMBIE(child); + EXPECT_PID_GONE(grandchild); + + // Wait for the child. + int status; + EXPECT_EQ(child, waitpid(child, &status, WNOHANG)); + int rc = WIFEXITED(status) ? 
WEXITSTATUS(status) : -1; + EXPECT_EQ(0, rc); + + close(shared_sock_fds[0]); + close(shared_sock_fds[1]); + + if (verbose) { + fprintf(stderr, "Parent: child %d in state='%c'\n", child, ProcessState(child)); + fprintf(stderr, "Parent: grandchild %d in state='%c'\n", grandchild, ProcessState(grandchild)); + } +} + +#ifdef __x86_64__ +FORK_TEST(Linux, CheckHighWord) { + EXPECT_OK(cap_enter()); // Enter capability mode. + + int rc = prctl(PR_GET_NO_NEW_PRIVS, 0, 0, 0, 0); + EXPECT_OK(rc); + EXPECT_EQ(1, rc); // no_new_privs = 1 + + // Set some of the high 32-bits of argument zero. + uint64_t big_cmd = PR_GET_NO_NEW_PRIVS | 0x100000000LL; + EXPECT_CAPMODE(syscall(__NR_prctl, big_cmd, 0, 0, 0, 0)); +} +#endif + +FORK_TEST(Linux, PrctlOpenatBeneath) { + // Set no_new_privs = 1 + EXPECT_OK(prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)); + int rc = prctl(PR_GET_NO_NEW_PRIVS, 0, 0, 0, 0); + EXPECT_OK(rc); + EXPECT_EQ(1, rc); // no_new_privs = 1 + + // Set openat-beneath mode + EXPECT_OK(prctl(PR_SET_OPENAT_BENEATH, 1, 0, 0, 0)); + rc = prctl(PR_GET_OPENAT_BENEATH, 0, 0, 0, 0); + EXPECT_OK(rc); + EXPECT_EQ(1, rc); // openat_beneath = 1 + + // Clear openat-beneath mode + EXPECT_OK(prctl(PR_SET_OPENAT_BENEATH, 0, 0, 0, 0)); + rc = prctl(PR_GET_OPENAT_BENEATH, 0, 0, 0, 0); + EXPECT_OK(rc); + EXPECT_EQ(0, rc); // openat_beneath = 0 + + EXPECT_OK(cap_enter()); // Enter capability mode + + // Expect to be in openat_beneath mode + rc = prctl(PR_GET_OPENAT_BENEATH, 0, 0, 0, 0); + EXPECT_OK(rc); + EXPECT_EQ(1, rc); // openat_beneath = 1 + + // Expect this to be immutable. + EXPECT_CAPMODE(prctl(PR_SET_OPENAT_BENEATH, 0, 0, 0, 0)); + rc = prctl(PR_GET_OPENAT_BENEATH, 0, 0, 0, 0); + EXPECT_OK(rc); + EXPECT_EQ(1, rc); // openat_beneath = 1 + +} + +FORK_TEST(Linux, NoNewPrivs) { + if (getuid() == 0) { + // If root, drop CAP_SYS_ADMIN POSIX.1e capability. 
+ struct __user_cap_header_struct hdr; + hdr.version = _LINUX_CAPABILITY_VERSION_3; + hdr.pid = getpid_(); + struct __user_cap_data_struct data[3]; + EXPECT_OK(capget(&hdr, &data[0])); + data[0].effective &= ~(1 << CAP_SYS_ADMIN); + data[0].permitted &= ~(1 << CAP_SYS_ADMIN); + data[0].inheritable &= ~(1 << CAP_SYS_ADMIN); + EXPECT_OK(capset(&hdr, &data[0])); + } + int rc = prctl(PR_GET_NO_NEW_PRIVS, 0, 0, 0, 0); + EXPECT_OK(rc); + EXPECT_EQ(0, rc); // no_new_privs == 0 + + // Can't enter seccomp-bpf mode with no_new_privs == 0 + struct sock_filter filter[] = { + BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_ALLOW) + }; + struct sock_fprog bpf; + bpf.len = (sizeof(filter) / sizeof(filter[0])); + bpf.filter = filter; + rc = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &bpf, 0, 0); + EXPECT_EQ(-1, rc); + EXPECT_EQ(EACCES, errno); + + // Set no_new_privs = 1 + EXPECT_OK(prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)); + rc = prctl(PR_GET_NO_NEW_PRIVS, 0, 0, 0, 0); + EXPECT_OK(rc); + EXPECT_EQ(1, rc); // no_new_privs = 1 + + // Can now turn on seccomp mode + EXPECT_OK(prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &bpf, 0, 0)); +} + +/* Macros for BPF generation (err is parenthesized so expression arguments expand correctly) */ +#define BPF_RETURN_ERRNO(err) \ + BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_ERRNO | ((err) & 0xFFFF)) +#define BPF_KILL_PROCESS \ + BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_KILL) +#define BPF_ALLOW \ + BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_ALLOW) +#define EXAMINE_SYSCALL \ + BPF_STMT(BPF_LD+BPF_W+BPF_ABS, offsetof(struct seccomp_data, nr)) +#define ALLOW_SYSCALL(name) \ + BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, __NR_##name, 0, 1), \ + BPF_ALLOW +#define KILL_SYSCALL(name) \ + BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, __NR_##name, 0, 1), \ + BPF_KILL_PROCESS +#define FAIL_SYSCALL(name, err) \ + BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, __NR_##name, 0, 1), \ + BPF_RETURN_ERRNO(err) + +TEST(Linux, CapModeWithBPF) { + pid_t child = fork(); + EXPECT_OK(child); + if (child == 0) { + int fd = open(TmpFile("cap_bpf_capmode"), O_CREAT|O_RDWR, 0644); + cap_rights_t rights; + 
cap_rights_init(&rights, CAP_READ, CAP_WRITE, CAP_SEEK, CAP_FSYNC); + EXPECT_OK(cap_rights_limit(fd, &rights)); + + struct sock_filter filter[] = { EXAMINE_SYSCALL, + FAIL_SYSCALL(fchmod, ENOMEM), + FAIL_SYSCALL(fstat, ENOEXEC), + ALLOW_SYSCALL(close), + KILL_SYSCALL(fsync), + BPF_ALLOW }; + struct sock_fprog bpf = {.len = (sizeof(filter) / sizeof(filter[0])), + .filter = filter}; + // Set up seccomp-bpf first. + EXPECT_OK(prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)); + EXPECT_OK(prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &bpf, 0, 0)); + + EXPECT_OK(cap_enter()); // Enter capability mode. + + // fchmod is allowed by Capsicum, but failed by BPF. + EXPECT_SYSCALL_FAIL(ENOMEM, fchmod(fd, 0644)); + // open is allowed by BPF, but failed by Capsicum + EXPECT_SYSCALL_FAIL(ECAPMODE, open(TmpFile("cap_bpf_capmode"), O_RDONLY)); + // fstat is failed by both BPF and Capsicum; tie-break is on errno + struct stat buf; + EXPECT_SYSCALL_FAIL(ENOEXEC, fstat(fd, &buf)); + // fsync is allowed by Capsicum, but BPF's SIGSYS generation take precedence + fsync(fd); // terminate with unhandled SIGSYS + exit(0); + } + int status; + EXPECT_EQ(child, waitpid(child, &status, 0)); + EXPECT_TRUE(WIFSIGNALED(status)); + EXPECT_EQ(SIGSYS, WTERMSIG(status)); + unlink(TmpFile("cap_bpf_capmode")); +} + +TEST(Linux, AIO) { + int fd = open(TmpFile("cap_aio"), O_CREAT|O_RDWR, 0644); + EXPECT_OK(fd); + + cap_rights_t r_rs; + cap_rights_init(&r_rs, CAP_READ, CAP_SEEK); + cap_rights_t r_ws; + cap_rights_init(&r_ws, CAP_WRITE, CAP_SEEK); + cap_rights_t r_rwssync; + cap_rights_init(&r_rwssync, CAP_READ, CAP_WRITE, CAP_SEEK, CAP_FSYNC); + + int cap_ro = dup(fd); + EXPECT_OK(cap_ro); + EXPECT_OK(cap_rights_limit(cap_ro, &r_rs)); + EXPECT_OK(cap_ro); + int cap_wo = dup(fd); + EXPECT_OK(cap_wo); + EXPECT_OK(cap_rights_limit(cap_wo, &r_ws)); + EXPECT_OK(cap_wo); + int cap_all = dup(fd); + EXPECT_OK(cap_all); + EXPECT_OK(cap_rights_limit(cap_all, &r_rwssync)); + EXPECT_OK(cap_all); + + // Linux: io_setup, io_submit, 
io_getevents, io_cancel, io_destroy + aio_context_t ctx = 0; + EXPECT_OK(syscall(__NR_io_setup, 10, &ctx)); + + unsigned char buffer[32] = {1, 2, 3, 4}; + struct iocb req; + memset(&req, 0, sizeof(req)); + req.aio_reqprio = 0; + req.aio_fildes = fd; + uintptr_t bufaddr = (uintptr_t)buffer; + req.aio_buf = (__u64)bufaddr; + req.aio_nbytes = 4; + req.aio_offset = 0; + struct iocb* reqs[1] = {&req}; + + // Write operation: expected to be refused via cap_ro and accepted via cap_wo + req.aio_lio_opcode = IOCB_CMD_PWRITE; + req.aio_fildes = cap_ro; + EXPECT_NOTCAPABLE(syscall(__NR_io_submit, ctx, 1, reqs)); + req.aio_fildes = cap_wo; + EXPECT_OK(syscall(__NR_io_submit, ctx, 1, reqs)); + + // Sync operation (req still targets cap_wo): both sync opcodes are expected to be refused as not-capable + req.aio_lio_opcode = IOCB_CMD_FSYNC; + EXPECT_NOTCAPABLE(syscall(__NR_io_submit, ctx, 1, reqs)); + req.aio_lio_opcode = IOCB_CMD_FDSYNC; + EXPECT_NOTCAPABLE(syscall(__NR_io_submit, ctx, 1, reqs)); + // Even with CAP_FSYNC, turns out fsync/fdsync aren't implemented, so the calls still fail, just not with a capability error + req.aio_fildes = cap_all; + EXPECT_FAIL_NOT_NOTCAPABLE(syscall(__NR_io_submit, ctx, 1, reqs)); + req.aio_lio_opcode = IOCB_CMD_FSYNC; + EXPECT_FAIL_NOT_NOTCAPABLE(syscall(__NR_io_submit, ctx, 1, reqs)); + + // Read operation: expected to be refused via cap_wo and accepted via cap_ro + req.aio_lio_opcode = IOCB_CMD_PREAD; + req.aio_fildes = cap_wo; + EXPECT_NOTCAPABLE(syscall(__NR_io_submit, ctx, 1, reqs)); + req.aio_fildes = cap_ro; + EXPECT_OK(syscall(__NR_io_submit, ctx, 1, reqs)); + + EXPECT_OK(syscall(__NR_io_destroy, ctx)); + + close(cap_all); + close(cap_wo); + close(cap_ro); + close(fd); + unlink(TmpFile("cap_aio")); +} + +#ifndef KCMP_FILE +#define KCMP_FILE 0 // fallback for when the headers in use do not define it +#endif +TEST(Linux, Kcmp) { + // This requires CONFIG_CHECKPOINT_RESTORE in kernel config. + int fd = open("/etc/passwd", O_RDONLY); + EXPECT_OK(fd); + pid_t parent = getpid_(); + + errno = 0; // clear any stale value so the ENOSYS check below reflects this call only + int rc = syscall(__NR_kcmp, parent, parent, KCMP_FILE, fd, fd); + if (rc == -1 && errno == ENOSYS) { + TEST_SKIPPED("kcmp(2) gives -ENOSYS"); + return; + } + + pid_t child = fork(); + if (child == 0) { + // Child: limit rights on FD.
+ child = getpid_(); + EXPECT_OK(syscall(__NR_kcmp, parent, child, KCMP_FILE, fd, fd)); + cap_rights_t rights; + cap_rights_init(&rights, CAP_READ, CAP_WRITE); + EXPECT_OK(cap_rights_limit(fd, &rights)); + // A capability wrapping a normal FD is different (from a kcmp(2) perspective) + // than the original file. + EXPECT_NE(0, syscall(__NR_kcmp, parent, child, KCMP_FILE, fd, fd)); + exit(HasFailure()); + } + // Wait for the child. + int status; + EXPECT_EQ(child, waitpid(child, &status, 0)); + rc = WIFEXITED(status) ? WEXITSTATUS(status) : -1; + EXPECT_EQ(0, rc); + + close(fd); +} + +TEST(Linux, ProcFS) { + cap_rights_t rights; + cap_rights_init(&rights, CAP_READ, CAP_SEEK); + int fd = open("/etc/passwd", O_RDONLY); + EXPECT_OK(fd); + lseek(fd, 4, SEEK_SET); // the offset that the fdinfo text is expected to report below + int cap = dup(fd); + EXPECT_OK(cap); + EXPECT_OK(cap_rights_limit(cap, &rights)); + pid_t me = getpid_(); + + char buffer[1024] = {0}; // zero-filled so that the text read below is always NUL-terminated + snprintf(buffer, sizeof(buffer), "/proc/%d/fdinfo/%d", me, cap); + int procfd = open(buffer, O_RDONLY); + EXPECT_OK(procfd) << " failed to open " << buffer; + if (procfd < 0) { close(cap); close(fd); return; } // release the descriptors opened so far before bailing out + int proccap = dup(procfd); + EXPECT_OK(proccap); + EXPECT_OK(cap_rights_limit(proccap, &rights)); + + EXPECT_OK(read(proccap, buffer, sizeof(buffer) - 1)); // leave the final byte as the terminator: read() does not add one + // The fdinfo should include the file pos of the underlying file + EXPECT_NE((char*)NULL, strstr(buffer, "pos:\t4")); + // ...and the rights of the Capsicum capability. + EXPECT_NE((char*)NULL, strstr(buffer, "rights:\t0x")); + + close(procfd); + close(proccap); + close(cap); + close(fd); +} + +FORK_TEST(Linux, ProcessClocks) { + pid_t self = getpid_(); + pid_t child = fork(); + EXPECT_OK(child); + if (child == 0) { + child = getpid_(); + usleep(100000); + exit(0); + } + + EXPECT_OK(cap_enter()); // Enter capability mode. + + // Nefariously build a clock ID for the child's CPU time. + // This relies on knowledge of the internal layout of clock IDs.
+ clockid_t child_clock; + child_clock = ((~child) << 3) | 0x0; // complemented PID in the upper bits, clock type in the low three (layout assumed from kernel internals; TODO confirm) + struct timespec ts; + memset(&ts, 0, sizeof(ts)); + + // TODO(drysdale): Should not be possible to retrieve info about a + // different process, as the PID global namespace should be locked + // down. + EXPECT_OK(clock_gettime(child_clock, &ts)); + if (verbose) fprintf(stderr, "[parent: %d] clock_gettime(child=%d->0x%08x) is %ld.%09ld \n", + self, child, child_clock, (long)ts.tv_sec, (long)ts.tv_nsec); + + child_clock = ((~1) << 3) | 0x0; // same construction for PID 1 + memset(&ts, 0, sizeof(ts)); + EXPECT_OK(clock_gettime(child_clock, &ts)); + if (verbose) fprintf(stderr, "[parent: %d] clock_gettime(init=1->0x%08x) is %ld.%09ld \n", + self, child_clock, (long)ts.tv_sec, (long)ts.tv_nsec); + + // Orphan the child: return without waiting for it. +} + +TEST(Linux, SetLease) { + int fd_all = open(TmpFile("cap_lease"), O_CREAT|O_RDWR, 0644); + EXPECT_OK(fd_all); + int fd_rw = dup(fd_all); + EXPECT_OK(fd_rw); + + cap_rights_t r_all; + cap_rights_init(&r_all, CAP_READ, CAP_WRITE, CAP_FLOCK, CAP_FSIGNAL); + EXPECT_OK(cap_rights_limit(fd_all, &r_all)); + + cap_rights_t r_rw; + cap_rights_init(&r_rw, CAP_READ, CAP_WRITE); // unlike r_all: no CAP_FLOCK, no CAP_FSIGNAL + EXPECT_OK(cap_rights_limit(fd_rw, &r_rw)); + + EXPECT_NOTCAPABLE(fcntl(fd_rw, F_SETLEASE, F_WRLCK)); + EXPECT_NOTCAPABLE(fcntl(fd_rw, F_GETLEASE)); + + if (!tmpdir_on_tmpfs) { // tmpfs doesn't support leases + EXPECT_OK(fcntl(fd_all, F_SETLEASE, F_WRLCK)); + EXPECT_EQ(F_WRLCK, fcntl(fd_all, F_GETLEASE)); + + EXPECT_OK(fcntl(fd_all, F_SETLEASE, F_UNLCK, 0)); + EXPECT_EQ(F_UNLCK, fcntl(fd_all, F_GETLEASE)); + } + close(fd_all); + close(fd_rw); + unlink(TmpFile("cap_lease")); +} + +TEST(Linux, InvalidRightsSyscall) { + int fd = open(TmpFile("cap_invalid_rights"), O_RDONLY|O_CREAT, 0644); + EXPECT_OK(fd); + + cap_rights_t rights; + cap_rights_init(&rights, CAP_READ, CAP_WRITE, CAP_FCHMOD, CAP_FSTAT); + + // Use the raw syscall throughout.
+ EXPECT_EQ(0, syscall(__NR_cap_rights_limit, fd, &rights, 0, 0, NULL, 0)); + + // Directly access the syscall, and find all unseemly manner of use for it. + // - Invalid flags + EXPECT_EQ(-1, syscall(__NR_cap_rights_limit, fd, &rights, 0, 0, NULL, 1)); + EXPECT_EQ(EINVAL, errno); + // - Specify an fcntl subright, but no CAP_FCNTL set + EXPECT_EQ(-1, syscall(__NR_cap_rights_limit, fd, &rights, CAP_FCNTL_GETFL, 0, NULL, 0)); + EXPECT_EQ(EINVAL, errno); + // - Specify an ioctl subright, but no CAP_IOCTL set + unsigned int ioctl1 = 1; + EXPECT_EQ(-1, syscall(__NR_cap_rights_limit, fd, &rights, 0, 1, &ioctl1, 0)); + EXPECT_EQ(EINVAL, errno); + // - N ioctls, but null pointer passed + EXPECT_EQ(-1, syscall(__NR_cap_rights_limit, fd, &rights, 0, 1, NULL, 0)); + EXPECT_EQ(EINVAL, errno); + // - Invalid nioctls + EXPECT_EQ(-1, syscall(__NR_cap_rights_limit, fd, &rights, 0, -2, NULL, 0)); + EXPECT_EQ(EINVAL, errno); + // - Null primary rights + EXPECT_EQ(-1, syscall(__NR_cap_rights_limit, fd, NULL, 0, 0, NULL, 0)); + EXPECT_EQ(EFAULT, errno); + // - Invalid index bitmask + rights.cr_rights[0] |= 3ULL << 57; + EXPECT_EQ(-1, syscall(__NR_cap_rights_limit, fd, &rights, 0, 0, NULL, 0)); + EXPECT_EQ(EINVAL, errno); + // - Invalid version + rights.cr_rights[0] |= 2ULL << 62; + EXPECT_EQ(-1, syscall(__NR_cap_rights_limit, fd, &rights, 0, 0, NULL, 0)); + EXPECT_EQ(EINVAL, errno); + + close(fd); + unlink(TmpFile("cap_invalid_rights")); +} + +FORK_TEST_ON(Linux, OpenByHandleAt, TmpFile("cap_openbyhandle_testfile")) { + REQUIRE_ROOT(); + int dir = open(tmpdir.c_str(), O_RDONLY); + EXPECT_OK(dir); + int fd = openat(dir, "cap_openbyhandle_testfile", O_RDWR|O_CREAT, 0644); + EXPECT_OK(fd); + const char* message = "Saved text"; + EXPECT_OK(write(fd, message, strlen(message))); + close(fd); + + struct file_handle* fhandle = (struct file_handle*)malloc(sizeof(struct file_handle) + MAX_HANDLE_SZ); + fhandle->handle_bytes = MAX_HANDLE_SZ; + int mount_id; + EXPECT_OK(name_to_handle_at(dir,
+ "cap_openbyhandle_testfile", fhandle, &mount_id, 0)); + + fd = open_by_handle_at(dir, fhandle, O_RDONLY); + EXPECT_OK(fd); + char buffer[200] = {0}; // zero-filled: read() adds no terminator, and at most 199 bytes are requested + EXPECT_OK(read(fd, buffer, 199)); + EXPECT_EQ(std::string(message), std::string(buffer)); + close(fd); + + // Cannot issue open_by_handle_at after entering capability mode. + EXPECT_OK(cap_enter()); // checked, so a failure to enter capability mode is reported as such + EXPECT_CAPMODE(open_by_handle_at(dir, fhandle, O_RDONLY)); + + close(dir); +} + +int getrandom_(void *buf, size_t buflen, unsigned int flags) { +#ifdef __NR_getrandom + return syscall(__NR_getrandom, buf, buflen, flags); +#else + errno = ENOSYS; + return -1; +#endif +} + +#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 17, 0) +#include <linux/random.h> // Requires 3.17 kernel +FORK_TEST(Linux, GetRandom) { + EXPECT_OK(cap_enter()); + unsigned char buffer[1024]; + unsigned char buffer2[1024]; + EXPECT_OK(getrandom_(buffer, sizeof(buffer), GRND_NONBLOCK)); + EXPECT_OK(getrandom_(buffer2, sizeof(buffer2), GRND_NONBLOCK)); + EXPECT_NE(0, memcmp(buffer, buffer2, sizeof(buffer))); +} +#endif + +int memfd_create_(const char *name, unsigned int flags) { +#ifdef __NR_memfd_create + return syscall(__NR_memfd_create, name, flags); +#else + errno = ENOSYS; + return -1; +#endif +} + +#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 17, 0) +#include <linux/memfd.h> // Requires 3.17 kernel +TEST(Linux, MemFDDeathTest) { + int memfd = memfd_create_("capsicum-test", MFD_ALLOW_SEALING); + if (memfd == -1 && errno == ENOSYS) { + TEST_SKIPPED("memfd_create(2) gives -ENOSYS"); + return; + } + const int LEN = 16; + EXPECT_OK(ftruncate(memfd, LEN)); + int memfd_ro = dup(memfd); + int memfd_rw = dup(memfd); + EXPECT_OK(memfd_ro); + EXPECT_OK(memfd_rw); + cap_rights_t rights; + EXPECT_OK(cap_rights_limit(memfd_ro, cap_rights_init(&rights, CAP_MMAP_R, CAP_FSTAT))); + EXPECT_OK(cap_rights_limit(memfd_rw, cap_rights_init(&rights, CAP_MMAP_RW, CAP_FCHMOD))); + + unsigned char *p_ro = (unsigned char *)mmap(NULL, LEN, PROT_READ, MAP_SHARED, memfd_ro, 0); + EXPECT_NE((unsigned char *)MAP_FAILED, p_ro); +
unsigned char *p_rw = (unsigned char *)mmap(NULL, LEN, PROT_READ|PROT_WRITE, MAP_SHARED, memfd_rw, 0); + EXPECT_NE((unsigned char *)MAP_FAILED, p_rw); + EXPECT_EQ(MAP_FAILED, + mmap(NULL, LEN, PROT_READ|PROT_WRITE, MAP_SHARED, memfd_ro, 0)); // a writable mapping must not be obtainable through memfd_ro + + *p_rw = 42; + EXPECT_EQ(42, *p_ro); // the store is visible through the read-only mapping of the same memfd + EXPECT_DEATH(*p_ro = 42, ""); // storing through the PROT_READ mapping is expected to kill the process + +#ifndef F_ADD_SEALS + // Hack for when libc6 does not yet include the updated linux/fcntl.h from kernel 3.17 +#define _F_LINUX_SPECIFIC_BASE F_SETLEASE +#define F_ADD_SEALS (_F_LINUX_SPECIFIC_BASE + 9) +#define F_GET_SEALS (_F_LINUX_SPECIFIC_BASE + 10) +#define F_SEAL_SEAL 0x0001 /* prevent further seals from being set */ +#define F_SEAL_SHRINK 0x0002 /* prevent file from shrinking */ +#define F_SEAL_GROW 0x0004 /* prevent file from growing */ +#define F_SEAL_WRITE 0x0008 /* prevent writes */ +#endif + + // Reading the seal information requires CAP_FSTAT. + int seals = fcntl(memfd, F_GET_SEALS); + EXPECT_OK(seals); + if (verbose) fprintf(stderr, "seals are %08x on base fd\n", seals); + int seals_ro = fcntl(memfd_ro, F_GET_SEALS); + EXPECT_EQ(seals, seals_ro); + if (verbose) fprintf(stderr, "seals are %08x on read-only fd\n", seals_ro); + int seals_rw = fcntl(memfd_rw, F_GET_SEALS); + EXPECT_NOTCAPABLE(seals_rw); // memfd_rw was limited without CAP_FSTAT + + // Fail to seal as a writable mapping exists. + EXPECT_EQ(-1, fcntl(memfd_rw, F_ADD_SEALS, F_SEAL_WRITE)); + EXPECT_EQ(EBUSY, errno); + *p_rw = 42; + + // Seal the rw version; need to unmap first. + munmap(p_rw, LEN); + munmap(p_ro, LEN); + EXPECT_OK(fcntl(memfd_rw, F_ADD_SEALS, F_SEAL_WRITE)); + + seals = fcntl(memfd, F_GET_SEALS); + EXPECT_OK(seals); + if (verbose) fprintf(stderr, "seals are %08x on base fd\n", seals); + seals_ro = fcntl(memfd_ro, F_GET_SEALS); + EXPECT_EQ(seals, seals_ro); + if (verbose) fprintf(stderr, "seals are %08x on read-only fd\n", seals_ro); + + // Remove the CAP_FCHMOD right; seals can then no longer be added.
+ EXPECT_OK(cap_rights_limit(memfd_rw, cap_rights_init(&rights, CAP_MMAP_RW))); + EXPECT_NOTCAPABLE(fcntl(memfd_rw, F_ADD_SEALS, F_SEAL_WRITE)); + + close(memfd); + close(memfd_ro); + close(memfd_rw); +} +#endif + +#else +void noop() {} +#endif Index: head/contrib/capsicum-test/makefile =================================================================== --- head/contrib/capsicum-test/makefile +++ head/contrib/capsicum-test/makefile @@ -0,0 +1,38 @@ +all: capsicum-test smoketest mini-me mini-me.noexec mini-me.setuid $(EXTRA_PROGS) +OBJECTS=capsicum-test-main.o capsicum-test.o capability-fd.o fexecve.o procdesc.o capmode.o fcntl.o ioctl.o openat.o sysctl.o select.o mqueue.o socket.o sctp.o capability-fd-pair.o linux.o overhead.o rename.o + +GTEST_DIR=gtest-1.8.1 +GTEST_INCS=-I$(GTEST_DIR)/include -I$(GTEST_DIR) +GTEST_FLAGS=-DGTEST_USE_OWN_TR1_TUPLE=1 -DGTEST_HAS_TR1_TUPLE=1 +CXXFLAGS+=$(ARCHFLAG) -Wall -g $(GTEST_INCS) $(GTEST_FLAGS) --std=c++11 +CFLAGS+=$(ARCHFLAG) -Wall -g + +capsicum-test: $(OBJECTS) libgtest.a $(LOCAL_LIBS) + $(CXX) $(CXXFLAGS) -g -o $@ $(OBJECTS) libgtest.a -lpthread -lrt $(LIBSCTP) $(LIBCAPRIGHTS) + +# Small statically-linked program for fexecve tests +# (needs to be statically linked so that execve()ing it +# doesn't involve ld.so traversing the filesystem).
+mini-me: mini-me.c + $(CC) $(CFLAGS) -static -o $@ $< +mini-me.noexec: mini-me + cp mini-me $@ && chmod -x $@ +mini-me.setuid: mini-me + rm -f $@ && cp mini-me $@&& sudo chown root $@ && sudo chmod u+s $@ + +# Simple C test of Capsicum syscalls +SMOKETEST_OBJECTS=smoketest.o +smoketest: $(SMOKETEST_OBJECTS) $(LOCAL_LIBS) + $(CC) $(CFLAGS) -o $@ $(SMOKETEST_OBJECTS) $(LIBCAPRIGHTS) + +test: capsicum-test mini-me mini-me.noexec mini-me.setuid $(EXTRA_PROGS) + ./capsicum-test +gtest-all.o: + $(CXX) $(ARCHFLAG) -I$(GTEST_DIR)/include -I$(GTEST_DIR) $(GTEST_FLAGS) -c ${GTEST_DIR}/src/gtest-all.cc +libgtest.a: gtest-all.o + $(AR) -rv libgtest.a gtest-all.o + +# all, test and clean name actions rather than files; clean removes every program the rules above build. +.PHONY: all test clean +clean: + rm -rf gtest-all.o libgtest.a capsicum-test mini-me mini-me.noexec mini-me.setuid smoketest $(SMOKETEST_OBJECTS) $(OBJECTS) $(LOCAL_CLEAN) $(EXTRA_PROGS) Index: head/contrib/capsicum-test/mini-me.c =================================================================== --- head/contrib/capsicum-test/mini-me.c +++ head/contrib/capsicum-test/mini-me.c @@ -0,0 +1,38 @@ +#include +#include +#include +#include +#include + +int main(int argc, char* argv[]) { + if (argc == 2 && !strcmp(argv[1], "--pass")) { + fprintf(stderr,"[%d] %s immediately returning 0\n", getpid(), argv[0]); + return 0; + } + + if (argc == 2 && !strcmp(argv[1], "--fail")) { + fprintf(stderr,"[%d] %s immediately returning 1\n", getpid(), argv[0]); + return 1; + } + + if (argc == 2 && !strcmp(argv[1], "--checkroot")) { + int rc = (geteuid() == 0); + fprintf(stderr,"[uid:%d] %s immediately returning (geteuid() == 0) = %d\n", geteuid(), argv[0], rc); + return rc; + } + + if (argc == 2 && !strcmp(argv[1], "--capmode")) { + /* Expect to already be in capability mode: check we can't open a file */ + int rc = 0; + + int fd = open("/etc/passwd", O_RDONLY); + if (fd >= 0) { /* any non-negative result is a successful open, including descriptor 0 */ + fprintf(stderr,"[%d] %s unexpectedly able to open file\n", getpid(), argv[0]); + rc = 1; + } + fprintf(stderr,"[%d] %s --capmode returning %d\n", getpid(), argv[0], rc); + return rc; + } + + return
-1; +} Index: head/contrib/capsicum-test/mqueue.cc =================================================================== --- head/contrib/capsicum-test/mqueue.cc +++ head/contrib/capsicum-test/mqueue.cc @@ -0,0 +1,100 @@ +// Tests for POSIX message queue functionality. + +#include +#include +#include +#include + +#include + +#include "capsicum.h" +#include "syscalls.h" +#include "capsicum-test.h" + +// Run a test case in a forked process, possibly cleaning up a +// message queue after completion +#define FORK_TEST_ON_MQ(test_case_name, test_name, test_mq) \ + static void test_case_name##_##test_name##_ForkTest(); \ + TEST(test_case_name, test_name ## Forked) { \ + _RUN_FORKED_FN(test_case_name##_##test_name##_ForkTest, \ + #test_case_name, #test_name); \ + const char *mqname = test_mq; \ + if (mqname) mq_unlink_(mqname); \ + } \ + static void test_case_name##_##test_name##_ForkTest() + +static volatile sig_atomic_t invoked; // set from the SIGUSR2 handler below, hence volatile sig_atomic_t +void seen_it_done_it(int) { + invoked = 1; +} + +FORK_TEST_ON_MQ(PosixMqueue, CapMode, "/cap_mq") { + int mq = mq_open_("/cap_mq", O_RDWR|O_CREAT, 0644, NULL); + // On FreeBSD, turn on message queue support with: + // - 'kldload mqueuefs' + // - 'options P1003_1B_MQUEUE' in kernel build config.
+ if (mq < 0 && errno == ENOSYS) { + TEST_SKIPPED("mq_open -> -ENOSYS"); + return; + } + EXPECT_OK(mq); + cap_rights_t r_read; + cap_rights_init(&r_read, CAP_READ); + cap_rights_t r_write; + cap_rights_init(&r_write, CAP_WRITE); + cap_rights_t r_poll; + cap_rights_init(&r_poll, CAP_EVENT); + + int cap_read_mq = dup(mq); + EXPECT_OK(cap_read_mq); + EXPECT_OK(cap_rights_limit(cap_read_mq, &r_read)); + int cap_write_mq = dup(mq); + EXPECT_OK(cap_write_mq); + EXPECT_OK(cap_rights_limit(cap_write_mq, &r_write)); + int cap_poll_mq = dup(mq); + EXPECT_OK(cap_poll_mq); + EXPECT_OK(cap_rights_limit(cap_poll_mq, &r_poll)); + EXPECT_OK(mq_close_(mq)); + + signal(SIGUSR2, seen_it_done_it); + + EXPECT_OK(cap_enter()); // Enter capability mode + + // Can no longer access the message queue via the POSIX IPC namespace. + EXPECT_CAPMODE(mq_open_("/cap_mq", O_RDWR|O_CREAT, 0644, NULL)); // same name as above, so nothing stray is created should this ever succeed + + struct sigevent se = {}; // zero every field; only the two below are set explicitly + se.sigev_notify = SIGEV_SIGNAL; + se.sigev_signo = SIGUSR2; + EXPECT_OK(mq_notify_(cap_poll_mq, &se)); + EXPECT_NOTCAPABLE(mq_notify_(cap_read_mq, &se)); + EXPECT_NOTCAPABLE(mq_notify_(cap_write_mq, &se)); + + const unsigned int kPriority = 10; + const char* message = "xyzzy"; + struct timespec ts; + ts.tv_sec = 1; + ts.tv_nsec = 0; + EXPECT_OK(mq_timedsend_(cap_write_mq, message, strlen(message) + 1, kPriority, &ts)); + EXPECT_NOTCAPABLE(mq_timedsend_(cap_read_mq, message, strlen(message) + 1, kPriority, &ts)); + + sleep(1); // Give the notification a chance to arrive.
+ EXPECT_TRUE(invoked); + + struct mq_attr mqa; + EXPECT_OK(mq_getattr_(cap_poll_mq, &mqa)); + EXPECT_OK(mq_setattr_(cap_poll_mq, &mqa, NULL)); + EXPECT_NOTCAPABLE(mq_getattr_(cap_write_mq, &mqa)); + + char* buffer = (char *)malloc(mqa.mq_msgsize); + unsigned int priority; + EXPECT_NOTCAPABLE(mq_timedreceive_(cap_write_mq, buffer, mqa.mq_msgsize, &priority, &ts)); + EXPECT_OK(mq_timedreceive_(cap_read_mq, buffer, mqa.mq_msgsize, &priority, &ts)); + EXPECT_EQ(std::string(message), std::string(buffer)); // the message was sent with its terminating NUL (strlen + 1) + EXPECT_EQ(kPriority, priority); + free(buffer); + + close(cap_read_mq); + close(cap_write_mq); + close(cap_poll_mq); +} Index: head/contrib/capsicum-test/openat.cc =================================================================== --- head/contrib/capsicum-test/openat.cc +++ head/contrib/capsicum-test/openat.cc @@ -0,0 +1,361 @@ +#include +#include +#include +#include + +#include + +#include "capsicum.h" +#include "capsicum-test.h" +#include "syscalls.h" + +// Check an open call works and close the resulting fd (f is evaluated exactly once). +#define EXPECT_OPEN_OK(f) do { \ + int _fd = f; \ + EXPECT_OK(_fd); \ + close(_fd); \ + } while (0) + +static void CreateFile(const char *filename, const char *contents) { // create filename if needed and write contents at its start (no truncation) + int fd = open(filename, O_CREAT|O_RDWR, 0644); + EXPECT_OK(fd); + EXPECT_OK(write(fd, contents, strlen(contents))); + close(fd); +} + +// Test openat(2) in a variety of situations to ensure that it obeys Capsicum +// "strict relative" rules: +// +// 1. Use strict relative lookups in capability mode or when operating +// relative to a capability. +// 2. When performing strict relative lookups, absolute paths (including +// symlinks to absolute paths) are not allowed, nor are paths containing +// '..' components. +// +// These rules apply when: +// - the directory FD is a Capsicum capability +// - the process is in capability mode +// - the openat(2) operation includes the O_BENEATH flag.
+FORK_TEST(Openat, Relative) { + int etc = open("/etc/", O_RDONLY); + EXPECT_OK(etc); + + cap_rights_t r_base; + cap_rights_init(&r_base, CAP_READ, CAP_WRITE, CAP_SEEK, CAP_LOOKUP, CAP_FCNTL, CAP_IOCTL); + cap_rights_t r_ro; + cap_rights_init(&r_ro, CAP_READ); + cap_rights_t r_rl; + cap_rights_init(&r_rl, CAP_READ, CAP_LOOKUP); + + int etc_cap = dup(etc); + EXPECT_OK(etc_cap); + EXPECT_OK(cap_rights_limit(etc_cap, &r_ro)); // CAP_READ only, so no CAP_LOOKUP + int etc_cap_ro = dup(etc); + EXPECT_OK(etc_cap_ro); + EXPECT_OK(cap_rights_limit(etc_cap_ro, &r_rl)); // CAP_READ and CAP_LOOKUP, despite the _ro name + int etc_cap_base = dup(etc); + EXPECT_OK(etc_cap_base); + EXPECT_OK(cap_rights_limit(etc_cap_base, &r_base)); +#ifdef HAVE_CAP_FCNTLS_LIMIT + // Also limit fcntl(2) subrights. + EXPECT_OK(cap_fcntls_limit(etc_cap_base, CAP_FCNTL_GETFL)); +#endif +#ifdef HAVE_CAP_IOCTLS_LIMIT + // Also limit ioctl(2) subrights. + cap_ioctl_t ioctl_nread = FIONREAD; + EXPECT_OK(cap_ioctls_limit(etc_cap_base, &ioctl_nread, 1)); +#endif + + // openat(2) with regular file descriptors in non-capability mode + // Should Just Work (tm). + EXPECT_OPEN_OK(openat(etc, "/etc/passwd", O_RDONLY)); + EXPECT_OPEN_OK(openat(AT_FDCWD, "/etc/passwd", O_RDONLY)); + EXPECT_OPEN_OK(openat(etc, "passwd", O_RDONLY)); + EXPECT_OPEN_OK(openat(etc, "../etc/passwd", O_RDONLY)); + + // Lookups relative to capabilities should be strictly relative. + // When not in capability mode, we don't actually require CAP_LOOKUP. + EXPECT_OPEN_OK(openat(etc_cap_ro, "passwd", O_RDONLY)); + EXPECT_OPEN_OK(openat(etc_cap_base, "passwd", O_RDONLY)); + + // Performing openat(2) on a path with leading slash ignores + // the provided directory FD. + EXPECT_OPEN_OK(openat(etc_cap_ro, "/etc/passwd", O_RDONLY)); + EXPECT_OPEN_OK(openat(etc_cap_base, "/etc/passwd", O_RDONLY)); + // Relative lookups that go upward are not allowed.
+ EXPECT_OPENAT_FAIL_TRAVERSAL(etc_cap_ro, "../etc/passwd", O_RDONLY); + EXPECT_OPENAT_FAIL_TRAVERSAL(etc_cap_base, "../etc/passwd", O_RDONLY); + + // A file opened relative to a capability should itself be a capability. + int fd = openat(etc_cap_base, "passwd", O_RDONLY); + EXPECT_OK(fd); + cap_rights_t rights; + EXPECT_OK(cap_rights_get(fd, &rights)); + EXPECT_RIGHTS_IN(&rights, &r_base); +#ifdef HAVE_CAP_FCNTLS_LIMIT + cap_fcntl_t fcntls; + EXPECT_OK(cap_fcntls_get(fd, &fcntls)); + EXPECT_EQ((cap_fcntl_t)CAP_FCNTL_GETFL, fcntls); +#endif +#ifdef HAVE_CAP_IOCTLS_LIMIT + cap_ioctl_t ioctls[16]; + ssize_t nioctls; + memset(ioctls, 0, sizeof(ioctls)); + nioctls = cap_ioctls_get(fd, ioctls, 16); + EXPECT_OK(nioctls); + EXPECT_EQ(1, nioctls); + EXPECT_EQ((cap_ioctl_t)FIONREAD, ioctls[0]); +#endif + close(fd); + + // Enter capability mode; now ALL lookups are strictly relative. + EXPECT_OK(cap_enter()); + + // Relative lookups on regular files or capabilities with CAP_LOOKUP + // ought to succeed. + EXPECT_OPEN_OK(openat(etc, "passwd", O_RDONLY)); + EXPECT_OPEN_OK(openat(etc_cap_ro, "passwd", O_RDONLY)); + EXPECT_OPEN_OK(openat(etc_cap_base, "passwd", O_RDONLY)); + + // Lookup relative to capabilities without CAP_LOOKUP should fail. + EXPECT_NOTCAPABLE(openat(etc_cap, "passwd", O_RDONLY)); + + // Absolute lookups should fail. + EXPECT_CAPMODE(openat(AT_FDCWD, "/etc/passwd", O_RDONLY)); + EXPECT_OPENAT_FAIL_TRAVERSAL(etc, "/etc/passwd", O_RDONLY); + EXPECT_OPENAT_FAIL_TRAVERSAL(etc_cap_ro, "/etc/passwd", O_RDONLY); + + // Lookups containing '..' should fail in capability mode. + EXPECT_OPENAT_FAIL_TRAVERSAL(etc, "../etc/passwd", O_RDONLY); + EXPECT_OPENAT_FAIL_TRAVERSAL(etc_cap_ro, "../etc/passwd", O_RDONLY); + EXPECT_OPENAT_FAIL_TRAVERSAL(etc_cap_base, "../etc/passwd", O_RDONLY); + + fd = openat(etc, "passwd", O_RDONLY); + EXPECT_OK(fd); + close(fd); // release this descriptor before fd is assigned again below + // A file opened relative to a capability should itself be a capability.
+ fd = openat(etc_cap_base, "passwd", O_RDONLY); + EXPECT_OK(fd); + EXPECT_OK(cap_rights_get(fd, &rights)); + EXPECT_RIGHTS_IN(&rights, &r_base); + close(fd); + + fd = openat(etc_cap_ro, "passwd", O_RDONLY); + EXPECT_OK(fd); + EXPECT_OK(cap_rights_get(fd, &rights)); + EXPECT_RIGHTS_IN(&rights, &r_rl); + close(fd); +} + +#define TOPDIR "cap_topdir" +#define SUBDIR TOPDIR "/subdir" +class OpenatTest : public ::testing::Test { + public: + // Build a collection of files, subdirs and symlinks: + // /tmp/cap_topdir/ + // /topfile + // /subdir/ + // /subdir/bottomfile + // /symlink.samedir -> topfile + // /dsymlink.samedir -> ./ + // /symlink.down -> subdir/bottomfile + // /dsymlink.down -> subdir/ + // /symlink.absolute_out -> /etc/passwd + // /dsymlink.absolute_out -> /etc/ + // /symlink.relative_in -> ../../tmp/cap_topdir/topfile + // /dsymlink.relative_in -> ../../tmp/ (the code below links to tmpdir itself, not to cap_topdir) + // /symlink.relative_out -> ../../etc/passwd + // /dsymlink.relative_out -> ../../etc/ + // /subdir/dsymlink.absolute_in -> /tmp/cap_topdir/ + // /subdir/dsymlink.up -> ../ + // /subdir/symlink.absolute_in -> /tmp/cap_topdir/topfile + // /subdir/symlink.up -> ../topfile + // (In practice, this is a little more complicated because tmpdir might + // not be "/tmp".) + OpenatTest() { + // Create a couple of nested directories + int rc = mkdir(TmpFile(TOPDIR), 0755); + EXPECT_OK(rc); + if (rc < 0) { + EXPECT_EQ(EEXIST, errno); + } + rc = mkdir(TmpFile(SUBDIR), 0755); + EXPECT_OK(rc); + if (rc < 0) { + EXPECT_EQ(EEXIST, errno); + } + + // Figure out a path prefix (like "../..") that gets us to the root + // directory from TmpFile(TOPDIR). + const char *p = TmpFile(TOPDIR); // maybe "/tmp/somewhere/cap_topdir" + std::string dots2root = ".."; + while (*p++ != '\0') { // p is advanced before the test below, so the first character is never counted + if (*p == '/') { + dots2root += "/.."; + } + } + + // Create normal files in each.
+ CreateFile(TmpFile(TOPDIR "/topfile"), "Top-level file"); + CreateFile(TmpFile(SUBDIR "/bottomfile"), "File in subdirectory"); + + // Create various symlinks to files. + EXPECT_OK(symlink("topfile", TmpFile(TOPDIR "/symlink.samedir"))); + EXPECT_OK(symlink("subdir/bottomfile", TmpFile(TOPDIR "/symlink.down"))); + EXPECT_OK(symlink(TmpFile(TOPDIR "/topfile"), TmpFile(SUBDIR "/symlink.absolute_in"))); + EXPECT_OK(symlink("/etc/passwd", TmpFile(TOPDIR "/symlink.absolute_out"))); + std::string dots2top = dots2root + TmpFile(TOPDIR "/topfile"); + EXPECT_OK(symlink(dots2top.c_str(), TmpFile(TOPDIR "/symlink.relative_in"))); + std::string dots2passwd = dots2root + "/etc/passwd"; + EXPECT_OK(symlink(dots2passwd.c_str(), TmpFile(TOPDIR "/symlink.relative_out"))); + EXPECT_OK(symlink("../topfile", TmpFile(SUBDIR "/symlink.up"))); + + // Create various symlinks to directories. + EXPECT_OK(symlink("./", TmpFile(TOPDIR "/dsymlink.samedir"))); + EXPECT_OK(symlink("subdir/", TmpFile(TOPDIR "/dsymlink.down"))); + EXPECT_OK(symlink(TmpFile(TOPDIR "/"), TmpFile(SUBDIR "/dsymlink.absolute_in"))); + EXPECT_OK(symlink("/etc/", TmpFile(TOPDIR "/dsymlink.absolute_out"))); + std::string dots2cwd = dots2root + tmpdir + "/"; + EXPECT_OK(symlink(dots2cwd.c_str(), TmpFile(TOPDIR "/dsymlink.relative_in"))); + std::string dots2etc = dots2root + "/etc/"; + EXPECT_OK(symlink(dots2etc.c_str(), TmpFile(TOPDIR "/dsymlink.relative_out"))); + EXPECT_OK(symlink("../", TmpFile(SUBDIR "/dsymlink.up"))); + + // Open directory FDs for those directories and for cwd. + dir_fd_ = open(TmpFile(TOPDIR), O_RDONLY); + EXPECT_OK(dir_fd_); + sub_fd_ = open(TmpFile(SUBDIR), O_RDONLY); + EXPECT_OK(sub_fd_); + cwd_ = openat(AT_FDCWD, ".", O_RDONLY); + EXPECT_OK(cwd_); + // Move into the directory for the test.
+ EXPECT_OK(fchdir(dir_fd_)); + } + ~OpenatTest() { + fchdir(cwd_); // go back to the directory saved by the constructor before removing the tree + close(cwd_); + close(sub_fd_); + close(dir_fd_); + unlink(TmpFile(SUBDIR "/symlink.up")); + unlink(TmpFile(SUBDIR "/symlink.absolute_in")); + unlink(TmpFile(TOPDIR "/symlink.absolute_out")); + unlink(TmpFile(TOPDIR "/symlink.relative_in")); + unlink(TmpFile(TOPDIR "/symlink.relative_out")); + unlink(TmpFile(TOPDIR "/symlink.down")); + unlink(TmpFile(TOPDIR "/symlink.samedir")); + unlink(TmpFile(SUBDIR "/dsymlink.up")); + unlink(TmpFile(SUBDIR "/dsymlink.absolute_in")); + unlink(TmpFile(TOPDIR "/dsymlink.absolute_out")); + unlink(TmpFile(TOPDIR "/dsymlink.relative_in")); + unlink(TmpFile(TOPDIR "/dsymlink.relative_out")); + unlink(TmpFile(TOPDIR "/dsymlink.down")); + unlink(TmpFile(TOPDIR "/dsymlink.samedir")); + unlink(TmpFile(SUBDIR "/bottomfile")); + unlink(TmpFile(TOPDIR "/topfile")); + rmdir(TmpFile(SUBDIR)); + rmdir(TmpFile(TOPDIR)); + } + + // Check openat(2) policing that is common across capabilities, capability mode and O_BENEATH; oflag is OR-ed into the flags of every openat() call made here. + void CheckPolicing(int oflag) { + // OK for normal access. + EXPECT_OPEN_OK(openat(dir_fd_, "topfile", O_RDONLY|oflag)); + EXPECT_OPEN_OK(openat(dir_fd_, "subdir/bottomfile", O_RDONLY|oflag)); + EXPECT_OPEN_OK(openat(sub_fd_, "bottomfile", O_RDONLY|oflag)); + EXPECT_OPEN_OK(openat(sub_fd_, ".", O_RDONLY|oflag)); + + // Can't open paths with ".." in them. + EXPECT_OPENAT_FAIL_TRAVERSAL(sub_fd_, "../topfile", O_RDONLY|oflag); + EXPECT_OPENAT_FAIL_TRAVERSAL(sub_fd_, "../subdir/bottomfile", O_RDONLY|oflag); + EXPECT_OPENAT_FAIL_TRAVERSAL(sub_fd_, "..", O_RDONLY|oflag); + +#ifdef HAVE_OPENAT_INTERMEDIATE_DOTDOT + // OK for dotdot lookups that don't escape the top directory + EXPECT_OPEN_OK(openat(dir_fd_, "subdir/../topfile", O_RDONLY|oflag)); +#endif + + // Check that we can't escape the top directory by the cunning + // ruse of going via a subdirectory.
+ EXPECT_OPENAT_FAIL_TRAVERSAL(dir_fd_, "subdir/../../etc/passwd", O_RDONLY|oflag); + + // Should only be able to open symlinks that stay within the directory. + EXPECT_OPEN_OK(openat(dir_fd_, "symlink.samedir", O_RDONLY|oflag)); + EXPECT_OPEN_OK(openat(dir_fd_, "symlink.down", O_RDONLY|oflag)); + EXPECT_OPENAT_FAIL_TRAVERSAL(dir_fd_, "symlink.absolute_out", O_RDONLY|oflag); + EXPECT_OPENAT_FAIL_TRAVERSAL(dir_fd_, "symlink.relative_in", O_RDONLY|oflag); + EXPECT_OPENAT_FAIL_TRAVERSAL(dir_fd_, "symlink.relative_out", O_RDONLY|oflag); + EXPECT_OPENAT_FAIL_TRAVERSAL(sub_fd_, "symlink.absolute_in", O_RDONLY|oflag); + EXPECT_OPENAT_FAIL_TRAVERSAL(sub_fd_, "symlink.up", O_RDONLY|oflag); + + EXPECT_OPEN_OK(openat(dir_fd_, "dsymlink.samedir/topfile", O_RDONLY|oflag)); + EXPECT_OPEN_OK(openat(dir_fd_, "dsymlink.down/bottomfile", O_RDONLY|oflag)); + EXPECT_OPENAT_FAIL_TRAVERSAL(dir_fd_, "dsymlink.absolute_out/passwd", O_RDONLY|oflag); + EXPECT_OPENAT_FAIL_TRAVERSAL(dir_fd_, "dsymlink.relative_in/topfile", O_RDONLY|oflag); + EXPECT_OPENAT_FAIL_TRAVERSAL(dir_fd_, "dsymlink.relative_out/passwd", O_RDONLY|oflag); + EXPECT_OPENAT_FAIL_TRAVERSAL(sub_fd_, "dsymlink.absolute_in/topfile", O_RDONLY|oflag); + EXPECT_OPENAT_FAIL_TRAVERSAL(sub_fd_, "dsymlink.up/topfile", O_RDONLY|oflag); + + // Although recall that O_NOFOLLOW prevents symlink following in final component. + EXPECT_SYSCALL_FAIL(E_TOO_MANY_LINKS, openat(dir_fd_, "symlink.samedir", O_RDONLY|O_NOFOLLOW|oflag)); + EXPECT_SYSCALL_FAIL(E_TOO_MANY_LINKS, openat(dir_fd_, "symlink.down", O_RDONLY|O_NOFOLLOW|oflag)); + } + + protected: + int dir_fd_; // directory FD for TmpFile(TOPDIR) + int sub_fd_; // directory FD for TmpFile(SUBDIR) + int cwd_; // the working directory at construction time, restored by the destructor +}; + +TEST_F(OpenatTest, WithCapability) { + // Any kind of symlink can be opened relative to an ordinary directory FD.
+ EXPECT_OPEN_OK(openat(dir_fd_, "symlink.samedir", O_RDONLY)); + EXPECT_OPEN_OK(openat(dir_fd_, "symlink.down", O_RDONLY)); + EXPECT_OPEN_OK(openat(dir_fd_, "symlink.absolute_out", O_RDONLY)); + EXPECT_OPEN_OK(openat(dir_fd_, "symlink.relative_in", O_RDONLY)); + EXPECT_OPEN_OK(openat(dir_fd_, "symlink.relative_out", O_RDONLY)); + EXPECT_OPEN_OK(openat(sub_fd_, "symlink.absolute_in", O_RDONLY)); + EXPECT_OPEN_OK(openat(sub_fd_, "symlink.up", O_RDONLY)); + + // Now make both DFDs into Capsicum capabilities. + cap_rights_t r_rl; + cap_rights_init(&r_rl, CAP_READ, CAP_LOOKUP, CAP_FCHDIR); + EXPECT_OK(cap_rights_limit(dir_fd_, &r_rl)); + EXPECT_OK(cap_rights_limit(sub_fd_, &r_rl)); + CheckPolicing(0); + // Use of AT_FDCWD is independent of use of a capability. + // Can open paths starting with "/" against a capability dfd, because the dfd is ignored. +} + +FORK_TEST_F(OpenatTest, InCapabilityMode) { + EXPECT_OK(cap_enter()); // Enter capability mode + CheckPolicing(0); + + // Use of AT_FDCWD is banned in capability mode. + EXPECT_CAPMODE(openat(AT_FDCWD, "topfile", O_RDONLY)); + EXPECT_CAPMODE(openat(AT_FDCWD, "subdir/bottomfile", O_RDONLY)); + EXPECT_CAPMODE(openat(AT_FDCWD, "/etc/passwd", O_RDONLY)); + + // Can't open paths starting with "/" in capability mode. + EXPECT_OPENAT_FAIL_TRAVERSAL(dir_fd_, "/etc/passwd", O_RDONLY); + EXPECT_OPENAT_FAIL_TRAVERSAL(sub_fd_, "/etc/passwd", O_RDONLY); +} + +#ifdef O_BENEATH +TEST_F(OpenatTest, WithFlag) { + CheckPolicing(O_BENEATH); + + // Check with AT_FDCWD. + EXPECT_OPEN_OK(openat(AT_FDCWD, "topfile", O_RDONLY|O_BENEATH)); + EXPECT_OPEN_OK(openat(AT_FDCWD, "subdir/bottomfile", O_RDONLY|O_BENEATH)); + + // Can't open paths starting with "/" with O_BENEATH specified. 
+ EXPECT_OPENAT_FAIL_TRAVERSAL(AT_FDCWD, "/etc/passwd", O_RDONLY|O_BENEATH); + EXPECT_OPENAT_FAIL_TRAVERSAL(dir_fd_, "/etc/passwd", O_RDONLY|O_BENEATH); + EXPECT_OPENAT_FAIL_TRAVERSAL(sub_fd_, "/etc/passwd", O_RDONLY|O_BENEATH); +} + +FORK_TEST_F(OpenatTest, WithFlagInCapabilityMode) { + EXPECT_OK(cap_enter()); // Enter capability mode + CheckPolicing(O_BENEATH); +} +#endif Index: head/contrib/capsicum-test/overhead.cc =================================================================== --- head/contrib/capsicum-test/overhead.cc +++ head/contrib/capsicum-test/overhead.cc @@ -0,0 +1,45 @@ +#include +#include +#include +#include +#include + +#include "capsicum.h" +#include "syscalls.h" +#include "capsicum-test.h" + +#ifdef HAVE_SYSCALL +double RepeatSyscall(int count, int nr, long arg1, long arg2, long arg3) { + const clock_t t0 = clock(); // or gettimeofday or whatever + for (int ii = 0; ii < count; ii++) { + syscall(nr, arg1, arg2, arg3); + } + const clock_t t1 = clock(); + return (t1 - t0) / (double)CLOCKS_PER_SEC; +} + +typedef int (*EntryFn)(void); + +double CompareSyscall(EntryFn entry_fn, int count, int nr, + long arg1, long arg2, long arg3) { + double bare = RepeatSyscall(count, nr, arg1, arg2, arg3); + EXPECT_OK(entry_fn()); + double capmode = RepeatSyscall(count, nr, arg1, arg2, arg3); + if (verbose) fprintf(stderr, "%d iterations bare=%fs capmode=%fs ratio=%.2f%%\n", + count, bare, capmode, 100.0*capmode/bare); + if (bare==0.0) { + if (capmode==0.0) return 1.0; + return 999.0; + } + return capmode/bare; +} + +FORK_TEST(Overhead, GetTid) { + EXPECT_GT(10, CompareSyscall(&cap_enter, 10000, __NR_gettid, 0, 0, 0)); +} +FORK_TEST(Overhead, Seek) { + int fd = open("/etc/passwd", O_RDONLY); + EXPECT_GT(50, CompareSyscall(&cap_enter, 10000, __NR_lseek, fd, 0, SEEK_SET)); + close(fd); +} +#endif Index: head/contrib/capsicum-test/procdesc.cc =================================================================== --- head/contrib/capsicum-test/procdesc.cc +++ 
head/contrib/capsicum-test/procdesc.cc @@ -0,0 +1,977 @@ +// Tests for the process descriptor API for Linux. +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#include "capsicum.h" +#include "syscalls.h" +#include "capsicum-test.h" + +#ifndef __WALL +// Linux requires __WALL in order for waitpid(specific_pid,...) to +// see and reap any specific pid. Define this to nothing for platforms +// (FreeBSD) where it doesn't exist, to reduce macroing. +#define __WALL 0 +#endif + +// TODO(drysdale): it would be nice to use proper synchronization between +// processes, rather than synchronization-via-sleep; faster too. + + +//------------------------------------------------ +// Utilities for the tests. + +static pid_t pdwait4_(int pd, int *status, int options, struct rusage *ru) { +#ifdef HAVE_PDWAIT4 + return pdwait4(pd, status, options, ru); +#else + // Simulate pdwait4() with wait4(pdgetpid()); this won't work in capability mode. 
+ pid_t pid = -1; + int rc = pdgetpid(pd, &pid); + if (rc < 0) { + return rc; + } + options |= __WALL; + return wait4(pid, status, options, ru); +#endif +} + +static void print_rusage(FILE *f, struct rusage *ru) { + fprintf(f, " User CPU time=%ld.%06ld\n", (long)ru->ru_utime.tv_sec, (long)ru->ru_utime.tv_usec); + fprintf(f, " System CPU time=%ld.%06ld\n", (long)ru->ru_stime.tv_sec, (long)ru->ru_stime.tv_usec); + fprintf(f, " Max RSS=%ld\n", ru->ru_maxrss); +} + +static void print_stat(FILE *f, const struct stat *stat) { + fprintf(f, + "{ .st_dev=%ld, st_ino=%ld, st_mode=%04o, st_nlink=%ld, st_uid=%d, st_gid=%d,\n" + " .st_rdev=%ld, .st_size=%ld, st_blksize=%ld, .st_block=%ld,\n " +#ifdef HAVE_STAT_BIRTHTIME + ".st_birthtime=%ld, " +#endif + ".st_atime=%ld, .st_mtime=%ld, .st_ctime=%ld}\n", + (long)stat->st_dev, (long)stat->st_ino, stat->st_mode, + (long)stat->st_nlink, stat->st_uid, stat->st_gid, + (long)stat->st_rdev, (long)stat->st_size, (long)stat->st_blksize, + (long)stat->st_blocks, +#ifdef HAVE_STAT_BIRTHTIME + (long)stat->st_birthtime, +#endif + (long)stat->st_atime, (long)stat->st_mtime, (long)stat->st_ctime); +} + +static std::map had_signal; +static void handle_signal(int x) { + had_signal[x] = true; +} + +// Check that the given child process terminates as expected. +void CheckChildFinished(pid_t pid, bool signaled=false) { + // Wait for the child to finish. 
+ int rc; + int status = 0; + do { + rc = waitpid(pid, &status, __WALL); + if (rc < 0) { + fprintf(stderr, "Warning: waitpid error %s (%d)\n", strerror(errno), errno); + ADD_FAILURE() << "Failed to wait for child"; + break; + } else if (rc == pid) { + break; + } + } while (true); + EXPECT_EQ(pid, rc); + if (rc == pid) { + if (signaled) { + EXPECT_TRUE(WIFSIGNALED(status)); + } else { + EXPECT_TRUE(WIFEXITED(status)) << std::hex << status; + EXPECT_EQ(0, WEXITSTATUS(status)); + } + } +} + +//------------------------------------------------ +// Basic tests of process descriptor functionality + +TEST(Pdfork, Simple) { + int pd = -1; + pid_t parent = getpid_(); + int pid = pdfork(&pd, 0); + EXPECT_OK(pid); + if (pid == 0) { + // Child: check pid values. + EXPECT_EQ(-1, pd); + EXPECT_NE(parent, getpid_()); + EXPECT_EQ(parent, getppid()); + sleep(1); + exit(0); + } + usleep(100); // ensure the child has a chance to run + EXPECT_NE(-1, pd); + EXPECT_PID_ALIVE(pid); + int pid_got; + EXPECT_OK(pdgetpid(pd, &pid_got)); + EXPECT_EQ(pid, pid_got); + + // Wait long enough for the child to exit(). + sleep(2); + EXPECT_PID_ZOMBIE(pid); + + // Wait for the the child. + int status; + struct rusage ru; + memset(&ru, 0, sizeof(ru)); + int waitrc = pdwait4_(pd, &status, 0, &ru); + EXPECT_EQ(pid, waitrc); + if (verbose) { + fprintf(stderr, "For pd %d pid %d:\n", pd, pid); + print_rusage(stderr, &ru); + } + EXPECT_PID_GONE(pid); + + // Can only pdwait4(pd) once (as initial call reaps zombie). 
+ memset(&ru, 0, sizeof(ru)); + EXPECT_EQ(-1, pdwait4_(pd, &status, 0, &ru)); + EXPECT_EQ(ECHILD, errno); + + EXPECT_OK(close(pd)); +} + +TEST(Pdfork, InvalidFlag) { + int pd = -1; + int pid = pdfork(&pd, PD_DAEMON<<5); + if (pid == 0) { + exit(1); + } + EXPECT_EQ(-1, pid); + EXPECT_EQ(EINVAL, errno); + if (pid > 0) waitpid(pid, NULL, __WALL); +} + +TEST(Pdfork, TimeCheck) { + time_t now = time(NULL); // seconds since epoch + EXPECT_NE(-1, now); + if (verbose) fprintf(stderr, "Calling pdfork around %ld\n", (long)(long)now); + + int pd = -1; + pid_t pid = pdfork(&pd, 0); + EXPECT_OK(pid); + if (pid == 0) { + // Child: check we didn't get a valid process descriptor then exit. + EXPECT_EQ(-1, pdgetpid(pd, &pid)); + EXPECT_EQ(EBADF, errno); + exit(HasFailure()); + } + +#ifdef HAVE_PROCDESC_FSTAT + // Parent process. Ensure that [acm]times have been set correctly. + struct stat stat; + memset(&stat, 0, sizeof(stat)); + EXPECT_OK(fstat(pd, &stat)); + if (verbose) print_stat(stderr, &stat); + +#ifdef HAVE_STAT_BIRTHTIME + EXPECT_GE(now, stat.st_birthtime); + EXPECT_EQ(stat.st_birthtime, stat.st_atime); +#endif + EXPECT_LT((now - stat.st_atime), 2); + EXPECT_EQ(stat.st_atime, stat.st_ctime); + EXPECT_EQ(stat.st_ctime, stat.st_mtime); +#endif + + // Wait for the child to finish. + pid_t pd_pid = -1; + EXPECT_OK(pdgetpid(pd, &pd_pid)); + EXPECT_EQ(pid, pd_pid); + CheckChildFinished(pid); +} + +TEST(Pdfork, UseDescriptor) { + int pd = -1; + pid_t pid = pdfork(&pd, 0); + EXPECT_OK(pid); + if (pid == 0) { + // Child: immediately exit + exit(0); + } + CheckChildFinished(pid); +} + +TEST(Pdfork, NonProcessDescriptor) { + int fd = open("/etc/passwd", O_RDONLY); + EXPECT_OK(fd); + // pd*() operations should fail on a non-process descriptor. 
+ EXPECT_EQ(-1, pdkill(fd, SIGUSR1)); + int status; + EXPECT_EQ(-1, pdwait4_(fd, &status, 0, NULL)); + pid_t pid; + EXPECT_EQ(-1, pdgetpid(fd, &pid)); + close(fd); +} + +static void *SubThreadMain(void *) { + while (true) { + if (verbose) fprintf(stderr, " subthread: \"I aten't dead\"\n"); + usleep(100000); + } + return NULL; +} + +static void *ThreadMain(void *) { + int pd; + pid_t child = pdfork(&pd, 0); + if (child == 0) { + // Child: start a subthread then loop + pthread_t child_subthread; + EXPECT_OK(pthread_create(&child_subthread, NULL, SubThreadMain, NULL)); + while (true) { + if (verbose) fprintf(stderr, " pdforked process %d: \"I aten't dead\"\n", getpid()); + usleep(100000); + } + exit(0); + } + if (verbose) fprintf(stderr, " thread generated pd %d\n", pd); + sleep(2); + + // Pass the process descriptor back to the main thread. + return reinterpret_cast(pd); +} + +TEST(Pdfork, FromThread) { + // Fire off a new thread to do all of the creation work. + pthread_t child_thread; + EXPECT_OK(pthread_create(&child_thread, NULL, ThreadMain, NULL)); + void *data; + EXPECT_OK(pthread_join(child_thread, &data)); + int pd = reinterpret_cast(data); + if (verbose) fprintf(stderr, "retrieved pd %d from terminated thread\n", pd); + + // Kill and reap. + pid_t pid; + EXPECT_OK(pdgetpid(pd, &pid)); + EXPECT_OK(pdkill(pd, SIGKILL)); + int status; + EXPECT_EQ(pid, pdwait4_(pd, &status, 0, NULL)); + EXPECT_TRUE(WIFSIGNALED(status)); +} + +//------------------------------------------------ +// More complicated tests. + + +// Test fixture that pdfork()s off a child process, which terminates +// when it receives anything on a pipe. 
+class PipePdforkBase : public ::testing::Test {
+ public:
+  // Creates a pipe and then pdfork()s (with the given pdfork flags) a child
+  // that blocks reading an int from the pipe and exits with that value.
+  // In the parent, pd_ holds the process descriptor, pid_ the child's pid
+  // and pipe_ the write end of the pipe.
+  PipePdforkBase(int pdfork_flags) : pd_(-1), pipe_(-1), pid_(-1), pipe_rd_(-1) {
+    had_signal.clear();
+    // Start from -1 so that a failed pipe() does not leave garbage descriptors
+    // behind for the destructor to close.
+    int pipes[2] = {-1, -1};
+    EXPECT_OK(pipe(pipes));
+    pipe_rd_ = pipes[0];
+    pipe_ = pipes[1];
+    int parent = getpid_();
+    if (verbose) fprintf(stderr, "[%d] about to pdfork()\n", getpid_());
+    int rc = pdfork(&pd_, pdfork_flags);
+    EXPECT_OK(rc);
+    if (rc == 0) {
+      // Child process: blocking-read an int from the pipe then exit with that value.
+      EXPECT_NE(parent, getpid_());
+      EXPECT_EQ(parent, getppid());
+      if (verbose) fprintf(stderr, " [%d] child of %d waiting for value on pipe\n", getpid_(), getppid());
+      read(pipes[0], &rc, sizeof(rc));
+      if (verbose) fprintf(stderr, " [%d] got value %d on pipe, exiting\n", getpid_(), rc);
+      exit(rc);
+    }
+    pid_ = rc;
+    usleep(100);  // ensure the child has a chance to run
+  }
+  // Kills the child (if it is still around) and releases the descriptors
+  // owned by the fixture.
+  ~PipePdforkBase() {
+    // Terminate by any means necessary.
+    if (pd_ > 0) {
+      pdkill(pd_, SIGKILL);
+      close(pd_);
+    }
+    if (pid_ > 0) {
+      kill(pid_, SIGKILL);
+      waitpid(pid_, NULL, __WALL|WNOHANG);
+    }
+    // Release both ends of the pipe; previously they were leaked by every
+    // fixture instance.  The read end is deliberately kept open in the parent
+    // until now so that a TerminateChild() issued after the child has already
+    // died cannot raise SIGPIPE in the test process.
+    if (pipe_ >= 0) close(pipe_);
+    if (pipe_rd_ >= 0) close(pipe_rd_);
+    // Check signal expectations.
+    EXPECT_FALSE(had_signal[SIGCHLD]);
+  }
+  // Asks the child to exit by writing a zero int to the pipe.
+  // Returns the result of write(2): bytes written, or -1 on error.
+  int TerminateChild() {
+    // Tell the child to exit.
+    int zero = 0;
+    if (verbose) fprintf(stderr, "[%d] write 0 to pipe\n", getpid_());
+    return write(pipe_, &zero, sizeof(zero));
+  }
+ protected:
+  int pd_;     // process descriptor filled in by pdfork(); tests set it to -1 after closing it
+  int pipe_;   // write end of the pipe the child is blocked on
+  pid_t pid_;  // pid of the pdfork()ed child
+ private:
+  int pipe_rd_;  // parent's copy of the read end, held only so it can be closed on teardown
+};
+
+// Fixture whose child is created with no pdfork() flags.
+class PipePdfork : public PipePdforkBase {
+ public:
+  PipePdfork() : PipePdforkBase(0) {}
+};
+
+// Fixture whose child is created with PD_DAEMON.
+class PipePdforkDaemon : public PipePdforkBase {
+ public:
+  PipePdforkDaemon() : PipePdforkBase(PD_DAEMON) {}
+};
+
+// Can we poll a process descriptor?
+TEST_F(PipePdfork, Poll) {
+  // Poll the process descriptor, nothing happening.
+  struct pollfd fdp;
+  fdp.fd = pd_;
+  fdp.events = POLLIN | POLLERR | POLLHUP;
+  fdp.revents = 0;
+  EXPECT_EQ(0, poll(&fdp, 1, 0));
+
+  TerminateChild();
+
+  // Poll again, should have activity on the process descriptor.
+  EXPECT_EQ(1, poll(&fdp, 1, 2000));
+  EXPECT_TRUE(fdp.revents & POLLHUP);
+
+  // Poll a third time, still have POLLHUP.
+  fdp.revents = 0;
+  EXPECT_EQ(1, poll(&fdp, 1, 0));
+  EXPECT_TRUE(fdp.revents & POLLHUP);
+}
+
+// Can multiple processes poll on the same descriptor?
+TEST_F(PipePdfork, PollMultiple) {
+  int child = fork();
+  EXPECT_OK(child);
+  if (child == 0) {
+    // Child: wait to give time for setup, then write to the pipe (which will
+    // induce exit of the pdfork()ed process) and exit.
+    sleep(1);
+    TerminateChild();
+    exit(0);
+  }
+  usleep(100);  // ensure the child has a chance to run
+
+  // Fork again
+  int doppel = fork();
+  EXPECT_OK(doppel);
+  // We now have:
+  //   pid A: main process, here
+  //   |--pid B: pdfork()ed process, blocked on read()
+  //   |--pid C: fork()ed process, in sleep(1) above
+  //   +--pid D: doppel process, here
+
+  // Both A and D execute the following code.
+  // First, check no activity on the process descriptor yet.
+  struct pollfd fdp;
+  fdp.fd = pd_;
+  fdp.events = POLLIN | POLLERR | POLLHUP;
+  fdp.revents = 0;
+  EXPECT_EQ(0, poll(&fdp, 1, 0));
+
+  // Now, wait (for up to 2 seconds) for activity on the process descriptor.
+  // We expect:
+  //  - pid C will finish its sleep, write to the pipe and exit
+  //  - pid B will unblock from read(), and exit
+  //  - this will generate an event on the process descriptor...
+  //  - ...in both process A and process D.
+  EXPECT_EQ(1, poll(&fdp, 1, 2000));
+  EXPECT_TRUE(fdp.revents & POLLHUP);
+
+  if (doppel == 0) {
+    // Child: process D exits.
+    exit(0);
+  } else {
+    // Parent: wait on process D.
+    int rc = 0;
+    waitpid(doppel, &rc, __WALL);
+    EXPECT_TRUE(WIFEXITED(rc));
+    EXPECT_EQ(0, WEXITSTATUS(rc));
+    // Also wait on process C (the fork()ed writer); process B is cleaned up
+    // by the fixture destructor.
+    CheckChildFinished(child);
+  }
+}
+
+// Check that exit status/rusage for a dead pdfork()ed child can be retrieved
+// via any process descriptor, multiple times.
+TEST_F(PipePdfork, MultipleRetrieveExitStatus) { + EXPECT_PID_ALIVE(pid_); + int pd_copy = dup(pd_); + EXPECT_LT(0, TerminateChild()); + + int status; + struct rusage ru; + memset(&ru, 0, sizeof(ru)); + int waitrc = pdwait4_(pd_copy, &status, 0, &ru); + EXPECT_EQ(pid_, waitrc); + if (verbose) { + fprintf(stderr, "For pd %d -> pid %d:\n", pd_, pid_); + print_rusage(stderr, &ru); + } + EXPECT_PID_GONE(pid_); + +#ifdef NOTYET + // Child has been reaped, so original process descriptor dangles but + // still has access to rusage information. + memset(&ru, 0, sizeof(ru)); + EXPECT_EQ(0, pdwait4_(pd_, &status, 0, &ru)); +#endif + close(pd_copy); +} + +TEST_F(PipePdfork, ChildExit) { + EXPECT_PID_ALIVE(pid_); + EXPECT_LT(0, TerminateChild()); + EXPECT_PID_DEAD(pid_); + + int status; + int rc = pdwait4_(pd_, &status, 0, NULL); + EXPECT_OK(rc); + EXPECT_EQ(pid_, rc); + pid_ = 0; +} + +#ifdef HAVE_PROC_FDINFO +TEST_F(PipePdfork, FdInfo) { + char buffer[1024]; + sprintf(buffer, "/proc/%d/fdinfo/%d", getpid_(), pd_); + int procfd = open(buffer, O_RDONLY); + EXPECT_OK(procfd); + + EXPECT_OK(read(procfd, buffer, sizeof(buffer))); + // The fdinfo should include the file pos of the underlying file + EXPECT_NE((char*)NULL, strstr(buffer, "pos:\t0")) << buffer; + // ...and the underlying pid + char pidline[256]; + sprintf(pidline, "pid:\t%d", pid_); + EXPECT_NE((char*)NULL, strstr(buffer, pidline)) << buffer; + close(procfd); +} +#endif + +// Closing a normal process descriptor terminates the underlying process. +TEST_F(PipePdfork, Close) { + sighandler_t original = signal(SIGCHLD, handle_signal); + EXPECT_PID_ALIVE(pid_); + int status; + EXPECT_EQ(0, waitpid(pid_, &status, __WALL|WNOHANG)); + + EXPECT_OK(close(pd_)); + pd_ = -1; + EXPECT_FALSE(had_signal[SIGCHLD]); + EXPECT_PID_DEAD(pid_); + +#ifdef __FreeBSD__ + EXPECT_EQ(-1, waitpid(pid_, NULL, __WALL)); + EXPECT_EQ(errno, ECHILD); +#else + // Having closed the process descriptor means that pdwait4(pd) now doesn't work. 
+ int rc = pdwait4_(pd_, &status, 0, NULL); + EXPECT_EQ(-1, rc); + EXPECT_EQ(EBADF, errno); + + // Closing all process descriptors means the the child can only be reaped via pid. + EXPECT_EQ(pid_, waitpid(pid_, &status, __WALL|WNOHANG)); +#endif + signal(SIGCHLD, original); +} + +TEST_F(PipePdfork, CloseLast) { + sighandler_t original = signal(SIGCHLD, handle_signal); + // Child should only die when last process descriptor is closed. + EXPECT_PID_ALIVE(pid_); + int pd_other = dup(pd_); + + EXPECT_OK(close(pd_)); + pd_ = -1; + + EXPECT_PID_ALIVE(pid_); + int status; + EXPECT_EQ(0, waitpid(pid_, &status, __WALL|WNOHANG)); + + // Can no longer pdwait4() the closed process descriptor... + EXPECT_EQ(-1, pdwait4_(pd_, &status, WNOHANG, NULL)); + EXPECT_EQ(EBADF, errno); + // ...but can pdwait4() the still-open process descriptor. + errno = 0; + EXPECT_EQ(0, pdwait4_(pd_other, &status, WNOHANG, NULL)); + EXPECT_EQ(0, errno); + + EXPECT_OK(close(pd_other)); + EXPECT_PID_DEAD(pid_); + + EXPECT_FALSE(had_signal[SIGCHLD]); + signal(SIGCHLD, original); +} + +FORK_TEST(Pdfork, OtherUser) { + REQUIRE_ROOT(); + int pd; + pid_t pid = pdfork(&pd, 0); + EXPECT_OK(pid); + if (pid == 0) { + // Child process: loop forever. + while (true) usleep(100000); + } + usleep(100); + + // Now that the second process has been pdfork()ed, change euid. + setuid(other_uid); + if (verbose) fprintf(stderr, "uid=%d euid=%d\n", getuid(), geteuid()); + + // Fail to kill child with normal PID operation. + EXPECT_EQ(-1, kill(pid, SIGKILL)); + EXPECT_EQ(EPERM, errno); + EXPECT_PID_ALIVE(pid); + + // Succeed with pdkill though. + EXPECT_OK(pdkill(pd, SIGKILL)); + EXPECT_PID_ZOMBIE(pid); + + int status; + int rc = pdwait4_(pd, &status, WNOHANG, NULL); + EXPECT_OK(rc); + EXPECT_EQ(pid, rc); + EXPECT_TRUE(WIFSIGNALED(status)); +} + +TEST_F(PipePdfork, WaitPidThenPd) { + TerminateChild(); + int status; + // If we waitpid(pid) first... 
+ int rc = waitpid(pid_, &status, __WALL); + EXPECT_OK(rc); + EXPECT_EQ(pid_, rc); + +#ifdef NOTYET + // ...the zombie is reaped but we can still subsequently pdwait4(pd). + EXPECT_EQ(0, pdwait4_(pd_, &status, 0, NULL)); +#endif +} + +TEST_F(PipePdfork, WaitPdThenPid) { + TerminateChild(); + int status; + // If we pdwait4(pd) first... + int rc = pdwait4_(pd_, &status, 0, NULL); + EXPECT_OK(rc); + EXPECT_EQ(pid_, rc); + + // ...the zombie is reaped and cannot subsequently waitpid(pid). + EXPECT_EQ(-1, waitpid(pid_, &status, __WALL)); + EXPECT_EQ(ECHILD, errno); +} + +// Setting PD_DAEMON prevents close() from killing the child. +TEST_F(PipePdforkDaemon, Close) { + EXPECT_OK(close(pd_)); + pd_ = -1; + EXPECT_PID_ALIVE(pid_); + + // Can still explicitly kill it via the pid. + if (pid_ > 0) { + EXPECT_OK(kill(pid_, SIGKILL)); + EXPECT_PID_DEAD(pid_); + } +} + +static void TestPdkill(pid_t pid, int pd) { + EXPECT_PID_ALIVE(pid); + // SIGCONT is ignored by default. + EXPECT_OK(pdkill(pd, SIGCONT)); + EXPECT_PID_ALIVE(pid); + + // SIGINT isn't + EXPECT_OK(pdkill(pd, SIGINT)); + EXPECT_PID_DEAD(pid); + + // pdkill() on zombie is no-op. + errno = 0; + EXPECT_EQ(0, pdkill(pd, SIGINT)); + EXPECT_EQ(0, errno); + + // pdkill() on reaped process gives -ESRCH. + CheckChildFinished(pid, true); + EXPECT_EQ(-1, pdkill(pd, SIGINT)); + EXPECT_EQ(ESRCH, errno); +} + +TEST_F(PipePdfork, Pdkill) { + TestPdkill(pid_, pd_); +} + +TEST_F(PipePdforkDaemon, Pdkill) { + TestPdkill(pid_, pd_); +} + +TEST(Pdfork, PdkillOtherSignal) { + int pd = -1; + int pid = pdfork(&pd, 0); + EXPECT_OK(pid); + if (pid == 0) { + // Child: watch for SIGUSR1 forever. + had_signal.clear(); + signal(SIGUSR1, handle_signal); + while (!had_signal[SIGUSR1]) { + usleep(100000); + } + exit(123); + } + sleep(1); + + // Send an invalid signal. + EXPECT_EQ(-1, pdkill(pd, 0xFFFF)); + EXPECT_EQ(EINVAL, errno); + + // Send an expected SIGUSR1 to the pdfork()ed child. 
+ EXPECT_PID_ALIVE(pid); + pdkill(pd, SIGUSR1); + EXPECT_PID_DEAD(pid); + + // Child's exit status confirms whether it received the signal. + int status; + int rc = waitpid(pid, &status, __WALL); + EXPECT_OK(rc); + EXPECT_EQ(pid, rc); + EXPECT_TRUE(WIFEXITED(status)) << "0x" << std::hex << rc; + EXPECT_EQ(123, WEXITSTATUS(status)); +} + +pid_t PdforkParentDeath(int pdfork_flags) { + // Set up: + // pid A: main process, here + // +--pid B: fork()ed process, sleep(4)s then exits + // +--pid C: pdfork()ed process, looping forever + int sock_fds[2]; + EXPECT_OK(socketpair(AF_UNIX, SOCK_STREAM, 0, sock_fds)); + if (verbose) fprintf(stderr, "[%d] parent about to fork()...\n", getpid_()); + pid_t child = fork(); + EXPECT_OK(child); + if (child == 0) { + int pd; + if (verbose) fprintf(stderr, " [%d] child about to pdfork()...\n", getpid_()); + pid_t grandchild = pdfork(&pd, pdfork_flags); + if (grandchild == 0) { + while (true) { + if (verbose) fprintf(stderr, " [%d] grandchild: \"I aten't dead\"\n", getpid_()); + sleep(1); + } + } + if (verbose) fprintf(stderr, " [%d] pdfork()ed grandchild %d, sending ID to parent\n", getpid_(), grandchild); + // send grandchild pid to parent + write(sock_fds[1], &grandchild, sizeof(grandchild)); + sleep(4); + if (verbose) fprintf(stderr, " [%d] child terminating\n", getpid_()); + exit(0); + } + if (verbose) fprintf(stderr, "[%d] fork()ed child is %d\n", getpid_(), child); + pid_t grandchild; + read(sock_fds[0], &grandchild, sizeof(grandchild)); + if (verbose) fprintf(stderr, "[%d] receive grandchild id %d\n", getpid_(), grandchild); + EXPECT_PID_ALIVE(child); + EXPECT_PID_ALIVE(grandchild); + sleep(6); + // Child dies, closing its process descriptor for the grandchild. 
+ EXPECT_PID_DEAD(child); + CheckChildFinished(child); + return grandchild; +} + +TEST(Pdfork, Bagpuss) { + // "And of course when Bagpuss goes to sleep, all his friends go to sleep too" + pid_t grandchild = PdforkParentDeath(0); + // By default: child death => closed process descriptor => grandchild death. + EXPECT_PID_DEAD(grandchild); +} + +TEST(Pdfork, BagpussDaemon) { + pid_t grandchild = PdforkParentDeath(PD_DAEMON); + // With PD_DAEMON: child death => closed process descriptor => no effect on grandchild. + EXPECT_PID_ALIVE(grandchild); + if (grandchild > 0) { + EXPECT_OK(kill(grandchild, SIGKILL)); + } +} + +// The exit of a pdfork()ed process should not generate SIGCHLD. +TEST_F(PipePdfork, NoSigchld) { + had_signal.clear(); + sighandler_t original = signal(SIGCHLD, handle_signal); + TerminateChild(); + int rc = 0; + // Can waitpid() for the specific pid of the pdfork()ed child. + EXPECT_EQ(pid_, waitpid(pid_, &rc, __WALL)); + EXPECT_TRUE(WIFEXITED(rc)) << "0x" << std::hex << rc; + EXPECT_FALSE(had_signal[SIGCHLD]); + signal(SIGCHLD, original); +} + +// The exit of a pdfork()ed process whose process descriptors have +// all been closed should generate SIGCHLD. The child process needs +// PD_DAEMON to survive the closure of the process descriptors. +TEST_F(PipePdforkDaemon, NoPDSigchld) { + had_signal.clear(); + sighandler_t original = signal(SIGCHLD, handle_signal); + + EXPECT_OK(close(pd_)); + TerminateChild(); +#ifdef __FreeBSD__ + EXPECT_EQ(-1, waitpid(pid_, NULL, __WALL)); + EXPECT_EQ(errno, ECHILD); +#else + int rc = 0; + // Can waitpid() for the specific pid of the pdfork()ed child. 
+ EXPECT_EQ(pid_, waitpid(pid_, &rc, __WALL)); + EXPECT_TRUE(WIFEXITED(rc)) << "0x" << std::hex << rc; +#endif + EXPECT_FALSE(had_signal[SIGCHLD]); + signal(SIGCHLD, original); +} + +#ifdef HAVE_PROCDESC_FSTAT +TEST_F(PipePdfork, ModeBits) { + // Owner rwx bits indicate liveness of child + struct stat stat; + memset(&stat, 0, sizeof(stat)); + EXPECT_OK(fstat(pd_, &stat)); + if (verbose) print_stat(stderr, &stat); + EXPECT_EQ(S_IRWXU, (long)(stat.st_mode & S_IRWXU)); + + TerminateChild(); + usleep(100000); + + memset(&stat, 0, sizeof(stat)); + EXPECT_OK(fstat(pd_, &stat)); + if (verbose) print_stat(stderr, &stat); + EXPECT_EQ(0, (int)(stat.st_mode & S_IRWXU)); +} +#endif + +TEST_F(PipePdfork, WildcardWait) { + // TODO(FreeBSD): make wildcard wait ignore pdfork()ed children + // https://bugs.freebsd.org/201054 + TerminateChild(); + sleep(1); // Ensure child is truly dead. + + // Wildcard waitpid(-1) should not see the pdfork()ed child because + // there is still a process descriptor for it. + int rc; + EXPECT_EQ(-1, waitpid(-1, &rc, WNOHANG)); + EXPECT_EQ(ECHILD, errno); + + EXPECT_OK(close(pd_)); + pd_ = -1; +} + +FORK_TEST(Pdfork, Pdkill) { + had_signal.clear(); + int pd; + pid_t pid = pdfork(&pd, 0); + EXPECT_OK(pid); + + if (pid == 0) { + // Child: set a SIGINT handler and sleep. + had_signal.clear(); + signal(SIGINT, handle_signal); + if (verbose) fprintf(stderr, "[%d] child about to sleep(10)\n", getpid_()); + int left = sleep(10); + if (verbose) fprintf(stderr, "[%d] child slept, %d sec left, had[SIGINT]=%d\n", + getpid_(), left, had_signal[SIGINT]); + // Expect this sleep to be interrupted by the signal (and so left > 0). + exit(left == 0); + } + + // Parent: get child's PID. + pid_t pd_pid; + EXPECT_OK(pdgetpid(pd, &pd_pid)); + EXPECT_EQ(pid, pd_pid); + + // Interrupt the child after a second. + sleep(1); + EXPECT_OK(pdkill(pd, SIGINT)); + + // Make sure the child finished properly (caught signal then exited). 
+ CheckChildFinished(pid); +} + +FORK_TEST(Pdfork, PdkillSignal) { + int pd; + pid_t pid = pdfork(&pd, 0); + EXPECT_OK(pid); + + if (pid == 0) { + // Child: sleep. No SIGINT handler. + if (verbose) fprintf(stderr, "[%d] child about to sleep(10)\n", getpid_()); + int left = sleep(10); + if (verbose) fprintf(stderr, "[%d] child slept, %d sec left\n", getpid_(), left); + exit(99); + } + + // Kill the child (as it doesn't handle SIGINT). + sleep(1); + EXPECT_OK(pdkill(pd, SIGINT)); + + // Make sure the child finished properly (terminated by signal). + CheckChildFinished(pid, true); +} + +//------------------------------------------------ +// Test interactions with other parts of Capsicum: +// - capability mode +// - capabilities + +FORK_TEST(Pdfork, DaemonUnrestricted) { + EXPECT_OK(cap_enter()); + int fd; + + // Capability mode leaves pdfork() available, with and without flag. + int rc; + rc = pdfork(&fd, PD_DAEMON); + EXPECT_OK(rc); + if (rc == 0) { + // Child: immediately terminate. + exit(0); + } + + rc = pdfork(&fd, 0); + EXPECT_OK(rc); + if (rc == 0) { + // Child: immediately terminate. + exit(0); + } +} + +TEST(Pdfork, MissingRights) { + pid_t parent = getpid_(); + int pd = -1; + pid_t pid = pdfork(&pd, 0); + EXPECT_OK(pid); + if (pid == 0) { + // Child: loop forever. + EXPECT_NE(parent, getpid_()); + while (true) sleep(1); + } + // Create two capabilities from the process descriptor. 
+ cap_rights_t r_ro; + cap_rights_init(&r_ro, CAP_READ, CAP_LOOKUP); + int cap_incapable = dup(pd); + EXPECT_OK(cap_incapable); + EXPECT_OK(cap_rights_limit(cap_incapable, &r_ro)); + cap_rights_t r_pdall; + cap_rights_init(&r_pdall, CAP_PDGETPID, CAP_PDWAIT, CAP_PDKILL); + int cap_capable = dup(pd); + EXPECT_OK(cap_capable); + EXPECT_OK(cap_rights_limit(cap_capable, &r_pdall)); + + pid_t other_pid; + EXPECT_NOTCAPABLE(pdgetpid(cap_incapable, &other_pid)); + EXPECT_NOTCAPABLE(pdkill(cap_incapable, SIGINT)); + int status; + EXPECT_NOTCAPABLE(pdwait4_(cap_incapable, &status, 0, NULL)); + + EXPECT_OK(pdgetpid(cap_capable, &other_pid)); + EXPECT_EQ(pid, other_pid); + EXPECT_OK(pdkill(cap_capable, SIGINT)); + int rc = pdwait4_(pd, &status, 0, NULL); + EXPECT_OK(rc); + EXPECT_EQ(pid, rc); +} + + +//------------------------------------------------ +// Passing process descriptors between processes. + +TEST_F(PipePdfork, PassProcessDescriptor) { + int sock_fds[2]; + EXPECT_OK(socketpair(AF_UNIX, SOCK_STREAM, 0, sock_fds)); + + struct msghdr mh; + mh.msg_name = NULL; // No address needed + mh.msg_namelen = 0; + char buffer1[1024]; + struct iovec iov[1]; + iov[0].iov_base = buffer1; + iov[0].iov_len = sizeof(buffer1); + mh.msg_iov = iov; + mh.msg_iovlen = 1; + char buffer2[1024]; + mh.msg_control = buffer2; + mh.msg_controllen = sizeof(buffer2); + struct cmsghdr *cmptr; + + if (verbose) fprintf(stderr, "[%d] about to fork()\n", getpid_()); + pid_t child2 = fork(); + if (child2 == 0) { + // Child: close our copy of the original process descriptor. 
+ close(pd_); + + // Child: wait to receive process descriptor over socket + if (verbose) fprintf(stderr, " [%d] child of %d waiting for process descriptor on socket\n", getpid_(), getppid()); + int rc = recvmsg(sock_fds[0], &mh, 0); + EXPECT_OK(rc); + EXPECT_LE(CMSG_LEN(sizeof(int)), mh.msg_controllen); + cmptr = CMSG_FIRSTHDR(&mh); + int pd = *(int*)CMSG_DATA(cmptr); + EXPECT_EQ(CMSG_LEN(sizeof(int)), cmptr->cmsg_len); + cmptr = CMSG_NXTHDR(&mh, cmptr); + EXPECT_TRUE(cmptr == NULL); + if (verbose) fprintf(stderr, " [%d] got process descriptor %d on socket\n", getpid_(), pd); + + // Child: confirm we can do pd*() operations on the process descriptor + pid_t other; + EXPECT_OK(pdgetpid(pd, &other)); + if (verbose) fprintf(stderr, " [%d] process descriptor %d is pid %d\n", getpid_(), pd, other); + + sleep(2); + if (verbose) fprintf(stderr, " [%d] close process descriptor %d\n", getpid_(), pd); + close(pd); + + // Last process descriptor closed, expect death + EXPECT_PID_DEAD(other); + + exit(HasFailure()); + } + usleep(1000); // Ensure subprocess runs + + // Send the process descriptor over the pipe to the sub-process + mh.msg_controllen = CMSG_LEN(sizeof(int)); + cmptr = CMSG_FIRSTHDR(&mh); + cmptr->cmsg_level = SOL_SOCKET; + cmptr->cmsg_type = SCM_RIGHTS; + cmptr->cmsg_len = CMSG_LEN(sizeof(int)); + *(int *)CMSG_DATA(cmptr) = pd_; + buffer1[0] = 0; + iov[0].iov_len = 1; + sleep(1); + if (verbose) fprintf(stderr, "[%d] send process descriptor %d on socket\n", getpid_(), pd_); + int rc = sendmsg(sock_fds[1], &mh, 0); + EXPECT_OK(rc); + + if (verbose) fprintf(stderr, "[%d] close process descriptor %d\n", getpid_(), pd_); + close(pd_); // Not last open process descriptor + + // wait for child2 + int status; + EXPECT_EQ(child2, waitpid(child2, &status, __WALL)); + rc = WIFEXITED(status) ? 
WEXITSTATUS(status) : -1; + EXPECT_EQ(0, rc); + + // confirm death all round + EXPECT_PID_DEAD(child2); + EXPECT_PID_DEAD(pid_); +} Index: head/contrib/capsicum-test/rename.cc =================================================================== --- head/contrib/capsicum-test/rename.cc +++ head/contrib/capsicum-test/rename.cc @@ -0,0 +1,49 @@ +#include +#include + +#include "./capsicum-test.h" + +// There was a Capsicum-related regression in FreeBSD renameat, +// which affects certain cases independent of Capsicum or capability mode +// +// added to test the renameat syscall for the case that +// - the "to" file already exists +// - the "to" file is specified by an absolute path +// - the "to" file descriptor is used +// (this descriptor should be ignored if absolute path is provided) +// +// details at: https://bugs.freebsd.org/bugzilla/show_bug.cgi?id=222258 + + +const char * create_tmp_src(const char* filename) { + const char *src_path = TmpFile(filename); + int src_fd = open(src_path, O_CREAT|O_RDWR, 0644); + close(src_fd); + return src_path; +} + +TEST(Rename, AbsDesignationSame) { + const char *src_path = create_tmp_src("rename_test"); + EXPECT_OK(rename(src_path, src_path)); + unlink(src_path); +} + +TEST(RenameAt, AbsDesignationSame) { + const char *src_path = create_tmp_src("renameat_test"); + const char *dir_path = TmpFile("renameat_test_dir"); + + EXPECT_OK(mkdir(dir_path, 0755)); + // random temporary directory descriptor + int dfd = open(dir_path, O_DIRECTORY); + + // Various rename from/to the same absolute path; in each case the source + // and dest directory FDs should be irrelevant. 
+ EXPECT_OK(renameat(AT_FDCWD, src_path, AT_FDCWD, src_path)); + EXPECT_OK(renameat(AT_FDCWD, src_path, dfd, src_path)); + EXPECT_OK(renameat(dfd, src_path, AT_FDCWD, src_path)); + EXPECT_OK(renameat(dfd, src_path, dfd, src_path)); + + close(dfd); + rmdir(dir_path); + unlink(src_path); +} Index: head/contrib/capsicum-test/sctp.cc =================================================================== --- head/contrib/capsicum-test/sctp.cc +++ head/contrib/capsicum-test/sctp.cc @@ -0,0 +1,212 @@ +// Tests of SCTP functionality +// Requires: libsctp-dev package on Debian Linux, CONFIG_IP_SCTP in kernel config +#ifdef HAVE_SCTP +#include +#include +#include +#include +#include +#include + +#include "syscalls.h" +#include "capsicum.h" +#include "capsicum-test.h" + +static cap_rights_t r_ro; +static cap_rights_t r_wo; +static cap_rights_t r_rw; +static cap_rights_t r_all; +static cap_rights_t r_all_nopeel; +#define DO_PEELOFF 0x1A +#define DO_TERM 0x1B + +static int SctpClient(int port, unsigned char byte) { + // Create sockets: one dup of the SCTP socket per rights set (read-only, read/write, all) + int sock = socket(AF_INET, SOCK_SEQPACKET, IPPROTO_SCTP); + EXPECT_OK(sock); + if (sock < 0) return sock; + int cap_sock_ro = dup(sock); + EXPECT_OK(cap_sock_ro); + EXPECT_OK(cap_rights_limit(cap_sock_ro, &r_ro)); // read-only dup: CAP_READ only, so sending on it must fail + int cap_sock_rw = dup(sock); + EXPECT_OK(cap_sock_rw); + EXPECT_OK(cap_rights_limit(cap_sock_rw, &r_rw)); + int cap_sock_all = dup(sock); + EXPECT_OK(cap_sock_all); + EXPECT_OK(cap_rights_limit(cap_sock_all, &r_all)); + close(sock); + + // Send a message. Requires CAP_WRITE and CAP_CONNECT. 
+ struct sockaddr_in serv_addr; + memset(&serv_addr, 0, sizeof(serv_addr)); + serv_addr.sin_family = AF_INET; + serv_addr.sin_addr.s_addr = inet_addr("127.0.0.1"); + serv_addr.sin_port = htons(port); + + EXPECT_NOTCAPABLE(sctp_sendmsg(cap_sock_ro, &byte, 1, + (struct sockaddr*)&serv_addr, sizeof(serv_addr), + 0, 0, 1, 0, 0)); + EXPECT_NOTCAPABLE(sctp_sendmsg(cap_sock_rw, &byte, 1, + (struct sockaddr*)&serv_addr, sizeof(serv_addr), + 0, 0, 1, 0, 0)); + if (verbose) fprintf(stderr, " [%d]sctp_sendmsg(%02x)\n", getpid_(), byte); + EXPECT_OK(sctp_sendmsg(cap_sock_all, &byte, 1, + (struct sockaddr*)&serv_addr, sizeof(serv_addr), + 0, 0, 1, 0, 0)); + close(cap_sock_ro); + close(cap_sock_rw); + return cap_sock_all; +} + + +TEST(Sctp, Socket) { + int sock = socket(AF_INET, SOCK_SEQPACKET, IPPROTO_SCTP); + EXPECT_OK(sock); + if (sock < 0) return; + + cap_rights_init(&r_ro, CAP_READ); + cap_rights_init(&r_wo, CAP_WRITE); + cap_rights_init(&r_rw, CAP_READ, CAP_WRITE); + cap_rights_init(&r_all, CAP_READ, CAP_WRITE, CAP_SOCK_CLIENT, CAP_SOCK_SERVER); + cap_rights_init(&r_all_nopeel, CAP_READ, CAP_WRITE, CAP_SOCK_CLIENT, CAP_SOCK_SERVER); + cap_rights_clear(&r_all_nopeel, CAP_PEELOFF); + + int cap_sock_wo = dup(sock); + EXPECT_OK(cap_sock_wo); + EXPECT_OK(cap_rights_limit(cap_sock_wo, &r_wo)); + int cap_sock_rw = dup(sock); + EXPECT_OK(cap_sock_rw); + EXPECT_OK(cap_rights_limit(cap_sock_rw, &r_rw)); + int cap_sock_all = dup(sock); + EXPECT_OK(cap_sock_all); + EXPECT_OK(cap_rights_limit(cap_sock_all, &r_all)); + int cap_sock_all_nopeel = dup(sock); + EXPECT_OK(cap_sock_all_nopeel); + EXPECT_OK(cap_rights_limit(cap_sock_all_nopeel, &r_all_nopeel)); + close(sock); + + struct sockaddr_in addr; + memset(&addr, 0, sizeof(addr)); + addr.sin_family = AF_INET; + addr.sin_port = htons(0); + addr.sin_addr.s_addr = htonl(INADDR_ANY); + socklen_t len = sizeof(addr); + + // Can only bind the fully-capable socket. 
+ EXPECT_NOTCAPABLE(bind(cap_sock_rw, (struct sockaddr *)&addr, len)); + EXPECT_OK(bind(cap_sock_all, (struct sockaddr *)&addr, len)); + + EXPECT_OK(getsockname(cap_sock_all, (struct sockaddr *)&addr, &len)); + int port = ntohs(addr.sin_port); + + // Now we know the port involved, fork off children to run clients. + pid_t child1 = fork(); + if (child1 == 0) { + // Child process 1: wait for server setup + sleep(1); + // Send a message that triggers peeloff + int client_sock = SctpClient(port, DO_PEELOFF); + sleep(1); + close(client_sock); + exit(HasFailure()); + } + + pid_t child2 = fork(); + if (child2 == 0) { + // Child process 2: wait for server setup + sleep(2); + // Send a message that triggers server exit + int client_sock = SctpClient(port, DO_TERM); + close(client_sock); + exit(HasFailure()); + } + + // Can only listen on the fully-capable socket. + EXPECT_NOTCAPABLE(listen(cap_sock_rw, 3)); + EXPECT_OK(listen(cap_sock_all, 3)); + + // Can only do socket operations on the fully-capable socket. 
+ len = sizeof(addr); + EXPECT_NOTCAPABLE(getsockname(cap_sock_rw, (struct sockaddr*)&addr, &len)); + + struct sctp_event_subscribe events; + memset(&events, 0, sizeof(events)); + events.sctp_association_event = 1; + events.sctp_data_io_event = 1; + EXPECT_NOTCAPABLE(setsockopt(cap_sock_rw, IPPROTO_SCTP, SCTP_EVENTS, &events, sizeof(events))); + len = sizeof(events); + EXPECT_NOTCAPABLE(getsockopt(cap_sock_rw, IPPROTO_SCTP, SCTP_EVENTS, &events, &len)); + memset(&events, 0, sizeof(events)); + events.sctp_association_event = 1; + events.sctp_data_io_event = 1; + EXPECT_OK(setsockopt(cap_sock_all, IPPROTO_SCTP, SCTP_EVENTS, &events, sizeof(events))); + len = sizeof(events); + EXPECT_OK(getsockopt(cap_sock_all, IPPROTO_SCTP, SCTP_EVENTS, &events, &len)); + + len = sizeof(addr); + memset(&addr, 0, sizeof(addr)); + EXPECT_OK(getsockname(cap_sock_all, (struct sockaddr*)&addr, &len)); + EXPECT_EQ(AF_INET, addr.sin_family); + EXPECT_EQ(htons(port), addr.sin_port); + + struct sockaddr_in client_addr; + socklen_t addr_len = sizeof(client_addr); + unsigned char buffer[1024]; + struct sctp_sndrcvinfo sri; + memset(&sri, 0, sizeof(sri)); + int flags = 0; + EXPECT_NOTCAPABLE(sctp_recvmsg(cap_sock_wo, buffer, sizeof(buffer), + (struct sockaddr*)&client_addr, &addr_len, + &sri, &flags)); + while (true) { + retry: + memset(&sri, 0, sizeof(sri)); + int len = sctp_recvmsg(cap_sock_rw, buffer, sizeof(buffer), + (struct sockaddr*)&client_addr, &addr_len, + &sri, &flags); + if (len < 0 && errno == EAGAIN) goto retry; + EXPECT_OK(len); + if (len > 0) { + if (verbose) fprintf(stderr, "[%d]sctp_recvmsg(%02x..)", getpid_(), (unsigned)buffer[0]); + if (buffer[0] == DO_PEELOFF) { + if (verbose) fprintf(stderr, "..peeling off association %08lx\n", (long)sri.sinfo_assoc_id); + // Peel off the association. Needs CAP_PEELOFF. 
+ int rc1 = sctp_peeloff(cap_sock_all_nopeel, sri.sinfo_assoc_id); + EXPECT_NOTCAPABLE(rc1); + int rc2 = sctp_peeloff(cap_sock_all, sri.sinfo_assoc_id); + EXPECT_OK(rc2); + int peeled = std::max(rc1, rc2); + if (peeled > 0) { +#ifdef CAP_FROM_PEELOFF + // Peeled off FD should have same rights as original socket. + cap_rights_t rights; + EXPECT_OK(cap_rights_get(peeled, &rights)); + EXPECT_RIGHTS_EQ(&r_all, &rights); +#endif + close(peeled); + } + } else if (buffer[0] == DO_TERM) { + if (verbose) fprintf(stderr, "..terminating server\n"); + break; + } + } else if (len < 0) { + break; + } + } + + // Wait for the children. + int status; + int rc; + EXPECT_EQ(child1, waitpid(child1, &status, 0)); + rc = WIFEXITED(status) ? WEXITSTATUS(status) : -1; + EXPECT_EQ(0, rc); + EXPECT_EQ(child2, waitpid(child2, &status, 0)); + rc = WIFEXITED(status) ? WEXITSTATUS(status) : -1; + EXPECT_EQ(0, rc); + + close(cap_sock_wo); + close(cap_sock_rw); + close(cap_sock_all); + close(cap_sock_all_nopeel); +} +#endif Index: head/contrib/capsicum-test/select.cc =================================================================== --- head/contrib/capsicum-test/select.cc +++ head/contrib/capsicum-test/select.cc @@ -0,0 +1,142 @@ +#include +#include +#include +#include +#include + +#include "capsicum.h" +#include "syscalls.h" +#include "capsicum-test.h" + +namespace { + +int AddFDToSet(fd_set* fset, int fd, int maxfd) { + FD_SET(fd, fset); + if (fd > maxfd) maxfd = fd; + return maxfd; +} + +int InitFDSet(fd_set* fset, int *fds, int fdcount) { + FD_ZERO(fset); + int maxfd = -1; + for (int ii = 0; ii < fdcount; ii++) { + maxfd = AddFDToSet(fset, fds[ii], maxfd); + } + return maxfd; +} + +} // namespace + +FORK_TEST_ON(Select, LotsOFileDescriptors, TmpFile("cap_select")) { + int fd = open(TmpFile("cap_select"), O_RDWR | O_CREAT, 0644); + EXPECT_OK(fd); + if (fd < 0) return; + + // Create many POLL_EVENT capabilities. 
+ const int kCapCount = 64; + int cap_fd[kCapCount]; + cap_rights_t r_poll; + cap_rights_init(&r_poll, CAP_EVENT); + for (int ii = 0; ii < kCapCount; ii++) { + cap_fd[ii] = dup(fd); + EXPECT_OK(cap_fd[ii]); + EXPECT_OK(cap_rights_limit(cap_fd[ii], &r_poll)); + } + cap_rights_t r_rw; + cap_rights_init(&r_rw, CAP_READ, CAP_WRITE, CAP_SEEK); + int cap_rw = dup(fd); + EXPECT_OK(cap_rw); + EXPECT_OK(cap_rights_limit(cap_rw, &r_rw)); + + EXPECT_OK(cap_enter()); // Enter capability mode + + struct timeval tv; + tv.tv_sec = 0; + tv.tv_usec = 100; + // Add normal file descriptor and all CAP_EVENT capabilities + fd_set rset; + fd_set wset; + int maxfd = InitFDSet(&rset, cap_fd, kCapCount); + maxfd = AddFDToSet(&rset, fd, maxfd); + InitFDSet(&wset, cap_fd, kCapCount); + AddFDToSet(&rset, fd, 0); + int ret = select(maxfd+1, &rset, &wset, NULL, &tv); + EXPECT_OK(ret); + + // Now also include the capability with no CAP_EVENT. + InitFDSet(&rset, cap_fd, kCapCount); + AddFDToSet(&rset, fd, maxfd); + maxfd = AddFDToSet(&rset, cap_rw, maxfd); + InitFDSet(&wset, cap_fd, kCapCount); + AddFDToSet(&wset, fd, maxfd); + AddFDToSet(&wset, cap_rw, maxfd); + ret = select(maxfd+1, &rset, &wset, NULL, &tv); + EXPECT_NOTCAPABLE(ret); + +#ifdef HAVE_PSELECT + // And again with pselect + struct timespec ts; + ts.tv_sec = 0; + ts.tv_nsec = 100000; + maxfd = InitFDSet(&rset, cap_fd, kCapCount); + maxfd = AddFDToSet(&rset, fd, maxfd); + InitFDSet(&wset, cap_fd, kCapCount); + AddFDToSet(&rset, fd, 0); + ret = pselect(maxfd+1, &rset, &wset, NULL, &ts, NULL); + EXPECT_OK(ret); + + InitFDSet(&rset, cap_fd, kCapCount); + AddFDToSet(&rset, fd, maxfd); + maxfd = AddFDToSet(&rset, cap_rw, maxfd); + InitFDSet(&wset, cap_fd, kCapCount); + AddFDToSet(&wset, fd, maxfd); + AddFDToSet(&wset, cap_rw, maxfd); + ret = pselect(maxfd+1, &rset, &wset, NULL, &ts, NULL); + EXPECT_NOTCAPABLE(ret); +#endif +} + +FORK_TEST_ON(Poll, LotsOFileDescriptors, TmpFile("cap_poll")) { + int fd = open(TmpFile("cap_poll"), O_RDWR | 
O_CREAT, 0644); + EXPECT_OK(fd); + if (fd < 0) return; + + // Create many POLL_EVENT capabilities. + const int kCapCount = 64; + struct pollfd cap_fd[kCapCount + 2]; + cap_rights_t r_poll; + cap_rights_init(&r_poll, CAP_EVENT); + for (int ii = 0; ii < kCapCount; ii++) { + cap_fd[ii].fd = dup(fd); + EXPECT_OK(cap_fd[ii].fd); + EXPECT_OK(cap_rights_limit(cap_fd[ii].fd, &r_poll)); + cap_fd[ii].events = POLLIN|POLLOUT; + } + cap_fd[kCapCount].fd = fd; + cap_fd[kCapCount].events = POLLIN|POLLOUT; + cap_rights_t r_rw; + cap_rights_init(&r_rw, CAP_READ, CAP_WRITE, CAP_SEEK); + int cap_rw = dup(fd); + EXPECT_OK(cap_rw); + EXPECT_OK(cap_rights_limit(cap_rw, &r_rw)); + cap_fd[kCapCount + 1].fd = cap_rw; + cap_fd[kCapCount + 1].events = POLLIN|POLLOUT; + + EXPECT_OK(cap_enter()); // Enter capability mode + + EXPECT_OK(poll(cap_fd, kCapCount + 1, 10)); + // Now also include the capability with no CAP_EVENT. + EXPECT_OK(poll(cap_fd, kCapCount + 2, 10)); + EXPECT_NE(0, (cap_fd[kCapCount + 1].revents & POLLNVAL)); + +#ifdef HAVE_PPOLL + // And again with ppoll + struct timespec ts; + ts.tv_sec = 0; + ts.tv_nsec = 100000; + EXPECT_OK(ppoll(cap_fd, kCapCount + 1, &ts, NULL)); + // Now also include the capability with no CAP_EVENT. 
+ EXPECT_OK(ppoll(cap_fd, kCapCount + 2, &ts, NULL)); + EXPECT_NE(0, (cap_fd[kCapCount + 1].revents & POLLNVAL)); +#endif +} Index: head/contrib/capsicum-test/showrights =================================================================== --- head/contrib/capsicum-test/showrights +++ head/contrib/capsicum-test/showrights @@ -0,0 +1,99 @@ +#!/usr/bin/env python +import sys +import re + +_values = { # 2-tuple => name + (0x0000000000000000, 0x0000000000000100) : 'TTYHOOK', + (0x0000000000000040, 0x0000000000000000) : 'CREATE', + (0x0000000200000000, 0x0000000000000000) : 'GETSOCKNAME', + (0x0000000000000000, 0x0000000000100000) : 'KQUEUE_CHANGE', + (0x0000000000000000, 0x0000000000004000) : 'EXTATTR_LIST', + (0x0000000000000080, 0x0000000000000000) : 'FEXECVE', + (0x0000001000000000, 0x0000000000000000) : 'PEELOFF', + (0x0000000000000000, 0x0000000000800000) : 'NOTIFY', + (0x0000000000000000, 0x0000000000001000) : 'EXTATTR_DELETE', + (0x0000000040000000, 0x0000000000000000) : 'BIND', + (0x0000000000000000, 0x0000000000002000) : 'EXTATTR_GET', + (0x0000000000008000, 0x0000000000000000) : 'FCNTL', + (0x0000000000000000, 0x0000000000400000) : 'EPOLL_CTL', + (0x0000000000000004, 0x0000000000000000) : 'SEEK_TELL', + (0x000000000000000c, 0x0000000000000000) : 'SEEK', + (0x0000004000000000, 0x0000000000000000) : 'SHUTDOWN', + (0x0000000000000000, 0x0000000000000080) : 'IOCTL', + (0x0000000000000000, 0x0000000000000020) : 'EVENT', + (0x0000000400000000, 0x0000000000000000) : 'GETSOCKOPT', + (0x0000000080000000, 0x0000000000000000) : 'CONNECT', + (0x0000000000000000, 0x0000000000200000) : 'FSIGNAL', + (0x0000000000000000, 0x0000000000008000) : 'EXTATTR_SET', + (0x0000000000100000, 0x0000000000000000) : 'FSTATFS', + (0x0000000000040000, 0x0000000000000000) : 'FSCK', + (0x0000000000000000, 0x0000000000000800) : 'PDKILL_FREEBSD', + (0x0000000000000000, 0x0000000000000004) : 'SEM_GETVALUE', + (0x0000000000000000, 0x0000000000080000) : 'ACL_SET', + (0x0000000000200000, 
0x0000000000000000) : 'FUTIMES', + (0x0000000000000200, 0x0000000000000000) : 'FTRUNCATE', + (0x0000000000000000, 0x0000000000000001) : 'MAC_GET', + (0x0000000000020000, 0x0000000000000000) : 'FPATHCONF', + (0x0000002000000000, 0x0000000000000000) : 'SETSOCKOPT', + (0x0000000000002000, 0x0000000000000000) : 'FCHMOD', + (0x0000000000000000, 0x0000000002000000) : 'PERFMON', + (0x0000000000004000, 0x0000000000000000) : 'FCHOWN', + (0x0000000000000400, 0x0000000000000000) : 'LOOKUP', + (0x0000000000400400, 0x0000000000000000) : 'LINKAT_TARGET', + (0x0000000000800400, 0x0000000000000000) : 'MKDIRAT', + (0x0000000001000400, 0x0000000000000000) : 'MKFIFOAT', + (0x0000000002000400, 0x0000000000000000) : 'MKNODAT', + (0x0000000004000400, 0x0000000000000000) : 'RENAMEAT_SOURCE', + (0x0000000008000400, 0x0000000000000000) : 'SYMLINKAT', + (0x0000000010000400, 0x0000000000000000) : 'UNLINKAT', + (0x0000008000000400, 0x0000000000000000) : 'BINDAT', + (0x0000010000000400, 0x0000000000000000) : 'CONNECTAT', + (0x0000020000000400, 0x0000000000000000) : 'LINKAT_SOURCE', + (0x0000040000000400, 0x0000000000000000) : 'RENAMEAT_TARGET', + (0x0000000000000010, 0x0000000000000000) : 'MMAP', + (0x000000000000003c, 0x0000000000000000) : 'MMAP_X', + (0x0000000000000000, 0x0000000001000000) : 'SETNS', + (0x0000000000080000, 0x0000000000000000) : 'FSTAT', + (0x0000000000000001, 0x0000000000000000) : 'READ', + (0x0000000000000000, 0x0000000000000008) : 'SEM_POST', + (0x0000000000000000, 0x0000000000020000) : 'ACL_DELETE', + (0x0000000000001000, 0x0000000000000000) : 'FCHFLAGS', + (0x0000000800000000, 0x0000000000000000) : 'LISTEN', + (0x0000000100000000, 0x0000000000000000) : 'GETPEERNAME', + (0x0000000000000100, 0x0000000000000000) : 'FSYNC', + (0x0000000000000000, 0x0000000004000000) : 'BPF', + (0x0000000020000000, 0x0000000000000000) : 'ACCEPT', + (0x0000000000000800, 0x0000000000000000) : 'FCHDIR', + (0x0000000000000002, 0x0000000000000000) : 'WRITE', + (0x0000000000000000, 
0x0000000000000010) : 'SEM_WAIT', + (0x0000000000000000, 0x0000000000000040) : 'KQUEUE_EVENT', + (0x0000000000000000, 0x0000000000000400) : 'PDWAIT', + (0x0000000000000000, 0x0000000000040000) : 'ACL_GET', + (0x0000000000010000, 0x0000000000000000) : 'FLOCK', + (0x0000000000000000, 0x0000000000010000) : 'ACL_CHECK', + (0x0000000000000000, 0x0000000000000002) : 'MAC_SET', + (0x0000000000000000, 0x0000000000000200) : 'PDGETPID_FREEBSD', +} + + +def _map_fdinfo(line): + RIGHTS_RE = re.compile(r'(?P.*)rights:(?P\s+)0x(?P[0-9a-fA-F]+)\s+0x(?P[0-9a-fA-F]+)$') + m = RIGHTS_RE.match(line) + if m: + val0 = long(m.group('v0'), 16) + val0 = (val0 & ~(0x0200000000000000L)) + val1 = long(m.group('v1'), 16) + val1 = (val1 & ~(0x0400000000000000L)) + rights = [] + for (right, name) in _values.items(): + if ((right[0] == 0 or (val0 & right[0])) and + (right[1] == 0 or (val1 & right[1]))): + rights.append(name) + return "%srights:%s%s" % (m.group('prefix'), m.group('ws'), '|'.join(rights)) + else: + return line.rstrip() + +if __name__ == "__main__": + infile = open(sys.argv[1], 'r') if len(sys.argv) > 1 else sys.stdin + for line in infile.readlines(): + print _map_fdinfo(line) Index: head/contrib/capsicum-test/smoketest.c =================================================================== --- head/contrib/capsicum-test/smoketest.c +++ head/contrib/capsicum-test/smoketest.c @@ -0,0 +1,135 @@ +/* Small standalone test program to check the existence of Capsicum syscalls */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "capsicum.h" + +#ifdef __linux__ +// glibc on Linux caches getpid() return value. 
+int getpid_(void) { return syscall(__NR_getpid); } +#else +#define getpid_ getpid +#endif + +static int seen_sigchld = 0; +static void handle_signal(int x) { + fprintf(stderr, "[%d] received SIGCHLD\n", getpid_()); + seen_sigchld = 1; +} + +int main(int argc, char *argv[]) { + signal(SIGCHLD, handle_signal); + int lifetime = 4; /* seconds */ + if (1 < argc) { + lifetime = atoi(argv[1]); + } + + /* cap_rights_limit() available? */ + cap_rights_t r_rws; + cap_rights_init(&r_rws, CAP_READ, CAP_WRITE, CAP_SEEK); + int cap_fd = dup(STDOUT_FILENO); + int rc = cap_rights_limit(cap_fd, &r_rws); + fprintf(stderr, "[%d] cap_fd=%d\n", getpid_(), cap_fd); + if (rc < 0) fprintf(stderr, "*** cap_rights_limit() failed: errno=%d %s\n", errno, strerror(errno)); + + /* cap_rights_get() available? */ + cap_rights_t rights; + cap_rights_init(&rights, 0); + rc = cap_rights_get(cap_fd, &rights); + char buffer[256]; + cap_rights_describe(&rights, buffer); + fprintf(stderr, "[%d] cap_rights_get(cap_fd=%d) rc=%d rights=%s\n", getpid_(), cap_fd, rc, buffer); + if (rc < 0) fprintf(stderr, "*** cap_rights_get() failed: errno=%d %s\n", errno, strerror(errno)); + + /* fstat() policed? */ + struct stat buf; + rc = fstat(cap_fd, &buf); + fprintf(stderr, "[%d] fstat(cap_fd=%d) rc=%d errno=%d\n", getpid_(), cap_fd, rc, errno); + if (rc != -1) fprintf(stderr, "*** fstat() unexpectedly succeeded\n"); + + /* pdfork() available? */ + int pd = -1; + rc = pdfork(&pd, 0); + if (rc < 0) fprintf(stderr, "*** pdfork() failed: errno=%d %s\n", errno, strerror(errno)); + + if (rc == 0) { /* child */ + /* Report liveness once a second for at most 20 iterations, then exit; the parent's pdkill() normally ends the child sooner. */ + for (int count = 0; count < 20; count++) { + fprintf(stderr, " [%d] child alive, parent is ppid=%d\n", getpid_(), getppid()); + sleep(1); + } + fprintf(stderr, " [%d] child exit(0)\n", getpid_()); + exit(0); + } + fprintf(stderr, "[%d] pdfork() rc=%d pd=%d\n", getpid_(), rc, pd); + + /* pdgetpid() available? 
*/ + pid_t actual_pid = rc; + pid_t got_pid = -1; + rc = pdgetpid(pd, &got_pid); + if (rc < 0) fprintf(stderr, "*** pdgetpid(pd=%d) failed: errno=%d %s\n", pd, errno, strerror(errno)); + fprintf(stderr, "[%d] pdgetpid(pd=%d)=%d, pdfork returned %d\n", getpid_(), pd, got_pid, actual_pid); + + sleep(lifetime); + + /* pdkill() available? */ + rc = pdkill(pd, SIGKILL); + fprintf(stderr, "[%d] pdkill(pd=%d, SIGKILL) -> rc=%d\n", getpid_(), pd, rc); + if (rc < 0) fprintf(stderr, "*** pdkill() failed: errno=%d %s\n", errno, strerror(errno)); + usleep(50000); /* Allow time for death and signals */ + + /* Death of a pdforked child should be invisible */ + if (seen_sigchld) fprintf(stderr, "*** SIGCHLD emitted\n"); + int status; + rc = wait4(-1, &status, WNOHANG, NULL); + if (rc > 0) fprintf(stderr, "*** wait4(-1, ...) unexpectedly found child %d\n", rc); + + fprintf(stderr, "[%d] forking off a child process to check cap_enter()\n", getpid_()); + pid_t child = fork(); + if (child == 0) { /* child */ + /* cap_getmode() / cap_enter() available? */ + unsigned int cap_mode = -1; + rc = cap_getmode(&cap_mode); + fprintf(stderr, " [%d] cap_getmode() -> rc=%d, cap_mode=%d\n", getpid_(), rc, cap_mode); + if (rc < 0) fprintf(stderr, "*** cap_getmode() failed: errno=%d %s\n", errno, strerror(errno)); + + rc = cap_enter(); + fprintf(stderr, " [%d] cap_enter() -> rc=%d\n", getpid_(), rc); + if (rc < 0) fprintf(stderr, "*** cap_enter() failed: errno=%d %s\n", errno, strerror(errno)); + + rc = cap_getmode(&cap_mode); + fprintf(stderr, " [%d] cap_getmode() -> rc=%d, cap_mode=%d\n", getpid_(), rc, cap_mode); + if (rc < 0) fprintf(stderr, "*** cap_getmode() failed: errno=%d %s\n", errno, strerror(errno)); + + /* open disallowed? 
*/ + rc = open("/etc/passwd", O_RDONLY); + fprintf(stderr, " [%d] open('/etc/passwd') -> rc=%d, errno=%d\n", getpid_(), rc, errno); + if (rc != -1) fprintf(stderr, "*** open() unexpectedly succeeded\n"); +#ifdef ECAPMODE + if (errno != ECAPMODE) fprintf(stderr, "*** open() failed with errno %d not ECAPMODE\n", errno); +#endif + exit(0); + } + rc = wait4(child, &status, 0, NULL); + fprintf(stderr, "[%d] child %d exited with status %x\n", getpid_(), child, status); + + /* fexecve() available? */ + char* argv_pass[] = {(char*)"/bin/ls", "-l", "smoketest", NULL}; + char* null_envp[] = {NULL}; + int ls_bin = open("/bin/ls", O_RDONLY); + fprintf(stderr, "[%d] about to fexecve('/bin/ls', '-l', 'smoketest')\n", getpid_()); + rc = fexecve(ls_bin, argv_pass, null_envp); + /* should never reach here */ + fprintf(stderr, "*** fexecve(fd=%d) failed: rc=%d errno=%d %s\n", ls_bin, rc, errno, strerror(errno)); + + return 0; +} Index: head/contrib/capsicum-test/socket.cc =================================================================== --- head/contrib/capsicum-test/socket.cc +++ head/contrib/capsicum-test/socket.cc @@ -0,0 +1,340 @@ +// Tests for socket functionality. 
+#include +#include +#include +#include +#include +#include + +#include + +#include "capsicum.h" +#include "syscalls.h" +#include "capsicum-test.h" + +TEST(Socket, UnixDomain) { + const char* socketName = TmpFile("capsicum-test.socket"); + unlink(socketName); + cap_rights_t r_rw; + cap_rights_init(&r_rw, CAP_READ, CAP_WRITE); + cap_rights_t r_all; + cap_rights_init(&r_all, CAP_READ, CAP_WRITE, CAP_SOCK_CLIENT, CAP_SOCK_SERVER); + + pid_t child = fork(); + if (child == 0) { + // Child process: wait for server setup + sleep(1); + + // Create sockets + int sock = socket(AF_UNIX, SOCK_STREAM, 0); + EXPECT_OK(sock); + if (sock < 0) return; + + int cap_sock_rw = dup(sock); + EXPECT_OK(cap_sock_rw); + EXPECT_OK(cap_rights_limit(cap_sock_rw, &r_rw)); + int cap_sock_all = dup(sock); + EXPECT_OK(cap_sock_all); + EXPECT_OK(cap_rights_limit(cap_sock_all, &r_all)); + EXPECT_OK(close(sock)); + + // Connect socket + struct sockaddr_un un; + memset(&un, 0, sizeof(un)); + un.sun_family = AF_UNIX; + strcpy(un.sun_path, socketName); + socklen_t len = sizeof(un); + EXPECT_NOTCAPABLE(connect_(cap_sock_rw, (struct sockaddr *)&un, len)); + EXPECT_OK(connect_(cap_sock_all, (struct sockaddr *)&un, len)); + + exit(HasFailure()); + } + + int sock = socket(AF_UNIX, SOCK_STREAM, 0); + EXPECT_OK(sock); + if (sock < 0) return; + + int cap_sock_rw = dup(sock); + EXPECT_OK(cap_sock_rw); + EXPECT_OK(cap_rights_limit(cap_sock_rw, &r_rw)); + int cap_sock_all = dup(sock); + EXPECT_OK(cap_sock_all); + EXPECT_OK(cap_rights_limit(cap_sock_all, &r_all)); + EXPECT_OK(close(sock)); + + struct sockaddr_un un; + memset(&un, 0, sizeof(un)); + un.sun_family = AF_UNIX; + strcpy(un.sun_path, socketName); + socklen_t len = (sizeof(un) - sizeof(un.sun_path) + strlen(un.sun_path)); + + // Can only bind the fully-capable socket. + EXPECT_NOTCAPABLE(bind_(cap_sock_rw, (struct sockaddr *)&un, len)); + EXPECT_OK(bind_(cap_sock_all, (struct sockaddr *)&un, len)); + + // Can only listen on the fully-capable socket. 
+ EXPECT_NOTCAPABLE(listen(cap_sock_rw, 3)); + EXPECT_OK(listen(cap_sock_all, 3)); + + // Can only do socket operations on the fully-capable socket. + len = sizeof(un); + EXPECT_NOTCAPABLE(getsockname(cap_sock_rw, (struct sockaddr*)&un, &len)); + int value = 0; + EXPECT_NOTCAPABLE(setsockopt(cap_sock_rw, SOL_SOCKET, SO_DEBUG, &value, sizeof(value))); + len = sizeof(value); + EXPECT_NOTCAPABLE(getsockopt(cap_sock_rw, SOL_SOCKET, SO_DEBUG, &value, &len)); + + len = sizeof(un); + memset(&un, 0, sizeof(un)); + EXPECT_OK(getsockname(cap_sock_all, (struct sockaddr*)&un, &len)); + EXPECT_EQ(AF_UNIX, un.sun_family); + EXPECT_EQ(std::string(socketName), std::string(un.sun_path)); + value = 0; + EXPECT_OK(setsockopt(cap_sock_all, SOL_SOCKET, SO_DEBUG, &value, sizeof(value))); + len = sizeof(value); + EXPECT_OK(getsockopt(cap_sock_all, SOL_SOCKET, SO_DEBUG, &value, &len)); + + // Accept the incoming connection + len = sizeof(un); + memset(&un, 0, sizeof(un)); + EXPECT_NOTCAPABLE(accept(cap_sock_rw, (struct sockaddr *)&un, &len)); + int conn_fd = accept(cap_sock_all, (struct sockaddr *)&un, &len); + EXPECT_OK(conn_fd); + +#ifdef CAP_FROM_ACCEPT + // New connection should also be a capability. + cap_rights_t rights; + cap_rights_init(&rights, 0); + EXPECT_OK(cap_rights_get(conn_fd, &rights)); + EXPECT_RIGHTS_IN(&rights, &r_all); +#endif + + // Wait for the child. + int status; + EXPECT_EQ(child, waitpid(child, &status, 0)); + int rc = WIFEXITED(status) ? 
WEXITSTATUS(status) : -1; + EXPECT_EQ(0, rc); + + close(conn_fd); + close(cap_sock_rw); + close(cap_sock_all); + unlink(socketName); +} + +TEST(Socket, TCP) { + int sock = socket(AF_INET, SOCK_STREAM, 0); + EXPECT_OK(sock); + if (sock < 0) return; + + cap_rights_t r_rw; + cap_rights_init(&r_rw, CAP_READ, CAP_WRITE); + cap_rights_t r_all; + cap_rights_init(&r_all, CAP_READ, CAP_WRITE, CAP_SOCK_CLIENT, CAP_SOCK_SERVER); + + int cap_sock_rw = dup(sock); + EXPECT_OK(cap_sock_rw); + EXPECT_OK(cap_rights_limit(cap_sock_rw, &r_rw)); + int cap_sock_all = dup(sock); + EXPECT_OK(cap_sock_all); + EXPECT_OK(cap_rights_limit(cap_sock_all, &r_all)); + close(sock); + + struct sockaddr_in addr; + memset(&addr, 0, sizeof(addr)); + addr.sin_family = AF_INET; + addr.sin_port = htons(0); + addr.sin_addr.s_addr = htonl(INADDR_ANY); + socklen_t len = sizeof(addr); + + // Can only bind the fully-capable socket. + EXPECT_NOTCAPABLE(bind_(cap_sock_rw, (struct sockaddr *)&addr, len)); + EXPECT_OK(bind_(cap_sock_all, (struct sockaddr *)&addr, len)); + + getsockname(cap_sock_all, (struct sockaddr *)&addr, &len); + int port = ntohs(addr.sin_port); + + // Now we know the port involved, fork off a child. 
+ pid_t child = fork(); + if (child == 0) { + // Child process: wait for server setup + sleep(1); + + // Create sockets + int sock = socket(AF_INET, SOCK_STREAM, 0); + EXPECT_OK(sock); + if (sock < 0) return; + int cap_sock_rw = dup(sock); + EXPECT_OK(cap_sock_rw); + EXPECT_OK(cap_rights_limit(cap_sock_rw, &r_rw)); + int cap_sock_all = dup(sock); + EXPECT_OK(cap_sock_all); + EXPECT_OK(cap_rights_limit(cap_sock_all, &r_all)); + close(sock); + + // Connect socket + struct sockaddr_in addr; + memset(&addr, 0, sizeof(addr)); + addr.sin_family = AF_INET; + addr.sin_port = htons(port); // Pick unused port + addr.sin_addr.s_addr = inet_addr("127.0.0.1"); + socklen_t len = sizeof(addr); + EXPECT_NOTCAPABLE(connect_(cap_sock_rw, (struct sockaddr *)&addr, len)); + EXPECT_OK(connect_(cap_sock_all, (struct sockaddr *)&addr, len)); + + exit(HasFailure()); + } + + // Can only listen on the fully-capable socket. + EXPECT_NOTCAPABLE(listen(cap_sock_rw, 3)); + EXPECT_OK(listen(cap_sock_all, 3)); + + // Can only do socket operations on the fully-capable socket. 
+ len = sizeof(addr); + EXPECT_NOTCAPABLE(getsockname(cap_sock_rw, (struct sockaddr*)&addr, &len)); + int value = 1; + EXPECT_NOTCAPABLE(setsockopt(cap_sock_rw, SOL_SOCKET, SO_REUSEPORT, &value, sizeof(value))); + len = sizeof(value); + EXPECT_NOTCAPABLE(getsockopt(cap_sock_rw, SOL_SOCKET, SO_REUSEPORT, &value, &len)); + + len = sizeof(addr); + memset(&addr, 0, sizeof(addr)); + EXPECT_OK(getsockname(cap_sock_all, (struct sockaddr*)&addr, &len)); + EXPECT_EQ(AF_INET, addr.sin_family); + EXPECT_EQ(htons(port), addr.sin_port); + value = 0; + EXPECT_OK(setsockopt(cap_sock_all, SOL_SOCKET, SO_REUSEPORT, &value, sizeof(value))); + len = sizeof(value); + EXPECT_OK(getsockopt(cap_sock_all, SOL_SOCKET, SO_REUSEPORT, &value, &len)); + + // Accept the incoming connection + len = sizeof(addr); + memset(&addr, 0, sizeof(addr)); + EXPECT_NOTCAPABLE(accept(cap_sock_rw, (struct sockaddr *)&addr, &len)); + int conn_fd = accept(cap_sock_all, (struct sockaddr *)&addr, &len); + EXPECT_OK(conn_fd); + +#ifdef CAP_FROM_ACCEPT + // New connection should also be a capability. + cap_rights_t rights; + cap_rights_init(&rights, 0); + EXPECT_OK(cap_rights_get(conn_fd, &rights)); + EXPECT_RIGHTS_IN(&rights, &r_all); +#endif + + // Wait for the child. + int status; + EXPECT_EQ(child, waitpid(child, &status, 0)); + int rc = WIFEXITED(status) ? 
WEXITSTATUS(status) : -1; + EXPECT_EQ(0, rc); + + close(conn_fd); + close(cap_sock_rw); + close(cap_sock_all); +} + +TEST(Socket, UDP) { + int sock = socket(AF_INET, SOCK_DGRAM, IPPROTO_UDP); + EXPECT_OK(sock); + if (sock < 0) return; + + cap_rights_t r_rw; + cap_rights_init(&r_rw, CAP_READ, CAP_WRITE); + cap_rights_t r_all; + cap_rights_init(&r_all, CAP_READ, CAP_WRITE, CAP_SOCK_CLIENT, CAP_SOCK_SERVER); + cap_rights_t r_connect; + cap_rights_init(&r_connect, CAP_READ, CAP_WRITE, CAP_CONNECT); + + int cap_sock_rw = dup(sock); + EXPECT_OK(cap_sock_rw); + EXPECT_OK(cap_rights_limit(cap_sock_rw, &r_rw)); + int cap_sock_all = dup(sock); + EXPECT_OK(cap_sock_all); + EXPECT_OK(cap_rights_limit(cap_sock_all, &r_all)); + close(sock); + + struct sockaddr_in addr; + memset(&addr, 0, sizeof(addr)); + addr.sin_family = AF_INET; + addr.sin_port = htons(0); + addr.sin_addr.s_addr = htonl(INADDR_ANY); + socklen_t len = sizeof(addr); + + // Can only bind the fully-capable socket. + EXPECT_NOTCAPABLE(bind_(cap_sock_rw, (struct sockaddr *)&addr, len)); + EXPECT_OK(bind_(cap_sock_all, (struct sockaddr *)&addr, len)); + getsockname(cap_sock_all, (struct sockaddr *)&addr, &len); + int port = ntohs(addr.sin_port); + + // Can only do socket operations on the fully-capable socket. 
+ len = sizeof(addr); + EXPECT_NOTCAPABLE(getsockname(cap_sock_rw, (struct sockaddr*)&addr, &len)); + int value = 1; + EXPECT_NOTCAPABLE(setsockopt(cap_sock_rw, SOL_SOCKET, SO_REUSEPORT, &value, sizeof(value))); + len = sizeof(value); + EXPECT_NOTCAPABLE(getsockopt(cap_sock_rw, SOL_SOCKET, SO_REUSEPORT, &value, &len)); + + len = sizeof(addr); + memset(&addr, 0, sizeof(addr)); + EXPECT_OK(getsockname(cap_sock_all, (struct sockaddr*)&addr, &len)); + EXPECT_EQ(AF_INET, addr.sin_family); + EXPECT_EQ(htons(port), addr.sin_port); + value = 1; + EXPECT_OK(setsockopt(cap_sock_all, SOL_SOCKET, SO_REUSEPORT, &value, sizeof(value))); + len = sizeof(value); + EXPECT_OK(getsockopt(cap_sock_all, SOL_SOCKET, SO_REUSEPORT, &value, &len)); + + pid_t child = fork(); + if (child == 0) { + int sock = socket(AF_INET, SOCK_DGRAM, IPPROTO_UDP); + EXPECT_OK(sock); + int cap_sock_rw = dup(sock); + EXPECT_OK(cap_sock_rw); + EXPECT_OK(cap_rights_limit(cap_sock_rw, &r_rw)); + int cap_sock_connect = dup(sock); + EXPECT_OK(cap_sock_connect); + EXPECT_OK(cap_rights_limit(cap_sock_connect, &r_connect)); + close(sock); + + // Can only sendmsg(2) to an address over a socket with CAP_CONNECT. 
+ unsigned char buffer[256]; + struct iovec iov; + memset(&iov, 0, sizeof(iov)); + iov.iov_base = buffer; + iov.iov_len = sizeof(buffer); + + struct msghdr mh; + memset(&mh, 0, sizeof(mh)); + mh.msg_iov = &iov; + mh.msg_iovlen = 1; + + struct sockaddr_in addr; + memset(&addr, 0, sizeof(addr)); + addr.sin_family = AF_INET; + addr.sin_port = htons(port); + addr.sin_addr.s_addr = inet_addr("127.0.0.1"); + mh.msg_name = &addr; + mh.msg_namelen = sizeof(addr); + + EXPECT_NOTCAPABLE(sendmsg(cap_sock_rw, &mh, 0)); + EXPECT_OK(sendmsg(cap_sock_connect, &mh, 0)); + +#ifdef HAVE_SEND_RECV_MMSG + struct mmsghdr mv; + memset(&mv, 0, sizeof(mv)); + memcpy(&mv.msg_hdr, &mh, sizeof(struct msghdr)); + EXPECT_NOTCAPABLE(sendmmsg(cap_sock_rw, &mv, 1, 0)); + EXPECT_OK(sendmmsg(cap_sock_connect, &mv, 1, 0)); +#endif + close(cap_sock_rw); + close(cap_sock_connect); + exit(HasFailure()); + } + // Wait for the child. + int status; + EXPECT_EQ(child, waitpid(child, &status, 0)); + int rc = WIFEXITED(status) ? WEXITSTATUS(status) : -1; + EXPECT_EQ(0, rc); + + close(cap_sock_rw); + close(cap_sock_all); +} Index: head/contrib/capsicum-test/syscalls.h =================================================================== --- head/contrib/capsicum-test/syscalls.h +++ head/contrib/capsicum-test/syscalls.h @@ -0,0 +1,259 @@ +/* + * Minimal portability layer for system call differences between + * Capsicum OSes. 
+ */ +#ifndef __SYSCALLS_H__ +#define __SYSCALLS_H__ + +/************************************************************ + * FreeBSD + ************************************************************/ +#ifdef __FreeBSD__ + +/* Map umount2 (Linux) syscall to unmount (FreeBSD) syscall */ +#define umount2(T, F) unmount(T, F) + +/* Map sighandler_t (Linux) to sig_t (FreeBSD) */ +#define sighandler_t sig_t + +/* profil(2) has a first argument of char* */ +#define profil_arg1_t char + +/* FreeBSD has getdents(2) available */ +#include +#include +inline int getdents_(unsigned int fd, void *dirp, unsigned int count) { + return getdents(fd, (char*)dirp, count); +} +#include +inline int mincore_(void *addr, size_t length, unsigned char *vec) { + return mincore(addr, length, (char*)vec); +} +#define getpid_ getpid + +/* Map Linux-style sendfile(out_fd, in_fd, offset, count) to FreeBSD sendfile: the two descriptors are passed in swapped order, *offset is passed by value as the fourth-from-last start position, and the offset pointer itself is reused as the sixth argument. NOTE(review): FreeBSD sendfile(2) documents that sixth argument as the bytes-sent output, so on return *offset presumably holds a byte count rather than a file position -- confirm no caller depends on the Linux semantics. offset must not be NULL because it is dereferenced unconditionally. */ +#include +#include +inline ssize_t sendfile_(int out_fd, int in_fd, off_t *offset, size_t count) { + return sendfile(in_fd, out_fd, *offset, count, NULL, offset, 0); +} + +/* A sample mount(2) call: procfs on /not_mounted with flags 0 and no data */ +#include +#include +inline int bogus_mount_() { + return mount("procfs", "/not_mounted", 0, NULL); +} + +/* Mappings for extended attribute functions: each Linux-style f*xattr_ name forwards to the matching extattr_*_fd call in EXTATTR_NAMESPACE_USER; the trailing unnamed int parameter of fsetxattr_ is accepted and ignored */ +#include +inline ssize_t flistxattr_(int fd, char *list, size_t size) { + return extattr_list_fd(fd, EXTATTR_NAMESPACE_USER, list, size); +} +inline ssize_t fgetxattr_(int fd, const char *name, void *value, size_t size) { + return extattr_get_fd(fd, EXTATTR_NAMESPACE_USER, name, value, size); +} +inline int fsetxattr_(int fd, const char *name, const void *value, size_t size, int) { + return extattr_set_fd(fd, EXTATTR_NAMESPACE_USER, name, value, size); +} +inline int fremovexattr_(int fd, const char *name) { + return extattr_delete_fd(fd, EXTATTR_NAMESPACE_USER, name); +} + +/* mq_* functions are wrappers in FreeBSD so go through to underlying syscalls */ +#include +extern "C" { +extern int __sys_kmq_notify(int, const struct sigevent *); +extern int __sys_kmq_open(const 
char *, int, mode_t, const struct mq_attr *); +extern int __sys_kmq_setattr(int, const struct mq_attr *__restrict, struct mq_attr *__restrict); +extern ssize_t __sys_kmq_timedreceive(int, char *__restrict, size_t, + unsigned *__restrict, const struct timespec *__restrict); +extern int __sys_kmq_timedsend(int, const char *, size_t, unsigned, + const struct timespec *); +extern int __sys_kmq_unlink(const char *); +} +#define mq_notify_ __sys_kmq_notify +#define mq_open_ __sys_kmq_open +#define mq_setattr_ __sys_kmq_setattr +#define mq_getattr_(A, B) __sys_kmq_setattr(A, NULL, B) /* getattr is expressed as setattr with a NULL new-attributes pointer; presumably the current attributes are returned through B */ +#define mq_timedreceive_ __sys_kmq_timedreceive +#define mq_timedsend_ __sys_kmq_timedsend +#define mq_unlink_ __sys_kmq_unlink +#define mq_close_ close +#include +inline long ptrace_(int request, pid_t pid, void *addr, void *data) { + return ptrace(request, pid, (caddr_t)addr, static_cast((long)data)); /* NOTE(review): this static_cast carries no target type and the #include directives in this file carry no header names -- text in angle brackets appears to have been lost from this copy; restore from the upstream file before building */ +} +#define PTRACE_PEEKDATA_ PT_READ_D +#define getegid_ getegid +#define getgid_ getgid +#define geteuid_ geteuid +#define getuid_ getuid +#define getgroups_ getgroups +#define getrlimit_ getrlimit +#define bind_ bind +#define connect_ connect + +/* Features available (HAVE_* macros); the CHFLAGSAT, BINDAT and CONNECTAT entries are only defined when __FreeBSD_version >= 1000000 */ +#if __FreeBSD_version >= 1000000 +#define HAVE_CHFLAGSAT +#define HAVE_BINDAT +#define HAVE_CONNECTAT +#endif +#define HAVE_CHFLAGS +#define HAVE_GETFSSTAT +#define HAVE_REVOKE +#define HAVE_GETLOGIN +#define HAVE_MKFIFOAT +#define HAVE_SYSARCH +#include +#define HAVE_STAT_BIRTHTIME +#define HAVE_SYSCTL +#define HAVE_FPATHCONF +#define HAVE_F_DUP2FD +#define HAVE_PSELECT +#define HAVE_SCTP + +/* FreeBSD only allows root to call mlock[all]/munlock[all] */ +#define MLOCK_REQUIRES_ROOT 1 +/* FreeBSD effectively only allows root to call sched_setscheduler */ +#define SCHED_SETSCHEDULER_REQUIRES_ROOT 1 + +#endif /* FreeBSD */ + +/************************************************************ + * Linux + ************************************************************/ +#ifdef __linux__ +#include +#include +#include +#include +#include 
+#include +#include +#include +#include +#include +#include +#include +#include + +/* profil(2) has a first argument of unsigned short* */ +#define profil_arg1_t unsigned short + +static inline int getdents_(unsigned int fd, void *dirp, unsigned int count) { + return syscall(__NR_getdents, fd, dirp, count); +} +/* A sample mount(2) call: a read-only (MS_RDONLY) debugfs mount of /dev/bogus on /bogus with empty data */ +static inline int bogus_mount_() { + return mount("/dev/bogus", "/bogus", "debugfs", MS_RDONLY, ""); +} + +/* libc's getpid() wrapper caches the pid value, and doesn't invalidate + * the cached value on pdfork(), so directly syscall. */ +static inline pid_t getpid_() { + return syscall(__NR_getpid); +} +static inline int execveat(int fd, const char *path, + char *const argv[], char *const envp[], int flags) { + return syscall(__NR_execveat, fd, path, argv, envp, flags); +} /* execveat() is issued directly through syscall(__NR_execveat); fexecve_ below is built on it */ + +/* + * Linux glibc includes an fexecve() function, implemented via the /proc + * filesystem. Bypass this and go directly to the execveat(2) syscall. + */ +static inline int fexecve_(int fd, char *const argv[], char *const envp[]) { + return execveat(fd, "", argv, envp, AT_EMPTY_PATH); +} +/* + * Linux glibc attempts to be clever and intercepts various uid/gid functions. + * Bypass by calling the syscalls directly. + */ +static inline gid_t getegid_(void) { return syscall(__NR_getegid); } +static inline gid_t getgid_(void) { return syscall(__NR_getgid); } +static inline uid_t geteuid_(void) { return syscall(__NR_geteuid); } +static inline uid_t getuid_(void) { return syscall(__NR_getuid); } +static inline int getgroups_(int size, gid_t list[]) { return syscall(__NR_getgroups, size, list); } +static inline int getrlimit_(int resource, struct rlimit *rlim) { + return syscall(__NR_getrlimit, resource, rlim); +} + +/* + * Linux glibc for i386 consumes the errno returned from the raw socketcall(2) operation, + * so use the raw syscall for those operations that are disallowed in capability mode. 
+ */ +#ifdef __NR_bind +#define bind_ bind +#else +static inline int bind_(int sockfd, const struct sockaddr *addr, socklen_t addrlen) { + unsigned long args[3] = {(unsigned long)sockfd, (unsigned long)(intptr_t)addr, (unsigned long)addrlen}; + return syscall(__NR_socketcall, SYS_BIND, args); +} +#endif +#ifdef __NR_connect +#define connect_ connect +#else +static inline int connect_(int sockfd, const struct sockaddr *addr, socklen_t addrlen) { + unsigned long args[3] = {(unsigned long)sockfd, (unsigned long)(intptr_t)addr, (unsigned long)addrlen}; + return syscall(__NR_socketcall, SYS_CONNECT, args); +} +#endif + +#define mincore_ mincore +#define sendfile_ sendfile +#define flistxattr_ flistxattr +#define fgetxattr_ fgetxattr +#define fsetxattr_ fsetxattr +#define fremovexattr_ fremovexattr +#define mq_notify_ mq_notify +#define mq_open_ mq_open +#define mq_setattr_ mq_setattr +#define mq_getattr_ mq_getattr +#define mq_timedreceive_ mq_timedreceive +#define mq_timedsend_ mq_timedsend +#define mq_unlink_ mq_unlink +#define mq_close_ mq_close +#define ptrace_ ptrace +#define PTRACE_PEEKDATA_ PTRACE_PEEKDATA + +/* Features available */ +#define HAVE_DUP3 +#define HAVE_PIPE2 +#include /* for setfsgid()/setfsuid() */ +#define HAVE_SETFSUID +#define HAVE_SETFSGID +#define HAVE_READAHEAD +#define HAVE_SEND_RECV_MMSG +#define HAVE_SYNCFS +#define HAVE_SYNC_FILE_RANGE +#include /* for vmsplice */ +#define HAVE_TEE +#define HAVE_SPLICE +#define HAVE_VMSPLICE +#define HAVE_PSELECT +#define HAVE_PPOLL +#define HAVE_EXECVEAT +#define HAVE_SYSCALL +#define HAVE_MKNOD_REG +#define HAVE_MKNOD_SOCKET +/* + * O_BENEATH is arch-specific, via <asm/fcntl.h>; however we cannot include both that file + * and the normal <fcntl.h> as they have some clashing definitions. Bypass by directly + * defining O_BENEATH, using the current proposed x86 value. (This will therefore not + * work for non-x86, and may need changing in future if a different value gets merged.) 
+ */ +#ifndef O_BENEATH +#define O_BENEATH 040000000 /* no / or .. in openat path */ +#endif + + +/* Linux allows anyone to call mlock[all]/munlock[all] */ +#define MLOCK_REQUIRES_ROOT 0 +/* Linux allows anyone to call sched_setscheduler -- NOTE(review): this statement disagrees with the value 1 defined on the next line; confirm which of the two is intended */ +#define SCHED_SETSCHEDULER_REQUIRES_ROOT 1 + +#endif /* Linux */ + +#endif /*__SYSCALLS_H__*/ Index: head/contrib/capsicum-test/sysctl.cc =================================================================== --- head/contrib/capsicum-test/sysctl.cc +++ head/contrib/capsicum-test/sysctl.cc @@ -0,0 +1,15 @@ +#include "capsicum.h" +#include "capsicum-test.h" + +#ifdef HAVE_SYSCTL +#include + +// Certain sysctls are permitted in capability mode, but most are not. Test +// for the ones that should be, and try one or two that shouldn't. NOTE(review): the body below only reads {CTL_KERN, KERN_OSRELDATE} and expects success; no disallowed sysctl is tried and no cap_enter() call is visible here. +TEST(Sysctl, Capability) { + int oid[2] = {CTL_KERN, KERN_OSRELDATE}; + int ii; + size_t len = sizeof(ii); + EXPECT_OK(sysctl(oid, 2, &ii, &len, NULL, 0)); +} +#endif Index: head/contrib/capsicum-test/waittest.c =================================================================== --- head/contrib/capsicum-test/waittest.c +++ head/contrib/capsicum-test/waittest.c @@ -0,0 +1,42 @@ +#include +#include +#include +#include +#include +#include +#include + +#ifdef __FreeBSD__ +#include +#endif + +#ifdef __linux__ +#include +int pdfork(int *fd, int flags) { /* Linux: pdfork issued as a raw syscall by number */ + return syscall(__NR_pdfork, fd, flags); +} +#endif + +int main() { /* pdfork() a child that sleeps 1s and exits with status 123; the parent sleeps 2s, then polls waitpid(-1, ..., WNOHANG) once and reports on stderr whether a completed child was found. Always returns 0 unless pdfork() itself fails (exit 1). */ + int procfd; + int rc = pdfork(&procfd, 0); + if (rc < 0) { + fprintf(stderr, "pdfork() failed rc=%d errno=%d %s\n", rc, errno, strerror(errno)); + exit(1); + } + if (rc == 0) { // Child process + sleep(1); + exit(123); + } + fprintf(stderr, "pdfork()ed child pid=%ld procfd=%d\n", (long)rc, procfd); + sleep(2); // Allow child to complete + pid_t child = waitpid(-1, &rc, WNOHANG); + if (child == 0) { + fprintf(stderr, "waitpid(): no completed child found\n"); + } else if (child < 0) { + fprintf(stderr, "waitpid(): failed errno=%d %s\n", errno, strerror(errno)); + } else { + fprintf(stderr, 
"waitpid(): found completed child %ld\n", (long)child); + } + return 0; +} Index: head/tests/sys/capsicum/Makefile =================================================================== --- head/tests/sys/capsicum/Makefile +++ head/tests/sys/capsicum/Makefile @@ -1,11 +1,56 @@ # $FreeBSD$ +.include + TESTSDIR= ${TESTSBASE}/sys/capsicum ATF_TESTS_C+= bindat_connectat ATF_TESTS_C+= ioctls_test CFLAGS+= -I${SRCTOP}/tests + +.if ${MK_GOOGLETEST} != no + +.PATH: ${SRCTOP}/contrib/capsicum-test + +GTESTS+= capsicum-test + +SRCS.capsicum-test+= \ + capsicum-test-main.cc \ + capsicum-test.cc \ + capability-fd.cc \ + fexecve.cc \ + procdesc.cc \ + capmode.cc \ + fcntl.cc \ + ioctl.cc \ + openat.cc \ + sysctl.cc \ + select.cc \ + mqueue.cc \ + socket.cc \ + sctp.cc \ + capability-fd-pair.cc \ + overhead.cc \ + rename.cc + +LIBADD.capsicum-test+= gtest pthread +TEST_METADATA.capsicum-test= required_user="unprivileged" + +.for p in mini-me mini-me.noexec mini-me.setuid +PROGS+= $p +NO_SHARED.$p= +SRCS.$p= mini-me.c +.endfor + +BINDIR= ${TESTSDIR} + +BINMODE.mini-me.noexec= ${NOBINMODE} +BINMODE.mini-me.setuid= 4555 + +WARNS.capsicum-test= 3 + +.endif WARNS?= 6