Index: lib/libc/sys/Makefile.inc =================================================================== --- lib/libc/sys/Makefile.inc +++ lib/libc/sys/Makefile.inc @@ -234,6 +234,7 @@ listen.2 \ lseek.2 \ madvise.2 \ + memfd_create.2 \ mincore.2 \ minherit.2 \ mkdir.2 \ Index: lib/libc/sys/Symbol.map =================================================================== --- lib/libc/sys/Symbol.map +++ lib/libc/sys/Symbol.map @@ -408,6 +408,7 @@ fhreadlink; getfhat; funlinkat; + memfd_create; }; FBSDprivate_1.0 { @@ -738,6 +739,8 @@ __sys_mac_syscall; _madvise; __sys_madvise; + _memfd_create; + __sys_memfd_create; _mincore; __sys_mincore; _minherit; Index: lib/libc/sys/memfd_create.2 =================================================================== --- /dev/null +++ lib/libc/sys/memfd_create.2 @@ -0,0 +1,107 @@ +.\" SPDX-License-Identifier: BSD-2-Clause +.\" +.\" Copyright (c) 2019 Kyle Evans +.\" +.\" Redistribution and use in source and binary forms, with or without +.\" modification, are permitted provided that the following conditions +.\" are met: +.\" 1. Redistributions of source code must retain the above copyright +.\" notice, this list of conditions and the following disclaimer. +.\" 2. Redistributions in binary form must reproduce the above copyright +.\" notice, this list of conditions and the following disclaimer in the +.\" documentation and/or other materials provided with the distribution. +.\" +.\" THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND +.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +.\" ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE +.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF +.\" SUCH DAMAGE. +.\" +.\" $FreeBSD$ +.\" +.Dd August 24, 2019 +.Dt MEMFD_CREATE 2 +.Os +.Sh NAME +.Nm memfd_create +.Nd Create anonymous shared memory object +.Sh LIBRARY +.Lb libc +.Sh SYNOPSIS +.In sys/mman.h +.Ft int +.Fo memfd_create +.Fa "const char *name" +.Fa "unsigned int flags" +.Fc +.Sh DESCRIPTION +The +.Fn memfd_create +system call +creates an anonymous shared memory object, identical to that created by +.Xr shm_open 2 +when +.Dv SHM_ANON +is specified. +Newly created objects start off with a size of zero. +The size of the object can be adjusted via +.Xr ftruncate 2 . +.Pp +The +.Fa name +argument is currently unused. +The following +.Fa flags +may be specified: +.Bl -tag -width MFD_ALLOW_SEALING +.It Dv MFD_CLOEXEC +Set +.Dv FD_CLOEXEC +on the resulting file descriptor. +.It Dv MFD_ALLOW_SEALING +Allow adding seals to the resulting file descriptor using the +.Dv F_ADD_SEALS +.Xr fcntl 2 . +.It Dv MFD_HUGETLB +This flag is currently unsupported. +.El +.Sh RETURN VALUES +If successful, +.Fn memfd_create +returns the file descriptor for the opened file; otherwise the value \-1 is +returned and the global variable +.Va errno +is set to indicate the error. +.Fn memfd_ +.Sh ERRORS +In addition to the errors returned by +.Xr shm_open 2 , +.Fn memfd_create +will return +.Bl -tag -width Er +.It Bq Er EINVAL +Any flags not listed above were specified. +.It Bq Er ENOSYS +.Dv MFD_HUGETLB +has been specified, and this system does not support forced hugetlb mappings. +.El +.Sh SEE ALSO +.Xr fcntl 2 , +.Xr ftruncate 2 , +.Xr shm_open 2 +.Sh STANDARDS +The +.Fn memfd_create +system call is expected to be compatible with the Linux system call of +the same name. +.Sh HISTORY +The +.Fn memfd_create +function appeared in +.Fx 13.0 . Index: sys/compat/freebsd32/syscalls.master =================================================================== --- sys/compat/freebsd32/syscalls.master +++ sys/compat/freebsd32/syscalls.master @@ -1150,5 +1150,6 @@ 569 AUE_NULL NOPROTO { ssize_t copy_file_range(int infd, \ off_t *inoffp, int outfd, off_t *outoffp, \ size_t len, unsigned int flags); } +570 AUE_NULL NOPROTO { int memfd_create(const char *path, int flags); } ; vim: syntax=off Index: sys/kern/capabilities.conf =================================================================== --- sys/kern/capabilities.conf +++ sys/kern/capabilities.conf @@ -409,6 +409,15 @@ ## Allow simple VM operations on the current process. ## madvise + +## +## Creates only anonymous objects via shm_open. +## +memfd_create + +## +## Allow simple VM operations on the current process. +## mincore minherit mlock Index: sys/kern/syscalls.master =================================================================== --- sys/kern/syscalls.master +++ sys/kern/syscalls.master @@ -3185,6 +3185,12 @@ unsigned int flags ); } +570 AUE_NULL STD { + int memfd_create( + _In_z_ const char *path, + unsigned int flags + ); + } ; Please copy any additions and changes to the following compatability tables: ; sys/compat/freebsd32/syscalls.master Index: sys/kern/uipc_shm.c =================================================================== --- sys/kern/uipc_shm.c +++ sys/kern/uipc_shm.c @@ -119,6 +119,7 @@ static void shm_insert(char *path, Fnv32_t fnv, struct shmfd *shmfd); static struct shmfd *shm_lookup(char *path, Fnv32_t fnv); static int shm_remove(char *path, Fnv32_t fnv, struct ucred *ucred); +int kern_memfd_create(struct thread *td, const char *name, int flags); static fo_rdwr_t shm_read; static fo_rdwr_t shm_write; @@ -1230,3 +1231,40 @@ CTLFLAG_RD | CTLFLAG_MPSAFE | CTLTYPE_OPAQUE, NULL, 0, sysctl_posix_shm_list, "", "POSIX SHM list"); + +int +kern_memfd_create(struct thread *td, const char *name, int flags) +{ + struct file *fp; + int err, shmflags; + + if ((flags & ~(MFD_CLOEXEC | MFD_ALLOW_SEALING | MFD_HUGETLB | + MFD_HUGE_FLAGS)) != 0) + return (EINVAL); + /* This implementation doesn't currently support HUGETLB */ + if ((flags & MFD_HUGETLB) != 0) + return (ENOSYS); + shmflags = O_RDWR; + if ((flags & MFD_CLOEXEC) != 0) + shmflags |= O_CLOEXEC; + err = kern_shm_open(td, SHM_ANON, shmflags, 0, NULL, &fp); + if (err != 0) + return (err); + /* + * Just need to additionally set FSEALABLE as needed. We can't handle + * this in kern_shm_open() because FSEALABLE shares a bit with O_TRUNC, + * because it's unused after the initial open call. + */ + if ((flags & MFD_ALLOW_SEALING) != 0) + fp->f_flag |= FSEALABLE; + + fdrop(fp, td); + return (err); +} + +int +sys_memfd_create(struct thread *td, struct memfd_create_args *uap) +{ + + return (kern_memfd_create(td, uap->path, uap->flags)); +} Index: sys/sys/mman.h =================================================================== --- sys/sys/mman.h +++ sys/sys/mman.h @@ -176,6 +176,36 @@ * Anonymous object constant for shm_open(). */ #define SHM_ANON ((char *)1) + +/* + * Flags for memfd_create() + */ +#define MFD_CLOEXEC 0x00000001 +#define MFD_ALLOW_SEALING 0x00000002 + +/* UNSUPPORTED */ +#define MFD_HUGETLB 0x00000004 + +#define MFD_HUGE_SHIFT 26 +#define MFD_HUGE_64KB (16 << MFD_HUGE_SHIFT) +#define MFD_HUGE_512KB (19 << MFD_HUGE_SHIFT) +#define MFD_HUGE_1MB (20 << MFD_HUGE_SHIFT) +#define MFD_HUGE_2MB (21 << MFD_HUGE_SHIFT) +#define MFD_HUGE_8MB (23 << MFD_HUGE_SHIFT) +#define MFD_HUGE_16MB (24 << MFD_HUGE_SHIFT) +#define MFD_HUGE_32MB (25 << MFD_HUGE_SHIFT) +#define MFD_HUGE_256MB (28 << MFD_HUGE_SHIFT) +#define MFD_HUGE_512MB (29 << MFD_HUGE_SHIFT) +#define MFD_HUGE_1GB (30 << MFD_HUGE_SHIFT) +#define MFD_HUGE_2GB (31 << MFD_HUGE_SHIFT) +#define MFD_HUGE_16GB (34 << MFD_HUGE_SHIFT) + +/* memfd_create(2) interfaces checks this for validity of flags */ +#define MFD_HUGE_FLAGS \ + (MFD_HUGE_64KB | MFD_HUGE_512KB | MFD_HUGE_1MB | MFD_HUGE_2MB | \ + MFD_HUGE_8MB | MFD_HUGE_16MB | MFD_HUGE_32MB | MFD_HUGE_256MB | \ + MFD_HUGE_512MB | MFD_HUGE_1GB | MFD_HUGE_2GB | MFD_HUGE_16GB) + #endif /* __BSD_VISIBLE */ /* @@ -285,6 +315,9 @@ int shm_open(const char *, int, mode_t); int shm_unlink(const char *); #endif +#if __BSD_VISIBLE +int memfd_create(const char *, unsigned int); +#endif __END_DECLS #endif /* !_KERNEL */ Index: tests/sys/kern/Makefile =================================================================== --- tests/sys/kern/Makefile +++ tests/sys/kern/Makefile @@ -9,6 +9,7 @@ ATF_TESTS_C+= kern_copyin ATF_TESTS_C+= kern_descrip_test ATF_TESTS_C+= kill_zombie +ATF_TESTS_C+= memfd_test ATF_TESTS_C+= ptrace_test TEST_METADATA.ptrace_test+= timeout="15" ATF_TESTS_C+= reaper Index: tests/sys/kern/memfd_test.c =================================================================== --- /dev/null +++ tests/sys/kern/memfd_test.c @@ -0,0 +1,222 @@ +/*- + * Copyright (c) 2019 Kyle Evans + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include +__FBSDID("$FreeBSD$"); + +#include +#include + +#include +#include +#include + +ATF_TC_WITHOUT_HEAD(basic); +ATF_TC_BODY(basic, tc) +{ + int fd; + char buf[8]; + + ATF_REQUIRE((fd = memfd_create("...", 0)) != -1); + + /* File size should be initially 0 */ + ATF_REQUIRE(write(fd, buf, sizeof(buf)) == 0); + + /* ftruncate(2) must succeed without seals */ + ATF_REQUIRE(ftruncate(fd, sizeof(buf) - 1) == 0); + + ATF_REQUIRE(write(fd, buf, sizeof(buf)) == sizeof(buf) - 1); + + close(fd); +} + +ATF_TC_WITHOUT_HEAD(cloexec); +ATF_TC_BODY(cloexec, tc) +{ + int fd_nocl, fd_cl; + + ATF_REQUIRE((fd_nocl = memfd_create("...", 0)) != -1); + ATF_REQUIRE((fd_cl = memfd_create("...", MFD_CLOEXEC)) != -1); + + ATF_REQUIRE((fcntl(fd_nocl, F_GETFD) & FD_CLOEXEC) == 0); + ATF_REQUIRE((fcntl(fd_cl, F_GETFD) & FD_CLOEXEC) != 0); + + close(fd_nocl); + close(fd_cl); +} + +ATF_TC_WITHOUT_HEAD(disallowed_sealing); +ATF_TC_BODY(disallowed_sealing, tc) +{ + int fd; + + ATF_REQUIRE((fd = memfd_create("...", 0)) != -1); + ATF_REQUIRE(fcntl(fd, F_GET_SEALS) == -1); + ATF_REQUIRE(errno == EINVAL); + ATF_REQUIRE(fcntl(fd, F_ADD_SEALS, F_SEAL_WRITE) == -1); + ATF_REQUIRE(errno == EINVAL); + + close(fd); +} + +ATF_TC_WITHOUT_HEAD(write_seal); +ATF_TC_BODY(write_seal, tc) +{ + int fd; + char buf[8]; + + ATF_REQUIRE((fd = memfd_create("...", MFD_ALLOW_SEALING)) != -1); + ATF_REQUIRE(ftruncate(fd, sizeof(buf)) == 0); + + /* Write once, then we'll seal it and try again */ + ATF_REQUIRE(write(fd, buf, sizeof(buf)) == sizeof(buf)); + ATF_REQUIRE(lseek(fd, 0, SEEK_SET) == 0); + + ATF_REQUIRE(fcntl(fd, F_ADD_SEALS, F_SEAL_WRITE) == 0); + + ATF_REQUIRE(write(fd, buf, sizeof(buf)) == -1); + ATF_REQUIRE(errno == EPERM); + + close(fd); +} + +static int +memfd_truncate_test(int initial_size, int dest_size, int seals) +{ + int err, fd; + + ATF_REQUIRE((fd = memfd_create("...", MFD_ALLOW_SEALING)) != -1); + ATF_REQUIRE(ftruncate(fd, initial_size) == 0); + + ATF_REQUIRE(fcntl(fd, F_ADD_SEALS, seals) == 0); + + err = ftruncate(fd, dest_size); + if (err != 0) + err = errno; + close(fd); + return (err); +} + +ATF_TC_WITHOUT_HEAD(truncate_seals); +ATF_TC_BODY(truncate_seals, tc) +{ + + ATF_REQUIRE(memfd_truncate_test(4, 8, F_SEAL_GROW) == EPERM); + ATF_REQUIRE(memfd_truncate_test(8, 4, F_SEAL_SHRINK) == EPERM); + ATF_REQUIRE(memfd_truncate_test(8, 4, F_SEAL_GROW) == 0); + ATF_REQUIRE(memfd_truncate_test(4, 8, F_SEAL_SHRINK) == 0); + + ATF_REQUIRE(memfd_truncate_test(4, 8, F_SEAL_GROW | F_SEAL_SHRINK) == + EPERM); + ATF_REQUIRE(memfd_truncate_test(8, 4, F_SEAL_GROW | F_SEAL_SHRINK) == + EPERM); + ATF_REQUIRE(memfd_truncate_test(4, 4, F_SEAL_GROW | F_SEAL_SHRINK) == + 0); +} + +ATF_TC_WITHOUT_HEAD(get_seals); +ATF_TC_BODY(get_seals, tc) +{ + int fd; + int seals; + + ATF_REQUIRE((fd = memfd_create("...", MFD_ALLOW_SEALING)) != -1); + ATF_REQUIRE(fcntl(fd, F_GET_SEALS) == 0); + + ATF_REQUIRE(fcntl(fd, F_ADD_SEALS, F_SEAL_WRITE | F_SEAL_GROW) == 0); + seals = fcntl(fd, F_GET_SEALS); + ATF_REQUIRE(seals == (F_SEAL_WRITE | F_SEAL_GROW)); + + close(fd); +} + +ATF_TC_WITHOUT_HEAD(dup_seals); +ATF_TC_BODY(dup_seals, tc) +{ + char buf[8]; + int fd, fdx; + int seals; + + ATF_REQUIRE((fd = memfd_create("...", MFD_ALLOW_SEALING)) != -1); + ATF_REQUIRE((fdx = dup(fd)) != -1); + ATF_REQUIRE(fcntl(fd, F_GET_SEALS) == 0); + + ATF_REQUIRE(fcntl(fd, F_ADD_SEALS, F_SEAL_WRITE | F_SEAL_GROW) == 0); + seals = fcntl(fd, F_GET_SEALS); + ATF_REQUIRE(seals == (F_SEAL_WRITE | F_SEAL_GROW)); + + seals = fcntl(fdx, F_GET_SEALS); + ATF_REQUIRE(seals == (F_SEAL_WRITE | F_SEAL_GROW)); + + /* Make sure the seal's actually being applied at the inode level */ + ATF_REQUIRE(write(fdx, buf, sizeof(buf)) == -1); + ATF_REQUIRE(errno == EPERM); + + close(fd); + close(fdx); +} + +ATF_TC_WITHOUT_HEAD(immutable_seals); +ATF_TC_BODY(immutable_seals, tc) +{ + int fd; + + ATF_REQUIRE((fd = memfd_create("...", MFD_ALLOW_SEALING)) != -1); + + ATF_REQUIRE(fcntl(fd, F_ADD_SEALS, F_SEAL_SEAL) == 0); + ATF_REQUIRE(fcntl(fd, F_ADD_SEALS, F_SEAL_GROW) == -1); + ATF_REQUIRE_MSG(errno == EPERM, + "Added unique grow seal after restricting seals"); + + close(fd); + + /* + * Also check that adding a seal that already exists really doesn't + * do anything once we're sealed. + */ + ATF_REQUIRE((fd = memfd_create("...", MFD_ALLOW_SEALING)) != -1); + + ATF_REQUIRE(fcntl(fd, F_ADD_SEALS, F_SEAL_GROW | F_SEAL_SEAL) == 0); + ATF_REQUIRE(fcntl(fd, F_ADD_SEALS, F_SEAL_GROW) == -1); + ATF_REQUIRE_MSG(errno == EPERM, + "Added duplicate grow seal after restricting seals"); + close(fd); +} + + +ATF_TP_ADD_TCS(tp) +{ + + ATF_TP_ADD_TC(tp, basic); + ATF_TP_ADD_TC(tp, cloexec); + ATF_TP_ADD_TC(tp, disallowed_sealing); + ATF_TP_ADD_TC(tp, write_seal); + ATF_TP_ADD_TC(tp, truncate_seals); + ATF_TP_ADD_TC(tp, get_seals); + ATF_TP_ADD_TC(tp, dup_seals); + ATF_TP_ADD_TC(tp, immutable_seals); + return (atf_no_error()); +}