Index: lib/libc/include/libc_private.h =================================================================== --- lib/libc/include/libc_private.h +++ lib/libc/include/libc_private.h @@ -320,6 +320,7 @@ struct iovec; struct kevent; struct msghdr; +struct open_how; struct pollfd; struct rusage; struct sigaction; @@ -362,6 +363,7 @@ int __sys_nanosleep(const struct timespec *, struct timespec *); int __sys_open(const char *, int, ...); int __sys_openat(int, const char *, int, ...); +long __sys_openat2(int, const char *, struct open_how *, __size_t); int __sys_pdfork(int *, int); int __sys_pselect(int, struct fd_set *, struct fd_set *, struct fd_set *, const struct timespec *, Index: lib/libc/sys/Symbol.map =================================================================== --- lib/libc/sys/Symbol.map +++ lib/libc/sys/Symbol.map @@ -422,6 +422,7 @@ timerfd_create; timerfd_gettime; timerfd_settime; + openat2; }; FBSDprivate_1.0 { @@ -813,6 +814,8 @@ __sys_open; _openat; __sys_openat; + _openat2; + __sys_openat2; _pathconf; __sys_pathconf; __sys_pdfork; Index: lib/libc/sys/openat2.c =================================================================== --- /dev/null +++ lib/libc/sys/openat2.c @@ -0,0 +1,49 @@ +/* + * Copyright (c) 2023 The FreeBSD Foundation. + * All rights reserved. + * + * Portions of this software were developed by Konstantin Belousov + * under sponsorship from the FreeBSD Foundation. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice(s), this list of conditions and the following disclaimer as + * the first lines of this file unmodified other than the possible + * addition of one or more copyright notices. + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice(s), this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE + * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, + * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include +__FBSDID("$FreeBSD$"); + +#include +#include +#include +#include "libc_private.h" +
+/*
+ * openat2(2) libc entry point: pass the arguments unchanged to the
+ * libc-private __sys_openat2() symbol and return its result.
+ */
+long
+openat2(int fd, const char *path, struct open_how *how, size_t size)
+{
+	return (__sys_openat2(fd, path, how, size));
+}
Index: sys/bsm/audit_kevents.h =================================================================== --- sys/bsm/audit_kevents.h +++ sys/bsm/audit_kevents.h @@ -662,6 +662,7 @@ #define AUE_AIO_READV 43268 /* FreeBSD-specific. */ #define AUE_FSPACECTL 43269 /* FreeBSD-specific. */ #define AUE_TIMERFD 43270 /* FreeBSD/Linux. */ +#define AUE_OPENAT2 43271 /* FreeBSD/Linux. 
*/ /* * Darwin BSM uses a number of AUE_O_* definitions, which are aliased to the Index: sys/compat/freebsd32/freebsd32_syscall.h =================================================================== --- sys/compat/freebsd32/freebsd32_syscall.h +++ sys/compat/freebsd32/freebsd32_syscall.h @@ -506,4 +506,5 @@ #define FREEBSD32_SYS_timerfd_create 585 #define FREEBSD32_SYS_freebsd32_timerfd_gettime 586 #define FREEBSD32_SYS_freebsd32_timerfd_settime 587 -#define FREEBSD32_SYS_MAXSYSCALL 588 +#define FREEBSD32_SYS_openat2 588 +#define FREEBSD32_SYS_MAXSYSCALL 589 Index: sys/compat/freebsd32/freebsd32_syscalls.c =================================================================== --- sys/compat/freebsd32/freebsd32_syscalls.c +++ sys/compat/freebsd32/freebsd32_syscalls.c @@ -593,4 +593,5 @@ "timerfd_create", /* 585 = timerfd_create */ "freebsd32_timerfd_gettime", /* 586 = freebsd32_timerfd_gettime */ "freebsd32_timerfd_settime", /* 587 = freebsd32_timerfd_settime */ + "openat2", /* 588 = openat2 */ }; Index: sys/compat/freebsd32/freebsd32_sysent.c =================================================================== --- sys/compat/freebsd32/freebsd32_sysent.c +++ sys/compat/freebsd32/freebsd32_sysent.c @@ -649,4 +649,5 @@ { .sy_narg = AS(timerfd_create_args), .sy_call = (sy_call_t *)sys_timerfd_create, .sy_auevent = AUE_TIMERFD, .sy_flags = SYF_CAPENABLED, .sy_thrcnt = SY_THR_STATIC }, /* 585 = timerfd_create */ { .sy_narg = AS(freebsd32_timerfd_gettime_args), .sy_call = (sy_call_t *)freebsd32_timerfd_gettime, .sy_auevent = AUE_TIMERFD, .sy_flags = SYF_CAPENABLED, .sy_thrcnt = SY_THR_STATIC }, /* 586 = freebsd32_timerfd_gettime */ { .sy_narg = AS(freebsd32_timerfd_settime_args), .sy_call = (sy_call_t *)freebsd32_timerfd_settime, .sy_auevent = AUE_TIMERFD, .sy_flags = SYF_CAPENABLED, .sy_thrcnt = SY_THR_STATIC }, /* 587 = freebsd32_timerfd_settime */ + { .sy_narg = AS(openat2_args), .sy_call = (sy_call_t *)sys_openat2, .sy_auevent = AUE_OPENAT2, .sy_flags = 0, .sy_thrcnt = 
SY_THR_STATIC }, /* 588 = openat2 */ }; Index: sys/compat/freebsd32/freebsd32_systrace_args.c =================================================================== --- sys/compat/freebsd32/freebsd32_systrace_args.c +++ sys/compat/freebsd32/freebsd32_systrace_args.c @@ -3357,6 +3357,16 @@ *n_args = 4; break; } + /* openat2 */ + case 588: { + struct openat2_args *p = params; + iarg[a++] = p->fd; /* int */ + uarg[a++] = (intptr_t)p->path; /* const char * */ + uarg[a++] = (intptr_t)p->how; /* struct open_how * */ + uarg[a++] = p->size; /* size_t */ + *n_args = 4; + break; + } default: *n_args = 0; break; @@ -9067,6 +9077,25 @@ break; }; break; + /* openat2 */ + case 588: + switch (ndx) { + case 0: + p = "int"; + break; + case 1: + p = "userland const char *"; + break; + case 2: + p = "userland struct open_how *"; + break; + case 3: + p = "size_t"; + break; + default: + break; + }; + break; default: break; }; @@ -10945,6 +10974,11 @@ if (ndx == 0 || ndx == 1) p = "int"; break; + /* openat2 */ + case 588: + if (ndx == 0 || ndx == 1) + p = "long"; + break; default: break; }; Index: sys/kern/init_sysent.c =================================================================== --- sys/kern/init_sysent.c +++ sys/kern/init_sysent.c @@ -648,4 +648,5 @@ { .sy_narg = AS(timerfd_create_args), .sy_call = (sy_call_t *)sys_timerfd_create, .sy_auevent = AUE_TIMERFD, .sy_flags = SYF_CAPENABLED, .sy_thrcnt = SY_THR_STATIC }, /* 585 = timerfd_create */ { .sy_narg = AS(timerfd_gettime_args), .sy_call = (sy_call_t *)sys_timerfd_gettime, .sy_auevent = AUE_TIMERFD, .sy_flags = SYF_CAPENABLED, .sy_thrcnt = SY_THR_STATIC }, /* 586 = timerfd_gettime */ { .sy_narg = AS(timerfd_settime_args), .sy_call = (sy_call_t *)sys_timerfd_settime, .sy_auevent = AUE_TIMERFD, .sy_flags = SYF_CAPENABLED, .sy_thrcnt = SY_THR_STATIC }, /* 587 = timerfd_settime */ + { .sy_narg = AS(openat2_args), .sy_call = (sy_call_t *)sys_openat2, .sy_auevent = AUE_OPENAT2, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 588 = 
openat2 */ }; Index: sys/kern/syscalls.c =================================================================== --- sys/kern/syscalls.c +++ sys/kern/syscalls.c @@ -593,4 +593,5 @@ "timerfd_create", /* 585 = timerfd_create */ "timerfd_gettime", /* 586 = timerfd_gettime */ "timerfd_settime", /* 587 = timerfd_settime */ + "openat2", /* 588 = openat2 */ }; Index: sys/kern/syscalls.master =================================================================== --- sys/kern/syscalls.master +++ sys/kern/syscalls.master @@ -3329,6 +3329,13 @@ _Out_opt_ _Contains_long_timet_ struct itimerspec *old_value ); } - +588 AUE_OPENAT2 STD { + long openat2( + int fd, + _In_z_ const char *path, + struct open_how *how, + size_t size + ); + } ; vim: syntax=off Index: sys/kern/systrace_args.c =================================================================== --- sys/kern/systrace_args.c +++ sys/kern/systrace_args.c @@ -3444,6 +3444,16 @@ *n_args = 4; break; } + /* openat2 */ + case 588: { + struct openat2_args *p = params; + iarg[a++] = p->fd; /* int */ + uarg[a++] = (intptr_t)p->path; /* const char * */ + uarg[a++] = (intptr_t)p->how; /* struct open_how * */ + uarg[a++] = p->size; /* size_t */ + *n_args = 4; + break; + } default: *n_args = 0; break; @@ -9212,6 +9222,25 @@ break; }; break; + /* openat2 */ + case 588: + switch (ndx) { + case 0: + p = "int"; + break; + case 1: + p = "userland const char *"; + break; + case 2: + p = "userland struct open_how *"; + break; + case 3: + p = "size_t"; + break; + default: + break; + }; + break; default: break; }; @@ -11180,6 +11209,11 @@ if (ndx == 0 || ndx == 1) p = "int"; break; + /* openat2 */ + case 588: + if (ndx == 0 || ndx == 1) + p = "long"; + break; default: break; }; Index: sys/kern/vfs_lookup.c =================================================================== --- sys/kern/vfs_lookup.c +++ sys/kern/vfs_lookup.c @@ -86,6 +86,9 @@ ndp->ni_cnd.cn_flags |= ISRESTARTED; \ } while (0) +FEATURE(rnosymlink, "supports RESOLVE_NO_SYMLINK"); 
+FEATURE(rbeneath, "supports RESOLVE_BENEATH"); + SDT_PROVIDER_DEFINE(vfs); SDT_PROBE_DEFINE4(vfs, namei, lookup, entry, "struct vnode *", "char *", "unsigned long", "bool"); @@ -1330,6 +1333,11 @@ error = ENOENT; goto bad2; } + if (cnp->cn_flags & RNOSYMLINK) { + /* Linux openat2() behavior for RESOLVE_NO_SYMLINKS */ + error = ELOOP; + goto bad2; + } if (dp->v_mount->mnt_flag & MNT_NOSYMFOLLOW) { error = EACCES; goto bad2; Index: sys/kern/vfs_syscalls.c =================================================================== --- sys/kern/vfs_syscalls.c +++ sys/kern/vfs_syscalls.c @@ -1109,6 +1109,66 @@ uap->mode)); } +#ifndef _SYS_SYSPROTO_H_ +struct openat2_args { + int fd; + char *path; + struct open_how *how; + size_t size; +}; +#endif + +static int +build_open_flags(struct openat2_args *uap, int *flag_out, int *mode_out) +{ + int flag, error; + struct open_how how; + + if (uap->size > sizeof(struct open_how)) + return (-E2BIG); + + if ((uap->how == NULL) || (uap->size != sizeof(struct open_how))) + return (-EINVAL); + + error = copyin(uap->how, &how, sizeof(struct open_how)); + if (error != 0) + return (error); + + if (how.resolve & ~VALID_RESOLVE_FLAGS) + return (-EINVAL); + + if (how.flags >= INT32_MAX) + return (-EINVAL); + + if (how.mode >= INT32_MAX) + return (-EINVAL); + + flag = how.flags; + if (how.resolve & RESOLVE_NO_SYMLINKS) + flag |= O_RESOLVE_NO_SYMLINKS; + + if (how.resolve & RESOLVE_BENEATH) + flag |= O_RESOLVE_BENEATH; + + *flag_out = flag; + *mode_out = how.mode; + return (0); +} + +long +sys_openat2(struct thread *td, struct openat2_args *uap) +{ + AUDIT_ARG_FD(uap->fd); + int error, flag, mode; + + error = build_open_flags(uap, &flag, &mode); + if (error) + return (error); + + return (kern_openat(td, uap->fd, uap->path, UIO_USERSPACE, flag, + mode)); +} + int kern_openat(struct thread *td, int fd, const char *path, enum uio_seg pathseg, int flags, int mode) Index: sys/kern/vfs_vnops.c 
=================================================================== --- sys/kern/vfs_vnops.c +++ sys/kern/vfs_vnops.c @@ -200,6 +200,8 @@ res = ISOPEN | LOCKLEAF; if ((fmode & O_RESOLVE_BENEATH) != 0) res |= RBENEATH; + if ((fmode & O_RESOLVE_NO_SYMLINKS) != 0) + res |= RNOSYMLINK; if ((fmode & O_EMPTY_PATH) != 0) res |= EMPTYPATH; if ((fmode & FREAD) != 0) Index: sys/sys/fcntl.h =================================================================== --- sys/sys/fcntl.h +++ sys/sys/fcntl.h @@ -45,6 +45,7 @@ #include #include +#include #ifndef _MODE_T_DECLARED typedef __mode_t mode_t; @@ -147,6 +148,7 @@ */ #ifdef _KERNEL +#define O_RESOLVE_NO_SYMLINKS 0x04000000 /* Only for devfs d_close() flags. */ #define FLASTCLOSE O_DIRECTORY @@ -379,6 +381,7 @@ int flock(int, int); int fspacectl(int, int, const struct spacectl_range *, int, struct spacectl_range *); +long openat2(int, const char *, struct open_how *, __size_t); #endif #if __POSIX_VISIBLE >= 200809 int openat(int, const char *, int, ...); Index: sys/sys/namei.h =================================================================== --- sys/sys/namei.h +++ sys/sys/namei.h @@ -150,6 +150,7 @@ #define LOCKSHARED 0x0100 /* Shared lock leaf */ #define NOFOLLOW 0x0000 /* do not follow symbolic links (pseudo) */ #define RBENEATH 0x100000000ULL /* No escape, even tmp, from start dir */ +#define RNOSYMLINK 0x200000000ULL /* Do not follow any symbolic links */ #define MODMASK 0xf000001ffULL /* mask of operational modifiers */ /* Index: sys/sys/openat2.h =================================================================== --- /dev/null +++ sys/sys/openat2.h @@ -0,0 +1,64 @@ +/*- + * SPDX-License-Identifier: BSD-3-Clause + * + * Copyright (c) 1982, 1986, 1993 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. 
Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * @(#)fcntl.h 8.5 (Berkeley) 5/4/95 + * $FreeBSD$ + */ + +#ifndef _SYS__OPENAT2_H_ +#define _SYS__OPENAT2_H_ + +#include + +struct open_how { + uint64_t flags; /* open flags; must fit in an int */ + uint64_t mode; /* creation mode; must fit in an int */ + uint64_t resolve; /* mask of RESOLVE_* bits below */ +}; + +/* + * Resolve flags for openat2(2).  Only the bits collected in + * VALID_RESOLVE_FLAGS are accepted. + */ +#define RESOLVE_NO_XDEV 0x01 /* NOT IMPLEMENTED - Block mount-point crossings. */ +#define RESOLVE_NO_MAGICLINKS 0x02 /* NOT IMPLEMENTED - in Linux, this blocks + traversal through procfs-style + "magic-links" */ +#define RESOLVE_NO_SYMLINKS 0x04 /* Block traversal through all symlinks */ +#define RESOLVE_BENEATH 0x08 /* Make all jumps to "/" and ".." be + scoped within the dirfd. 
Identical behavior + to openat(2) flag O_RESOLVE_BENEATH */ +#define RESOLVE_IN_ROOT 0x10 /* NOT IMPLEMENTED - in Linux, make all jumps to + "/" and ".." be scoped inside the dirfd + (similar to chroot(2)). */ +#define RESOLVE_CACHED 0x20 /* NOT IMPLEMENTED - Only complete if resolution + can be completed via cached lookup. */ + +#define VALID_RESOLVE_FLAGS \ + (RESOLVE_NO_SYMLINKS | RESOLVE_BENEATH) +#endif /* !_SYS__OPENAT2_H_ */ Index: sys/sys/syscall.h =================================================================== --- sys/sys/syscall.h +++ sys/sys/syscall.h @@ -524,4 +524,5 @@ #define SYS_timerfd_create 585 #define SYS_timerfd_gettime 586 #define SYS_timerfd_settime 587 -#define SYS_MAXSYSCALL 588 +#define SYS_openat2 588 +#define SYS_MAXSYSCALL 589 Index: sys/sys/syscall.mk =================================================================== --- sys/sys/syscall.mk +++ sys/sys/syscall.mk @@ -427,4 +427,5 @@ membarrier.o \ timerfd_create.o \ timerfd_gettime.o \ - timerfd_settime.o + timerfd_settime.o \ + openat2.o Index: sys/sys/sysproto.h =================================================================== --- sys/sys/sysproto.h +++ sys/sys/sysproto.h @@ -1870,6 +1870,12 @@ char new_value_l_[PADL_(const struct itimerspec *)]; const struct itimerspec * new_value; char new_value_r_[PADR_(const struct itimerspec *)]; char old_value_l_[PADL_(struct itimerspec *)]; struct itimerspec * old_value; char old_value_r_[PADR_(struct itimerspec *)]; }; +struct openat2_args { + char fd_l_[PADL_(int)]; int fd; char fd_r_[PADR_(int)]; + char path_l_[PADL_(const char *)]; const char * path; char path_r_[PADR_(const char *)]; + char how_l_[PADL_(struct open_how *)]; struct open_how * how; char how_r_[PADR_(struct open_how *)]; + char size_l_[PADL_(size_t)]; size_t size; char size_r_[PADR_(size_t)]; +}; int sys_exit(struct thread *, struct exit_args *); int sys_fork(struct thread *, struct fork_args *); int sys_read(struct thread *, struct read_args *); @@ -2268,6 +2274,7 @@ int 
sys_timerfd_create(struct thread *, struct timerfd_create_args *); int sys_timerfd_gettime(struct thread *, struct timerfd_gettime_args *); int sys_timerfd_settime(struct thread *, struct timerfd_settime_args *); +long sys_openat2(struct thread *, struct openat2_args *); #ifdef COMPAT_43 @@ -3246,6 +3253,7 @@ #define SYS_AUE_timerfd_create AUE_TIMERFD #define SYS_AUE_timerfd_gettime AUE_TIMERFD #define SYS_AUE_timerfd_settime AUE_TIMERFD +#define SYS_AUE_openat2 AUE_OPENAT2 #undef PAD_ #undef PADL_