Index: lib/libc/include/libc_private.h =================================================================== --- lib/libc/include/libc_private.h +++ lib/libc/include/libc_private.h @@ -213,6 +213,7 @@ INTERPOS_msync, INTERPOS_nanosleep, INTERPOS_openat, + INTERPOS_openat2, INTERPOS_poll, INTERPOS_pselect, INTERPOS_recvfrom, @@ -320,6 +321,7 @@ struct iovec; struct kevent; struct msghdr; +struct open_how; struct pollfd; struct rusage; struct sigaction; @@ -362,6 +364,7 @@ int __sys_nanosleep(const struct timespec *, struct timespec *); int __sys_open(const char *, int, ...); int __sys_openat(int, const char *, int, ...); +int __sys_openat2(int, const char *, struct open_how *, __size_t); int __sys_pdfork(int *, int); int __sys_pselect(int, struct fd_set *, struct fd_set *, struct fd_set *, const struct timespec *, Index: lib/libc/sys/Symbol.map =================================================================== --- lib/libc/sys/Symbol.map +++ lib/libc/sys/Symbol.map @@ -424,6 +424,10 @@ timerfd_settime; }; +FBSD_1.8 { + openat2; +}; + FBSDprivate_1.0 { ___acl_aclcheck_fd; __sys___acl_aclcheck_fd; Index: lib/libc/sys/openat2.c =================================================================== --- /dev/null +++ lib/libc/sys/openat2.c @@ -0,0 +1,45 @@ +/* + * Copyright (c) 2024 iXsystems, Inc. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice(s), this list of conditions and the following disclaimer as + * the first lines of this file unmodified other than the possible + * addition of one or more copyright notices. + * 2. Redistributions in binary form must reproduce the above copyright + * notice(s), this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE + * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, + * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include +#include +#include "libc_private.h" + +/* + * Call whatever handler occupies the INTERPOS_openat2 slot of + * __libc_interposing, forwarding all four arguments unchanged. + * NOTE(review): no table entry for this slot is visible in this patch; + * confirm one is installed elsewhere. + */ +int +openat2(int fd, const char *path, struct open_how *how, size_t size) +{ + return (((int (*)(int, const char *, struct open_how *, size_t)) + __libc_interposing[INTERPOS_openat2])(fd, path, how, size)); +} Index: sys/kern/syscalls.master =================================================================== --- sys/kern/syscalls.master +++ sys/kern/syscalls.master @@ -3329,6 +3329,13 @@ _Out_opt_ _Contains_long_timet_ struct itimerspec *old_value ); } - +588 AUE_OPENAT STD { + int openat2( + int fd, + _In_z_ const char *path, + _In_reads_bytes_(size) struct open_how *how, + size_t size + ); + } ; vim: syntax=off Index: sys/kern/vfs_lookup.c =================================================================== --- sys/kern/vfs_lookup.c +++ sys/kern/vfs_lookup.c @@ -1330,6 +1330,11 @@ error = ENOENT; goto bad2; } + if (cnp->cn_flags & RNOSYMLINK) { + /* Linux openat2() behavior for RESOLVE_NO_SYMLINKS */ + error = 
ELOOP; + goto bad2; + } if (dp->v_mount->mnt_flag & MNT_NOSYMFOLLOW) { error = EACCES; goto bad2; Index: sys/kern/vfs_syscalls.c =================================================================== --- sys/kern/vfs_syscalls.c +++ 
sys/kern/vfs_syscalls.c @@ -105,6 +105,8 @@ size_t count, struct thread *td); static int kern_linkat_vp(struct thread *td, struct vnode *vp, int fd, const char *path, enum uio_seg segflag); +static int kern_openat2(struct thread *td, int fd, const char *path, + enum uio_seg pathseg, int flags, int mode, uint64_t flags2); uint64_t at2cnpflags(u_int at_flags, u_int mask) @@ -1088,8 +1090,8 @@ sys_open(struct thread *td, struct open_args *uap) { - return (kern_openat(td, AT_FDCWD, uap->path, UIO_USERSPACE, - uap->flags, uap->mode)); + return (kern_openat2(td, AT_FDCWD, uap->path, UIO_USERSPACE, + uap->flags, uap->mode, 0)); } #ifndef _SYS_SYSPROTO_H_ @@ -1105,13 +1107,74 @@ { AUDIT_ARG_FD(uap->fd); - return (kern_openat(td, uap->fd, uap->path, UIO_USERSPACE, uap->flag, - uap->mode)); + return (kern_openat2(td, uap->fd, uap->path, UIO_USERSPACE, uap->flag, + uap->mode, 0)); } int kern_openat(struct thread *td, int fd, const char *path, enum uio_seg pathseg, int flags, int mode) +{ + return (kern_openat2(td, fd, path, pathseg, flags, mode, 0)); +} + +#ifndef _SYS_SYSPROTO_H_ +struct openat2_args { + int fd; + char *path; + struct open_how *how; + size_t size; +}; +#endif + +/* + * openat2(2): copy in the caller's struct open_how, translate its RESOLVE_* + * bits to VN_OPEN_RES_* flags and open the path via kern_openat2(). + * Returns E2BIG or EINVAL when size does not match struct open_how, EINVAL + * for unknown resolve bits or flags/mode values wider than 32 bits. + */ +int +sys_openat2(struct thread *td, struct openat2_args *uap) +{ + struct open_how how; + uint64_t resolve; + int error; + + AUDIT_ARG_FD(uap->fd); + + /* Only the struct open_how layout defined today is accepted. */ + if (uap->size > sizeof(how)) + return (E2BIG); + if (uap->size != sizeof(how)) + return (EINVAL); + + error = copyin(uap->how, &how, sizeof(how)); + if (error != 0) + return (error); + + /* Refuse resolve bits that would otherwise be silently ignored. 
*/ + if ((how.resolve & ~(uint64_t)(RESOLVE_NO_XDEV | RESOLVE_NO_SYMLINKS | + RESOLVE_BENEATH)) != 0) + return (EINVAL); + /* flags and mode are narrowed to int below; refuse lossy values. */ + if ((how.flags >> 32) != 0 || (how.mode >> 32) != 0) + return (EINVAL); + + resolve = 0; + if ((how.resolve & RESOLVE_NO_XDEV) != 0) + resolve |= VN_OPEN_RES_NO_XDEV; + if ((how.resolve & RESOLVE_BENEATH) != 0) + resolve |= VN_OPEN_RES_BENEATH; + if ((how.resolve & RESOLVE_NO_SYMLINKS) != 0) + resolve |= VN_OPEN_RES_NO_SYMLINK; + + return (kern_openat2(td, uap->fd, uap->path, UIO_USERSPACE, + how.flags, how.mode, resolve)); +} + +static int +kern_openat2(struct thread *td, int fd, const char *path, enum uio_seg pathseg, + int 
flags, int mode, uint64_t flags2) { struct proc *p = td->td_proc; struct filedesc *fdp; @@ -1160,7 +1223,7 @@ NDINIT_ATRIGHTS(&nd, LOOKUP, FOLLOW | AUDITVNODE1 | WANTIOCTLCAPS, pathseg, path, fd, &rights); td->td_dupfd = -1; /* XXX check for fdopen */ - error = vn_open_cred(&nd, &flags, cmode, VN_OPEN_WANTIOCTLCAPS, + error = vn_open_cred(&nd, &flags, cmode, VN_OPEN_WANTIOCTLCAPS | flags2, td->td_ucred, fp); if (error != 0) { /* Index: sys/kern/vfs_vnops.c =================================================================== --- sys/kern/vfs_vnops.c +++ sys/kern/vfs_vnops.c @@ -212,6 +212,12 @@ res |= NOCAPCHECK; if ((vn_open_flags & VN_OPEN_WANTIOCTLCAPS) != 0) res |= WANTIOCTLCAPS; + if ((vn_open_flags & VN_OPEN_RES_BENEATH) != 0) + res |= RBENEATH; + if ((vn_open_flags & VN_OPEN_RES_NO_XDEV) != 0) + res |= NOCROSSMOUNT; + if ((vn_open_flags & VN_OPEN_RES_NO_SYMLINK) != 0) + res |= RNOSYMLINK; return (res); } Index: sys/sys/fcntl.h =================================================================== --- sys/sys/fcntl.h +++ sys/sys/fcntl.h @@ -45,6 +45,7 @@ #include #include +#include #ifndef _MODE_T_DECLARED typedef __mode_t mode_t; @@ -379,6 +380,7 @@ int flock(int, int); int fspacectl(int, int, const struct spacectl_range *, int, struct spacectl_range *); +int openat2(int, const char *, struct open_how *, __size_t); #endif #if __POSIX_VISIBLE >= 200809 int openat(int, const char *, int, ...); Index: sys/sys/namei.h =================================================================== --- sys/sys/namei.h +++ sys/sys/namei.h @@ -150,6 +150,7 @@ #define LOCKSHARED 0x0100 /* Shared lock leaf */ #define NOFOLLOW 0x0000 /* do not follow symbolic links (pseudo) */ #define RBENEATH 0x100000000ULL /* No escape, even tmp, from start dir */ +#define RNOSYMLINK 0x200000000ULL /* Do not follow any symlinks */ #define MODMASK 0xf000001ffULL /* mask of operational modifiers */ /* Index: sys/sys/openat2.h =================================================================== --- 
/dev/null +++ sys/sys/openat2.h @@ -0,0 +1,51 @@ +/*- + * SPDX-License-Identifier: BSD-3-Clause + * + * Copyright (c) 2024 iXsystems, Inc. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ * @(#)fcntl.h 8.5 (Berkeley) 5/4/95 + * $FreeBSD$ + */ + +#ifndef _SYS__OPENAT2_H_ +#define _SYS__OPENAT2_H_ + +#include + +struct open_how { + uint64_t flags; /* open flags; sys_openat2() hands these to kern_openat2() */ + uint64_t mode; /* mode; likewise handed to kern_openat2() */ + uint64_t resolve; /* mask of the RESOLVE_* bits below */ +}; + +/* + * resolve flags for openat2(2). + */ +#define RESOLVE_NO_XDEV 0x01 /* Block mount-point crossings. 
*/ +#define RESOLVE_NO_SYMLINKS 0x04 /* Block traversal through all symlinks */ +#define RESOLVE_BENEATH 0x08 /* Block lexical "..", symlinks, and absolute + paths which escape the dirfd */ +#endif /* !_SYS__OPENAT2_H_ */ Index: sys/sys/vnode.h =================================================================== --- sys/sys/vnode.h +++ sys/sys/vnode.h @@ -613,6 +613,11 @@ #define VN_OPEN_INVFS 0x00000008 #define VN_OPEN_WANTIOCTLCAPS 0x00000010 +/* vn_open_flags related to path resolution */ +#define VN_OPEN_RES_NO_XDEV 0x00010000 +#define VN_OPEN_RES_BENEATH 0x00020000 +#define VN_OPEN_RES_NO_SYMLINK 0x00040000 + /* copy_file_range kernel flags */ #define COPY_FILE_RANGE_KFLAGS 0xff000000 #define COPY_FILE_RANGE_TIMEO1SEC 0x01000000 /* Return after 1sec. */