Index: lib/libc/sys/open.2 =================================================================== --- lib/libc/sys/open.2 +++ lib/libc/sys/open.2 @@ -142,6 +142,7 @@ O_DIRECTORY error if file is not a directory O_CLOEXEC set FD_CLOEXEC upon open O_VERIFY verify the contents of the file +O_BENEATH fails if outside current directory or its children .Ed .Pp Opening a file with @@ -269,6 +270,23 @@ The run-time linker (rtld) uses this flag to ensure shared objects have been verified before operating on them. .Pp +.Dv O_BENEATH +returns +.Er ENOTCAPABLE +if the specified path, after resolving all symlinks and ".." references +in it, does not reside in the directory hierarchy of children beneath +the starting directory, or is an absolute path. +Starting directory is the process current directory if relative +.Fa path +is used for +.Fn open , +and the directory referenced by the +.Fa fd +argument when specifying relative +.Fa path +for +.Fn openat . +.Pp If successful, .Fn open returns a non-negative integer, termed a file descriptor. @@ -487,9 +505,13 @@ was called and the process is in capability mode. .It Bq Er ENOTCAPABLE .Fa path -is an absolute path or contained a ".." component leading to a +is an absolute path, +or contained a ".." component leading to a directory outside of the directory hierarchy specified by -.Fa fd . +.Fa fd , +and the process is in capability mode or the +.Dv O_BENEATH +flag was provided. .El .Sh SEE ALSO .Xr chmod 2 , Index: lib/libc/sys/stat.2 =================================================================== --- lib/libc/sys/stat.2 +++ lib/libc/sys/stat.2 @@ -100,6 +100,13 @@ If .Fa path names a symbolic link, the status of the symbolic link is returned. +.It Dv AT_BENEATH +Do not allow to stat a file which is not a child of the starting directory. +See the description of the +.Dv O_BENEATH +flag in the +.Xr open 2 +manual page. .El .Pp If @@ -397,6 +404,12 @@ is neither .Dv AT_FDCWD nor a file descriptor associated with a directory. 
+.It Bq Er ENOTCAPABLE +.Fa path +was absolute or contained a ".." component leading to a directory outside of +the directory hierarchy rooted by the start directory, and the +.Dv AT_BENEATH +flag was specified. .El .Sh SEE ALSO .Xr access 2 , Index: sys/cddl/compat/opensolaris/sys/vnode.h =================================================================== --- sys/cddl/compat/opensolaris/sys/vnode.h +++ sys/cddl/compat/opensolaris/sys/vnode.h @@ -278,7 +278,7 @@ ASSERT(seg == UIO_SYSSPACE); ASSERT(dirflag == RMFILE); - return (kern_unlinkat(curthread, AT_FDCWD, fnamep, seg, 0)); + return (kern_unlinkat(curthread, AT_FDCWD, fnamep, seg, 0, 0)); } #endif /* _KERNEL */ Index: sys/compat/cloudabi/cloudabi_file.c =================================================================== --- sys/compat/cloudabi/cloudabi_file.c +++ sys/compat/cloudabi/cloudabi_file.c @@ -752,9 +752,9 @@ return (error); if (uap->flags & CLOUDABI_UNLINK_REMOVEDIR) - error = kern_rmdirat(td, uap->fd, path, UIO_SYSSPACE); + error = kern_rmdirat(td, uap->fd, path, UIO_SYSSPACE, 0); else - error = kern_unlinkat(td, uap->fd, path, UIO_SYSSPACE, 0); + error = kern_unlinkat(td, uap->fd, path, UIO_SYSSPACE, 0, 0); cloudabi_freestr(path); return (error); } Index: sys/compat/linux/linux_file.c =================================================================== --- sys/compat/linux/linux_file.c +++ sys/compat/linux/linux_file.c @@ -590,7 +590,7 @@ printf(ARGS(unlink, "%s"), path); #endif - error = kern_unlinkat(td, AT_FDCWD, path, UIO_SYSSPACE, 0); + error = kern_unlinkat(td, AT_FDCWD, path, UIO_SYSSPACE, 0, 0); if (error == EPERM) { /* Introduce POSIX noncompliant behaviour of Linux */ if (kern_statat(td, 0, AT_FDCWD, path, UIO_SYSSPACE, &st, @@ -623,9 +623,9 @@ #endif if (args->flag & LINUX_AT_REMOVEDIR) - error = kern_rmdirat(td, dfd, path, UIO_SYSSPACE); + error = kern_rmdirat(td, dfd, path, UIO_SYSSPACE, 0); else - error = kern_unlinkat(td, dfd, path, UIO_SYSSPACE, 0); + error = kern_unlinkat(td, dfd, path, 
UIO_SYSSPACE, 0, 0); if (error == EPERM && !(args->flag & LINUX_AT_REMOVEDIR)) { /* Introduce POSIX noncompliant behaviour of Linux */ if (kern_statat(td, AT_SYMLINK_NOFOLLOW, dfd, path, @@ -741,7 +741,7 @@ if (ldebug(rmdir)) printf(ARGS(rmdir, "%s"), path); #endif - error = kern_rmdirat(td, AT_FDCWD, path, UIO_SYSSPACE); + error = kern_rmdirat(td, AT_FDCWD, path, UIO_SYSSPACE, 0); LFREEPATH(path); return (error); } Index: sys/kern/vfs_lookup.c =================================================================== --- sys/kern/vfs_lookup.c +++ sys/kern/vfs_lookup.c @@ -242,7 +242,8 @@ struct componentname *cnp; cnp = &ndp->ni_cnd; - if ((ndp->ni_lcf & NI_LCF_STRICTRELATIVE) != 0) { + if ((ndp->ni_lcf & NI_LCF_STRICTRELATIVE) != 0 || + (cnp->cn_flags & BENEATH) != 0) { #ifdef KTRACE if (KTRPOINT(curthread, KTR_CAPFAIL)) ktrcapfail(CAPFAIL_LOOKUP, NULL, NULL); @@ -434,8 +435,10 @@ vrele(dp); goto out; } - if ((ndp->ni_lcf & NI_LCF_STRICTRELATIVE) != 0 && - lookup_cap_dotdot != 0) + if (((ndp->ni_lcf & NI_LCF_STRICTRELATIVE) != 0 && + lookup_cap_dotdot != 0) || + ((ndp->ni_lcf & NI_LCF_STRICTRELATIVE) == 0 && + (cnp->cn_flags & BENEATH) != 0)) ndp->ni_lcf |= NI_LCF_CAP_DOTDOT; SDT_PROBE3(vfs, namei, lookup, entry, dp, cnp->cn_pnbuf, cnp->cn_flags); Index: sys/kern/vfs_mountroot.c =================================================================== --- sys/kern/vfs_mountroot.c +++ sys/kern/vfs_mountroot.c @@ -389,7 +389,7 @@ vfs_unbusy(mpdevfs); /* Unlink the no longer needed /dev/dev -> / symlink */ error = kern_unlinkat(td, AT_FDCWD, "/dev/dev", - UIO_SYSSPACE, 0); + UIO_SYSSPACE, 0, 0); if (error) printf("mountroot: unable to unlink /dev/dev " "(error %d)\n", error); Index: sys/kern/vfs_syscalls.c =================================================================== --- sys/kern/vfs_syscalls.c +++ sys/kern/vfs_syscalls.c @@ -1443,11 +1443,12 @@ int flag; flag = uap->flag; - if (flag & ~AT_SYMLINK_FOLLOW) + if ((flag & ~(AT_SYMLINK_FOLLOW | AT_BENEATH)) != 0) return 
(EINVAL); return (kern_linkat(td, uap->fd1, uap->fd2, uap->path1, uap->path2, - UIO_USERSPACE, (flag & AT_SYMLINK_FOLLOW) ? FOLLOW : NOFOLLOW)); + UIO_USERSPACE, ((flag & AT_SYMLINK_FOLLOW) != 0 ? FOLLOW : + NOFOLLOW) | ((flag & AT_BENEATH) != 0 ? BENEATH : 0))); } int hardlink_check_uid = 0; @@ -1731,7 +1732,7 @@ sys_unlink(struct thread *td, struct unlink_args *uap) { - return (kern_unlinkat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 0)); + return (kern_unlinkat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 0, 0)); } #ifndef _SYS_SYSPROTO_H_ @@ -1744,22 +1745,25 @@ int sys_unlinkat(struct thread *td, struct unlinkat_args *uap) { - int flag = uap->flag; - int fd = uap->fd; - char *path = uap->path; + int fd, flag; + char *path; + + flag = uap->flag; + fd = uap->fd; + path = uap->path; - if (flag & ~AT_REMOVEDIR) + if ((flag & ~(AT_REMOVEDIR | AT_BENEATH)) != 0) return (EINVAL); - if (flag & AT_REMOVEDIR) - return (kern_rmdirat(td, fd, path, UIO_USERSPACE)); + if ((uap->flag & AT_REMOVEDIR) != 0) + return (kern_rmdirat(td, fd, path, UIO_USERSPACE, flag)); else - return (kern_unlinkat(td, fd, path, UIO_USERSPACE, 0)); + return (kern_unlinkat(td, fd, path, UIO_USERSPACE, flag, 0)); } int kern_unlinkat(struct thread *td, int fd, char *path, enum uio_seg pathseg, - ino_t oldinum) + int flag, ino_t oldinum) { struct mount *mp; struct vnode *vp; @@ -1769,7 +1773,8 @@ restart: bwillwrite(); - NDINIT_ATRIGHTS(&nd, DELETE, LOCKPARENT | LOCKLEAF | AUDITVNODE1, + NDINIT_ATRIGHTS(&nd, DELETE, LOCKPARENT | LOCKLEAF | AUDITVNODE1 | + ((flag & AT_BENEATH) != 0 ? BENEATH : 0), pathseg, path, fd, &cap_unlinkat_rights, td); if ((error = namei(&nd)) != 0) return (error == EINVAL ? 
EPERM : error); @@ -1960,7 +1965,7 @@ struct nameidata nd; int error; - if (flag & ~AT_EACCESS) + if ((flag & ~(AT_EACCESS | AT_BENEATH)) != 0) return (EINVAL); if (amode != F_OK && (amode & ~(R_OK | W_OK | X_OK)) != 0) return (EINVAL); @@ -1981,8 +1986,8 @@ usecred = cred; AUDIT_ARG_VALUE(amode); NDINIT_ATRIGHTS(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF | - AUDITVNODE1, pathseg, path, fd, &cap_fstat_rights, - td); + AUDITVNODE1 | ((flag & AT_BENEATH) != 0 ? BENEATH : 0), + pathseg, path, fd, &cap_fstat_rights, td); if ((error = namei(&nd)) != 0) goto out; vp = nd.ni_vp; @@ -2273,11 +2278,12 @@ struct stat sb; int error; - if (flag & ~AT_SYMLINK_NOFOLLOW) + if ((flag & ~(AT_SYMLINK_NOFOLLOW | AT_BENEATH)) != 0) return (EINVAL); - NDINIT_ATRIGHTS(&nd, LOOKUP, ((flag & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW : - FOLLOW) | LOCKSHARED | LOCKLEAF | AUDITVNODE1, pathseg, path, fd, + NDINIT_ATRIGHTS(&nd, LOOKUP, ((flag & AT_SYMLINK_NOFOLLOW) != 0 ? + NOFOLLOW : FOLLOW) | ((flag & AT_BENEATH) != 0 ? BENEATH : 0) | + LOCKSHARED | LOCKLEAF | AUDITVNODE1, pathseg, path, fd, &cap_fstat_rights, td); if ((error = namei(&nd)) != 0) @@ -2588,15 +2594,12 @@ int sys_chflagsat(struct thread *td, struct chflagsat_args *uap) { - int fd = uap->fd; - const char *path = uap->path; - u_long flags = uap->flags; - int atflag = uap->atflag; - if (atflag & ~AT_SYMLINK_NOFOLLOW) + if ((uap->atflag & ~(AT_SYMLINK_NOFOLLOW | AT_BENEATH)) != 0) return (EINVAL); - return (kern_chflagsat(td, fd, path, UIO_USERSPACE, flags, atflag)); + return (kern_chflagsat(td, uap->fd, uap->path, UIO_USERSPACE, + uap->flags, uap->atflag)); } /* @@ -2625,6 +2628,7 @@ AUDIT_ARG_FFLAGS(flags); follow = (atflag & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW : FOLLOW; + follow |= (atflag & AT_BENEATH) != 0 ? 
BENEATH : 0; NDINIT_ATRIGHTS(&nd, LOOKUP, follow | AUDITVNODE1, pathseg, path, fd, &cap_fchflags_rights, td); if ((error = namei(&nd)) != 0) @@ -2719,15 +2723,12 @@ int sys_fchmodat(struct thread *td, struct fchmodat_args *uap) { - int flag = uap->flag; - int fd = uap->fd; - char *path = uap->path; - mode_t mode = uap->mode; - if (flag & ~AT_SYMLINK_NOFOLLOW) + if ((uap->flag & ~(AT_SYMLINK_NOFOLLOW | AT_BENEATH)) != 0) return (EINVAL); - return (kern_fchmodat(td, fd, path, UIO_USERSPACE, mode, flag)); + return (kern_fchmodat(td, uap->fd, uap->path, UIO_USERSPACE, + uap->mode, uap->flag)); } /* @@ -2755,7 +2756,8 @@ int error, follow; AUDIT_ARG_MODE(mode); - follow = (flag & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW : FOLLOW; + follow = (flag & AT_SYMLINK_NOFOLLOW) != 0 ? NOFOLLOW : FOLLOW; + follow |= (flag & AT_BENEATH) != 0 ? BENEATH : 0; NDINIT_ATRIGHTS(&nd, LOOKUP, follow | AUDITVNODE1, pathseg, path, fd, &cap_fchmod_rights, td); if ((error = namei(&nd)) != 0) @@ -2850,10 +2852,8 @@ int sys_fchownat(struct thread *td, struct fchownat_args *uap) { - int flag; - flag = uap->flag; - if (flag & ~AT_SYMLINK_NOFOLLOW) + if ((uap->flag & ~(AT_SYMLINK_NOFOLLOW | AT_BENEATH)) != 0) return (EINVAL); return (kern_fchownat(td, uap->fd, uap->path, UIO_USERSPACE, uap->uid, @@ -2869,6 +2869,7 @@ AUDIT_ARG_OWNER(uid, gid); follow = (flag & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW : FOLLOW; + follow |= (flag & AT_BENEATH) != 0 ? BENEATH : 0; NDINIT_ATRIGHTS(&nd, LOOKUP, follow | AUDITVNODE1, pathseg, path, fd, &cap_fchown_rights, td); @@ -3220,14 +3221,14 @@ struct timespec ts[2]; int error, flags; - if (flag & ~AT_SYMLINK_NOFOLLOW) + if ((flag & ~(AT_SYMLINK_NOFOLLOW | AT_BENEATH)) != 0) return (EINVAL); if ((error = getutimens(tptr, tptrseg, ts, &flags)) != 0) return (error); NDINIT_ATRIGHTS(&nd, LOOKUP, ((flag & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW : - FOLLOW) | AUDITVNODE1, pathseg, path, fd, - &cap_futimes_rights, td); + FOLLOW) | ((flag & AT_BENEATH) != 0 ? 
BENEATH : 0) | AUDITVNODE1, + pathseg, path, fd, &cap_futimes_rights, td); if ((error = namei(&nd)) != 0) return (error); /* @@ -3671,11 +3672,12 @@ sys_rmdir(struct thread *td, struct rmdir_args *uap) { - return (kern_rmdirat(td, AT_FDCWD, uap->path, UIO_USERSPACE)); + return (kern_rmdirat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 0)); } int -kern_rmdirat(struct thread *td, int fd, char *path, enum uio_seg pathseg) +kern_rmdirat(struct thread *td, int fd, char *path, enum uio_seg pathseg, + int flag) { struct mount *mp; struct vnode *vp; @@ -3684,7 +3686,8 @@ restart: bwillwrite(); - NDINIT_ATRIGHTS(&nd, DELETE, LOCKPARENT | LOCKLEAF | AUDITVNODE1, + NDINIT_ATRIGHTS(&nd, DELETE, LOCKPARENT | LOCKLEAF | AUDITVNODE1 | + ((flag & AT_BENEATH) != 0 ? BENEATH : 0), pathseg, path, fd, &cap_unlinkat_rights, td); if ((error = namei(&nd)) != 0) return (error); Index: sys/kern/vfs_vnops.c =================================================================== --- sys/kern/vfs_vnops.c +++ sys/kern/vfs_vnops.c @@ -212,6 +212,8 @@ ndp->ni_cnd.cn_flags = ISOPEN | LOCKPARENT | LOCKLEAF | NOCACHE; if ((fmode & O_EXCL) == 0 && (fmode & O_NOFOLLOW) == 0) ndp->ni_cnd.cn_flags |= FOLLOW; + if ((fmode & O_BENEATH) != 0) + ndp->ni_cnd.cn_flags |= BENEATH; if (!(vn_open_flags & VN_OPEN_NOAUDIT)) ndp->ni_cnd.cn_flags |= AUDITVNODE1; if (vn_open_flags & VN_OPEN_NOCAPCHECK) @@ -269,6 +271,8 @@ ((fmode & O_NOFOLLOW) ? 
NOFOLLOW : FOLLOW) | LOCKLEAF; if (!(fmode & FWRITE)) ndp->ni_cnd.cn_flags |= LOCKSHARED; + if ((fmode & O_BENEATH) != 0) + ndp->ni_cnd.cn_flags |= BENEATH; if (!(vn_open_flags & VN_OPEN_NOAUDIT)) ndp->ni_cnd.cn_flags |= AUDITVNODE1; if (vn_open_flags & VN_OPEN_NOCAPCHECK) Index: sys/sys/fcntl.h =================================================================== --- sys/sys/fcntl.h +++ sys/sys/fcntl.h @@ -133,6 +133,7 @@ #if __BSD_VISIBLE #define O_VERIFY 0x00200000 /* open only after verification */ +#define O_BENEATH 0x00400000 /* Fail if not under cwd */ #endif /* @@ -206,10 +207,12 @@ /* * Miscellaneous flags for the *at() syscalls. */ -#define AT_EACCESS 0x100 /* Check access using effective user and group ID */ -#define AT_SYMLINK_NOFOLLOW 0x200 /* Do not follow symbolic links */ -#define AT_SYMLINK_FOLLOW 0x400 /* Follow symbolic link */ -#define AT_REMOVEDIR 0x800 /* Remove directory instead of file */ +#define AT_EACCESS 0x0100 /* Check access using effective user + and group ID */ +#define AT_SYMLINK_NOFOLLOW 0x0200 /* Do not follow symbolic links */ +#define AT_SYMLINK_FOLLOW 0x0400 /* Follow symbolic link */ +#define AT_REMOVEDIR 0x0800 /* Remove directory instead of file */ +#define AT_BENEATH 0x1000 /* Fail if not under dirfd */ #endif /* Index: sys/sys/namei.h =================================================================== --- sys/sys/namei.h +++ sys/sys/namei.h @@ -119,6 +119,7 @@ #define WANTPARENT 0x0010 /* want parent vnode returned unlocked */ #define NOCACHE 0x0020 /* name must not be left in cache */ #define FOLLOW 0x0040 /* follow symbolic links */ +#define BENEATH 0x0080 /* No escape from the start dir */ #define LOCKSHARED 0x0100 /* Shared lock leaf */ #define NOFOLLOW 0x0000 /* do not follow symbolic links (pseudo) */ #define MODMASK 0x01fc /* mask of operational modifiers */ Index: sys/sys/syscallsubr.h =================================================================== --- sys/sys/syscallsubr.h +++ sys/sys/syscallsubr.h @@ -219,7 
+219,7 @@ int kern_renameat(struct thread *td, int oldfd, char *old, int newfd, char *new, enum uio_seg pathseg); int kern_rmdirat(struct thread *td, int fd, char *path, - enum uio_seg pathseg); + enum uio_seg pathseg, int flag); int kern_sched_getparam(struct thread *td, struct thread *targettd, struct sched_param *param); int kern_sched_getscheduler(struct thread *td, struct thread *targettd, @@ -286,7 +286,7 @@ int kern_truncate(struct thread *td, char *path, enum uio_seg pathseg, off_t length); int kern_unlinkat(struct thread *td, int fd, char *path, - enum uio_seg pathseg, ino_t oldinum); + enum uio_seg pathseg, int flag, ino_t oldinum); int kern_utimesat(struct thread *td, int fd, char *path, enum uio_seg pathseg, struct timeval *tptr, enum uio_seg tptrseg); int kern_utimensat(struct thread *td, int fd, char *path, Index: sys/ufs/ffs/ffs_alloc.c =================================================================== --- sys/ufs/ffs/ffs_alloc.c +++ sys/ufs/ffs/ffs_alloc.c @@ -3399,7 +3399,7 @@ vn_finished_write(mp); mp = NULL; error = kern_unlinkat(td, AT_FDCWD, (char *)(intptr_t)cmd.value, - UIO_USERSPACE, (ino_t)cmd.size); + UIO_USERSPACE, 0, (ino_t)cmd.size); break; case FFS_SET_INODE: