diff --git a/lib/libc/sys/open.2 b/lib/libc/sys/open.2 --- a/lib/libc/sys/open.2 +++ b/lib/libc/sys/open.2 @@ -28,7 +28,7 @@ .\" @(#)open.2 8.2 (Berkeley) 11/16/93 .\" $FreeBSD$ .\" -.Dd February 23, 2021 +.Dd March 18, 2021 .Dt OPEN 2 .Os .Sh NAME @@ -168,6 +168,7 @@ O_CLOEXEC set FD_CLOEXEC upon open O_VERIFY verify the contents of the file O_RESOLVE_BENEATH path resolution must not cross the fd directory +O_PATH record only the target path in the opened descriptor .Ed .Pp Opening a file with @@ -316,6 +317,36 @@ .Fn *at family of functions. .Pp +.Dv O_PATH +returns a file descriptor that can be used as a directory file descriptor +for +.Xr openat 2 +and other system calls taking a directory argument, like +.Xr fstatat 2 +and others. +The other functionality of the returned file descriptor is limited to +descriptor-level operations. +It can be used for +.Xr fcntl 2 , +.Xr dup 2 , +.Xr close 2 , +and for passing it using unix domain socket rights messages, +.Xr unix 4 . +Additionally, +.Xr fstat 2 +works. +But operations like +.Xr read 2 , +.Xr ftruncate 2 , +and any others that operate on the file and not on the file descriptor (except +.Xr fstat 2 ), +are not allowed. +See also the description of the +.Dv AT_EMPTY_PATH +flag for +.Xr fstatat 2 +and related syscalls. +.Pp If successful, .Fn open returns a non-negative integer, termed a file descriptor. 
diff --git a/sys/kern/kern_descrip.c b/sys/kern/kern_descrip.c --- a/sys/kern/kern_descrip.c +++ b/sys/kern/kern_descrip.c @@ -61,6 +61,7 @@ #include #include #include +#include #include #include #include @@ -610,7 +611,7 @@ error = fget_unlocked(fdp, fd, &cap_flock_rights, &fp); if (error != 0) break; - if (fp->f_type != DTYPE_VNODE) { + if (fp->f_type != DTYPE_VNODE || fp->f_ops == &path_fileops) { error = EBADF; fdrop(fp, td); break; @@ -4960,6 +4961,38 @@ .fo_fill_kinfo = badfo_fill_kinfo, }; +static int +path_poll(struct file *fp, int events, struct ucred *active_cred, + struct thread *td) +{ + return (POLLNVAL); +} + +static int +path_close(struct file *fp, struct thread *td) +{ + MPASS(fp->f_type == DTYPE_VNODE); + fp->f_ops = &badfileops; + vrele(fp->f_vnode); + return (0); +} + +struct fileops path_fileops = { + .fo_read = badfo_readwrite, + .fo_write = badfo_readwrite, + .fo_truncate = badfo_truncate, + .fo_ioctl = badfo_ioctl, + .fo_poll = path_poll, + .fo_kqfilter = badfo_kqfilter, + .fo_stat = vn_statfile, + .fo_close = path_close, + .fo_chmod = badfo_chmod, + .fo_chown = badfo_chown, + .fo_sendfile = badfo_sendfile, + .fo_fill_kinfo = vn_fill_kinfo, + .fo_flags = DFLAG_PASSABLE, +}; + int invfo_rdwr(struct file *fp, struct uio *uio, struct ucred *active_cred, int flags, struct thread *td) diff --git a/sys/kern/vfs_lookup.c b/sys/kern/vfs_lookup.c --- a/sys/kern/vfs_lookup.c +++ b/sys/kern/vfs_lookup.c @@ -360,8 +360,10 @@ if (cnp->cn_flags & AUDITVNODE2) AUDIT_ARG_ATFD2(ndp->ni_dirfd); /* - * Effectively inlined fgetvp_rights, because we need to - * inspect the file as well as grabbing the vnode. + * Effectively inlined fgetvp_rights, because + * we need to inspect the file as well as + * grabbing the vnode. O_PATH files are not + * filtered out, to implement O_PATH semantics. 
*/ error = fget_cap(td, ndp->ni_dirfd, &rights, &dfp, &ndp->ni_filecaps); diff --git a/sys/kern/vfs_syscalls.c b/sys/kern/vfs_syscalls.c --- a/sys/kern/vfs_syscalls.c +++ b/sys/kern/vfs_syscalls.c @@ -373,7 +373,7 @@ int error; AUDIT_ARG_FD(fd); - error = getvnode(td, fd, &cap_fstatfs_rights, &fp); + error = getvnode_path(td, fd, &cap_fstatfs_rights, &fp); if (error != 0) return (error); vp = fp->f_vnode; @@ -889,7 +889,7 @@ int error; AUDIT_ARG_FD(uap->fd); - error = getvnode(td, uap->fd, &cap_fchdir_rights, + error = getvnode_path(td, uap->fd, &cap_fchdir_rights, &fp); if (error != 0) return (error); @@ -1109,12 +1109,16 @@ AUDIT_ARG_FFLAGS(flags); AUDIT_ARG_MODE(mode); cap_rights_init_one(&rights, CAP_LOOKUP); - flags_to_rights(flags, &rights); + if ((flags & O_PATH) == 0) + flags_to_rights(flags, &rights); + /* * Only one of the O_EXEC, O_RDONLY, O_WRONLY and O_RDWR flags * may be specified. */ - if (flags & O_EXEC) { + if ((flags & O_PATH) != 0) { + flags &= ~(O_EXEC | O_ACCMODE); + } else if ((flags & O_EXEC) != 0) { if (flags & O_ACCMODE) return (EINVAL); } else if ((flags & O_ACCMODE) == O_ACCMODE) { @@ -1174,14 +1178,16 @@ * files that switched type in the cdevsw fdopen() method. */ fp->f_vnode = vp; + /* * If the file wasn't claimed by devfs bind it to the normal * vnode operations here. */ if (fp->f_ops == &badfileops) { - KASSERT(vp->v_type != VFIFO, + KASSERT(vp->v_type != VFIFO || (flags & O_PATH) != 0, ("Unexpected fifo fp %p vp %p", fp, vp)); - finit_vnode(fp, flags, NULL, &vnops); + finit_vnode(fp, flags, NULL, (flags & O_PATH) != 0 ? 
+ &path_fileops : &vnops); } VOP_UNLOCK(vp); @@ -1862,7 +1868,7 @@ fp = NULL; if (fd != FD_NONE) { - error = getvnode(td, fd, &cap_no_rights, &fp); + error = getvnode_path(td, fd, &cap_no_rights, &fp); if (error != 0) return (error); } @@ -1881,8 +1887,8 @@ if (vp->v_type == VDIR && oldinum == 0) { error = EPERM; /* POSIX */ } else if (oldinum != 0 && - ((error = VOP_STAT(vp, &sb, td->td_ucred, NOCRED, td)) == 0) && - sb.st_ino != oldinum) { + ((error = VOP_STAT(vp, &sb, td->td_ucred, NOCRED, td)) == 0) && + sb.st_ino != oldinum) { error = EIDRM; /* Identifier removed */ } else if (fp != NULL && fp->f_vnode != vp) { if (VN_IS_DOOMED(fp->f_vnode)) @@ -3465,7 +3471,7 @@ int error, lock_flags; AUDIT_ARG_FD(fd); - error = getvnode(td, fd, &cap_fsync_rights, &fp); + error = getvnode_path(td, fd, &cap_fsync_rights, &fp); if (error != 0) return (error); vp = fp->f_vnode; @@ -3818,8 +3824,8 @@ fp = NULL; if (fd != FD_NONE) { - error = getvnode(td, fd, cap_rights_init_one(&rights, CAP_LOOKUP), - &fp); + error = getvnode(td, fd, cap_rights_init_one(&rights, + CAP_LOOKUP), &fp); if (error != 0) return (error); } @@ -4230,12 +4236,13 @@ } /* - * Convert a user file descriptor to a kernel file entry and check that, if it - * is a capability, the correct rights are present. A reference on the file - * entry is held upon returning. + * This variant of getvnode() allows O_PATH files. Caller should + * ensure that returned file and vnode are only used for compatible + * semantics. */ int -getvnode(struct thread *td, int fd, cap_rights_t *rightsp, struct file **fpp) +getvnode_path(struct thread *td, int fd, cap_rights_t *rightsp, + struct file **fpp) { struct file *fp; int error; @@ -4260,10 +4267,35 @@ fdrop(fp, td); return (EINVAL); } + *fpp = fp; return (0); } +/* + * Convert a user file descriptor to a kernel file entry and check + * that, if it is a capability, the correct rights are present. + * A reference on the file entry is held upon returning. 
+ */ +int +getvnode(struct thread *td, int fd, cap_rights_t *rightsp, struct file **fpp) +{ + int error; + + error = getvnode_path(td, fd, rightsp, fpp); + + /* + * Filter out O_PATH file descriptors, most getvnode() callers + * do not call fo_ methods. + */ + if (error == 0 && (*fpp)->f_ops == &path_fileops) { + fdrop(*fpp, td); + error = EBADF; + } + + return (error); +} + /* * Get an (NFS) file handle. */ diff --git a/sys/kern/vfs_vnops.c b/sys/kern/vfs_vnops.c --- a/sys/kern/vfs_vnops.c +++ b/sys/kern/vfs_vnops.c @@ -102,7 +102,6 @@ static fo_ioctl_t vn_ioctl; static fo_poll_t vn_poll; static fo_kqfilter_t vn_kqfilter; -static fo_stat_t vn_statfile; static fo_close_t vn_closefile; static fo_mmap_t vn_mmap; static fo_fallocate_t vn_fallocate; @@ -392,6 +391,10 @@ return (EOPNOTSUPP); if (vp->v_type != VDIR && fmode & O_DIRECTORY) return (ENOTDIR); + + if ((fmode & O_PATH) != 0) + return (0); + accmode = 0; if (fmode & (FWRITE | O_TRUNC)) { if (vp->v_type == VDIR) @@ -1616,7 +1619,7 @@ /* * File table vnode stat routine. 
*/ -static int +int vn_statfile(struct file *fp, struct stat *sb, struct ucred *active_cred, struct thread *td) { @@ -1775,7 +1778,7 @@ vp = fp->f_vnode; fp->f_ops = &badfileops; - ref= (fp->f_flag & FHASLOCK) != 0 && fp->f_type == DTYPE_VNODE; + ref = (fp->f_flag & FHASLOCK) != 0 && fp->f_type == DTYPE_VNODE; error = vn_close1(vp, fp->f_flag, fp->f_cred, td, ref); diff --git a/sys/sys/fcntl.h b/sys/sys/fcntl.h --- a/sys/sys/fcntl.h +++ b/sys/sys/fcntl.h @@ -135,7 +135,7 @@ #if __BSD_VISIBLE #define O_VERIFY 0x00200000 /* open only after verification */ -/* #define O_UNUSED1 0x00400000 */ /* Was O_BENEATH */ +#define O_PATH 0x00400000 /* fd is only a path */ #define O_RESOLVE_BENEATH 0x00800000 /* Do not allow name resolution to walk out of cwd */ #endif diff --git a/sys/sys/file.h b/sys/sys/file.h --- a/sys/sys/file.h +++ b/sys/sys/file.h @@ -239,6 +239,7 @@ extern struct fileops vnops; extern struct fileops badfileops; +extern struct fileops path_fileops; extern struct fileops socketops; extern int maxfiles; /* kernel limit on number of open files */ extern int maxfilesperproc; /* per process limit on number of open files */ @@ -262,7 +263,7 @@ fo_chmod_t invfo_chmod; fo_chown_t invfo_chown; fo_sendfile_t invfo_sendfile; - +fo_stat_t vn_statfile; fo_sendfile_t vn_sendfile; fo_seek_t vn_seek; fo_fill_kinfo_t vn_fill_kinfo; diff --git a/sys/sys/filedesc.h b/sys/sys/filedesc.h --- a/sys/sys/filedesc.h +++ b/sys/sys/filedesc.h @@ -265,6 +265,8 @@ struct filedesc *fdp, struct proc *leader); int getvnode(struct thread *td, int fd, cap_rights_t *rightsp, struct file **fpp); +int getvnode_path(struct thread *td, int fd, cap_rights_t *rightsp, + struct file **fpp); void mountcheckdirs(struct vnode *olddp, struct vnode *newdp); int fget_cap_locked(struct filedesc *fdp, int fd, cap_rights_t *needrightsp,