diff --git a/lib/libc/sys/open.2 b/lib/libc/sys/open.2 --- a/lib/libc/sys/open.2 +++ b/lib/libc/sys/open.2 @@ -28,7 +28,7 @@ .\" @(#)open.2 8.2 (Berkeley) 11/16/93 .\" $FreeBSD$ .\" -.Dd February 23, 2021 +.Dd March 18, 2021 .Dt OPEN 2 .Os .Sh NAME @@ -168,6 +168,7 @@ O_CLOEXEC set FD_CLOEXEC upon open O_VERIFY verify the contents of the file O_RESOLVE_BENEATH path resolution must not cross the fd directory +O_PATH record only the target path in the opened descriptor .Ed .Pp Opening a file with @@ -316,6 +317,35 @@ .Fn *at family of functions. .Pp +.Dv O_PATH +returns a file descriptor that can be used as a directory file descriptor +for +.Xr openat 2 +and other calls taking a directory argument, like +.Xr fstatat 2 +and others. +The other functionality of the returned file descriptor is limited to +the descriptor-level operations. +It can be used for +.Xr fcntl 2 , +.Xr dup 2 , +.Xr close 2 , +and for passing it using unix domain socket rights messages. +Additionally, +.Xr fstat 2 +works. +But operations like +.Xr read 2 , +.Xr ftruncate 2 , +and any others that operate on the file and not on the file descriptor (except +.Xr fstat 2 ), +are not allowed. +See also the description of the +.Dv AT_EMPTY_PATH +flag for +.Xr fstatat 2 +and related syscalls. +.Pp If successful, .Fn open returns a non-negative integer, termed a file descriptor. 
diff --git a/sys/kern/kern_descrip.c b/sys/kern/kern_descrip.c --- a/sys/kern/kern_descrip.c +++ b/sys/kern/kern_descrip.c @@ -4960,6 +4960,43 @@ .fo_fill_kinfo = badfo_fill_kinfo, }; +static int +path_close(struct file *fp, struct thread *td) +{ + struct vnode *vp; + struct flock lf; + + MPASS(fp->f_type == DTYPE_VNODE); + + vp = fp->f_vnode; + fp->f_ops = &badfileops; + if ((fp->f_flag & FHASLOCK) != 0) { + lf.l_whence = SEEK_SET; + lf.l_start = 0; + lf.l_len = 0; + lf.l_type = F_UNLCK; + (void) VOP_ADVLOCK(vp, fp, F_UNLCK, &lf, F_FLOCK); + } + vrele(vp); + return (0); +} + +struct fileops path_fileops = { + .fo_read = badfo_readwrite, + .fo_write = badfo_readwrite, + .fo_truncate = badfo_truncate, + .fo_ioctl = badfo_ioctl, + .fo_poll = badfo_poll, + .fo_kqfilter = badfo_kqfilter, + .fo_stat = vn_statfile, + .fo_close = path_close, + .fo_chmod = badfo_chmod, + .fo_chown = badfo_chown, + .fo_sendfile = badfo_sendfile, + .fo_fill_kinfo = vn_fill_kinfo, + .fo_flags = DFLAG_PASSABLE, +}; + int invfo_rdwr(struct file *fp, struct uio *uio, struct ucred *active_cred, int flags, struct thread *td) diff --git a/sys/kern/vfs_lookup.c b/sys/kern/vfs_lookup.c --- a/sys/kern/vfs_lookup.c +++ b/sys/kern/vfs_lookup.c @@ -359,8 +359,10 @@ if (cnp->cn_flags & AUDITVNODE2) AUDIT_ARG_ATFD2(ndp->ni_dirfd); /* - * Effectively inlined fgetvp_rights, because we need to - * inspect the file as well as grabbing the vnode. + * Effectively inlined fgetvp_rights, because + * we need to inspect the file as well as + * grabbing the vnode. O_PATH files are not + * filtered out, to implement O_PATH semantics. 
*/ error = fget_cap(td, ndp->ni_dirfd, &rights, &dfp, &ndp->ni_filecaps); diff --git a/sys/kern/vfs_syscalls.c b/sys/kern/vfs_syscalls.c --- a/sys/kern/vfs_syscalls.c +++ b/sys/kern/vfs_syscalls.c @@ -373,7 +373,7 @@ int error; AUDIT_ARG_FD(fd); - error = getvnode(td, fd, &cap_fstatfs_rights, &fp); + error = getvnode_path(td, fd, &cap_fstatfs_rights, &fp); if (error != 0) return (error); vp = fp->f_vnode; @@ -889,7 +889,7 @@ int error; AUDIT_ARG_FD(uap->fd); - error = getvnode(td, uap->fd, &cap_fchdir_rights, + error = getvnode_path(td, uap->fd, &cap_fchdir_rights, &fp); if (error != 0) return (error); @@ -1174,14 +1174,16 @@ * files that switched type in the cdevsw fdopen() method. */ fp->f_vnode = vp; + /* * If the file wasn't claimed by devfs bind it to the normal * vnode operations here. */ if (fp->f_ops == &badfileops) { - KASSERT(vp->v_type != VFIFO, + KASSERT(vp->v_type != VFIFO || (flags & O_PATH) != 0, ("Unexpected fifo fp %p vp %p", fp, vp)); - finit_vnode(fp, flags, NULL, &vnops); + finit_vnode(fp, flags, NULL, (flags & O_PATH) != 0 ? 
+ &path_fileops : &vnops); } VOP_UNLOCK(vp); @@ -1862,7 +1864,7 @@ fp = NULL; if (fd != FD_NONE) { - error = getvnode(td, fd, &cap_no_rights, &fp); + error = getvnode_path(td, fd, &cap_no_rights, &fp); if (error != 0) return (error); } @@ -1881,8 +1883,8 @@ if (vp->v_type == VDIR && oldinum == 0) { error = EPERM; /* POSIX */ } else if (oldinum != 0 && - ((error = VOP_STAT(vp, &sb, td->td_ucred, NOCRED, td)) == 0) && - sb.st_ino != oldinum) { + ((error = VOP_STAT(vp, &sb, td->td_ucred, NOCRED, td)) == 0) && + sb.st_ino != oldinum) { error = EIDRM; /* Identifier removed */ } else if (fp != NULL && fp->f_vnode != vp) { if (VN_IS_DOOMED(fp->f_vnode)) @@ -3465,7 +3467,7 @@ int error, lock_flags; AUDIT_ARG_FD(fd); - error = getvnode(td, fd, &cap_fsync_rights, &fp); + error = getvnode_path(td, fd, &cap_fsync_rights, &fp); if (error != 0) return (error); vp = fp->f_vnode; @@ -3818,8 +3820,8 @@ fp = NULL; if (fd != FD_NONE) { - error = getvnode(td, fd, cap_rights_init_one(&rights, CAP_LOOKUP), - &fp); + error = getvnode(td, fd, cap_rights_init_one(&rights, + CAP_LOOKUP), &fp); if (error != 0) return (error); } @@ -4234,8 +4236,9 @@ * is a capability, the correct rights are present. A reference on the file * entry is held upon returning. */ -int -getvnode(struct thread *td, int fd, cap_rights_t *rightsp, struct file **fpp) +static int +getvnode_int(struct thread *td, int fd, cap_rights_t *rightsp, + struct file **fpp, bool path_allowed) { struct file *fp; int error; @@ -4255,8 +4258,12 @@ * half-baked file into the process descriptor table, allowing * other thread to dereference it. Guard against the race by * checking f_ops. + * + * Filter out O_PATH file descriptors, most getvnode() callers + * do not call fo_ methods. 
*/ - if (fp->f_vnode == NULL || fp->f_ops == &badfileops) { + if (fp->f_vnode == NULL || fp->f_ops == &badfileops || + (!path_allowed && fp->f_ops == &path_fileops)) { fdrop(fp, td); return (EINVAL); } @@ -4264,6 +4271,23 @@ return (0); } +int +getvnode(struct thread *td, int fd, cap_rights_t *rightsp, struct file **fpp) +{ + return (getvnode_int(td, fd, rightsp, fpp, false)); +} + +/* + * Like getvnode(), but allow O_PATH files. Caller should ensure that + * returned file and vnode are only used for compatible semantics. + */ +int +getvnode_path(struct thread *td, int fd, cap_rights_t *rightsp, + struct file **fpp) +{ + return (getvnode_int(td, fd, rightsp, fpp, true)); +} + /* * Get an (NFS) file handle. */ diff --git a/sys/kern/vfs_vnops.c b/sys/kern/vfs_vnops.c --- a/sys/kern/vfs_vnops.c +++ b/sys/kern/vfs_vnops.c @@ -102,7 +102,6 @@ static fo_ioctl_t vn_ioctl; static fo_poll_t vn_poll; static fo_kqfilter_t vn_kqfilter; -static fo_stat_t vn_statfile; static fo_close_t vn_closefile; static fo_mmap_t vn_mmap; static fo_fallocate_t vn_fallocate; @@ -392,6 +391,12 @@ return (EOPNOTSUPP); if (vp->v_type != VDIR && fmode & O_DIRECTORY) return (ENOTDIR); + + if ((fmode & O_PATH) != 0) { + error = vn_open_vnode_advlock(vp, fmode, fp); + return (error); + } + accmode = 0; if (fmode & (FWRITE | O_TRUNC)) { if (vp->v_type == VDIR) @@ -1616,7 +1621,7 @@ /* * File table vnode stat routine. 
*/ -static int +int vn_statfile(struct file *fp, struct stat *sb, struct ucred *active_cred, struct thread *td) { @@ -1775,7 +1780,7 @@ vp = fp->f_vnode; fp->f_ops = &badfileops; - ref= (fp->f_flag & FHASLOCK) != 0 && fp->f_type == DTYPE_VNODE; + ref = (fp->f_flag & FHASLOCK) != 0 && fp->f_type == DTYPE_VNODE; error = vn_close1(vp, fp->f_flag, fp->f_cred, td, ref); diff --git a/sys/sys/fcntl.h b/sys/sys/fcntl.h --- a/sys/sys/fcntl.h +++ b/sys/sys/fcntl.h @@ -135,7 +135,7 @@ #if __BSD_VISIBLE #define O_VERIFY 0x00200000 /* open only after verification */ -/* #define O_UNUSED1 0x00400000 */ /* Was O_BENEATH */ +#define O_PATH 0x00400000 /* fd is only a path */ #define O_RESOLVE_BENEATH 0x00800000 /* Do not allow name resolution to walk out of cwd */ #endif diff --git a/sys/sys/file.h b/sys/sys/file.h --- a/sys/sys/file.h +++ b/sys/sys/file.h @@ -239,6 +239,7 @@ extern struct fileops vnops; extern struct fileops badfileops; +extern struct fileops path_fileops; extern struct fileops socketops; extern int maxfiles; /* kernel limit on number of open files */ extern int maxfilesperproc; /* per process limit on number of open files */ @@ -262,7 +263,7 @@ fo_chmod_t invfo_chmod; fo_chown_t invfo_chown; fo_sendfile_t invfo_sendfile; - +fo_stat_t vn_statfile; fo_sendfile_t vn_sendfile; fo_seek_t vn_seek; fo_fill_kinfo_t vn_fill_kinfo; diff --git a/sys/sys/filedesc.h b/sys/sys/filedesc.h --- a/sys/sys/filedesc.h +++ b/sys/sys/filedesc.h @@ -265,6 +265,8 @@ struct filedesc *fdp, struct proc *leader); int getvnode(struct thread *td, int fd, cap_rights_t *rightsp, struct file **fpp); +int getvnode_path(struct thread *td, int fd, cap_rights_t *rightsp, + struct file **fpp); void mountcheckdirs(struct vnode *olddp, struct vnode *newdp); int fget_cap_locked(struct filedesc *fdp, int fd, cap_rights_t *needrightsp,