Page MenuHomeFreeBSD

D29323.id87084.diff
No OneTemporary

D29323.id87084.diff

diff --git a/lib/libc/sys/open.2 b/lib/libc/sys/open.2
--- a/lib/libc/sys/open.2
+++ b/lib/libc/sys/open.2
@@ -28,7 +28,7 @@
.\" @(#)open.2 8.2 (Berkeley) 11/16/93
.\" $FreeBSD$
.\"
-.Dd February 23, 2021
+.Dd March 18, 2021
.Dt OPEN 2
.Os
.Sh NAME
@@ -168,6 +168,7 @@
O_CLOEXEC set FD_CLOEXEC upon open
O_VERIFY verify the contents of the file
O_RESOLVE_BENEATH path resolution must not cross the fd directory
+O_PATH record only the target path in the opened descriptor
.Ed
.Pp
Opening a file with
@@ -316,6 +317,50 @@
.Fn *at
family of functions.
.Pp
+.Dv O_PATH
+returns a file descriptor that can be used as a directory file descriptor for
+.Xr openat 2
+and other system calls taking a file descriptor argument, like
+.Xr fstatat 2
+and others.
+Other functionality of the returned file descriptor is limited to
+descriptor-level operations.
+It can be used for
+.Bl -tag -width SCM_RIGHTS -offset indent -compact
+.It Xr fcntl 2
+but advisory locking is not allowed
+.It Xr dup 2
+.It Xr close 2
+.It Xr fstat 2
+.It Xr fexecve 2
+requires that
+.Dv O_EXEC
+was also specified at open time
+.It Dv SCM_RIGHTS
+can be passed over a
+.Xr unix 4
+socket using a
+.Dv SCM_RIGHTS
+message
+.It Xr kqueue 2
+when used with
+.Dv EVFILT_VNODE
+.El
+However, operations like
+.Xr read 2 ,
+.Xr ftruncate 2 ,
+and any others that operate on the file and not on the file descriptor (except
+.Xr fstat 2 ),
+are not allowed.
+A file opened with the
+.Dv O_PATH
+flag does not prevent a non-forced unmount of the volume it belongs to.
+See also the description of the
+.Dv AT_EMPTY_PATH
+flag for
+.Xr fstatat 2
+and related syscalls.
+.Pp
If successful,
.Fn open
returns a non-negative integer, termed a file descriptor.
diff --git a/sys/kern/kern_descrip.c b/sys/kern/kern_descrip.c
--- a/sys/kern/kern_descrip.c
+++ b/sys/kern/kern_descrip.c
@@ -61,6 +61,7 @@
#include <sys/mutex.h>
#include <sys/namei.h>
#include <sys/selinfo.h>
+#include <sys/poll.h>
#include <sys/priv.h>
#include <sys/proc.h>
#include <sys/protosw.h>
@@ -610,7 +611,7 @@
error = fget_unlocked(fdp, fd, &cap_flock_rights, &fp);
if (error != 0)
break;
- if (fp->f_type != DTYPE_VNODE) {
+ if (fp->f_type != DTYPE_VNODE || fp->f_ops == &path_fileops) {
error = EBADF;
fdrop(fp, td);
break;
@@ -715,7 +716,7 @@
error = fget_unlocked(fdp, fd, &cap_flock_rights, &fp);
if (error != 0)
break;
- if (fp->f_type != DTYPE_VNODE) {
+ if (fp->f_type != DTYPE_VNODE || fp->f_ops == &path_fileops) {
error = EBADF;
fdrop(fp, td);
break;
@@ -3424,7 +3425,7 @@
error = EINVAL;
} else {
*vpp = fp->f_vnode;
- vrefact(*vpp);
+ vref(*vpp);
}
fdrop(fp, td);
@@ -3460,7 +3461,7 @@
*havecaps = caps;
*vpp = fp->f_vnode;
- vrefact(*vpp);
+ vref(*vpp);
fdrop(fp, td);
return (0);
@@ -3544,7 +3545,7 @@
error = fget(td, uap->fd, &cap_flock_rights, &fp);
if (error != 0)
return (error);
- if (fp->f_type != DTYPE_VNODE) {
+ if (fp->f_type != DTYPE_VNODE || fp->f_ops == &path_fileops) {
fdrop(fp, td);
return (EOPNOTSUPP);
}
@@ -4960,6 +4961,38 @@
.fo_fill_kinfo = badfo_fill_kinfo,
};
+static int
+path_poll(struct file *fp, int events, struct ucred *active_cred,
+ struct thread *td)
+{
+ return (POLLNVAL);
+}
+
+static int
+path_close(struct file *fp, struct thread *td)
+{
+ MPASS(fp->f_type == DTYPE_VNODE);
+ fp->f_ops = &badfileops;
+ vdrop(fp->f_vnode);
+ return (0);
+}
+
+struct fileops path_fileops = {
+ .fo_read = badfo_readwrite,
+ .fo_write = badfo_readwrite,
+ .fo_truncate = badfo_truncate,
+ .fo_ioctl = badfo_ioctl,
+ .fo_poll = path_poll,
+ .fo_kqfilter = vn_kqfilter_opath,
+ .fo_stat = vn_statfile,
+ .fo_close = path_close,
+ .fo_chmod = badfo_chmod,
+ .fo_chown = badfo_chown,
+ .fo_sendfile = badfo_sendfile,
+ .fo_fill_kinfo = vn_fill_kinfo,
+ .fo_flags = DFLAG_PASSABLE,
+};
+
int
invfo_rdwr(struct file *fp, struct uio *uio, struct ucred *active_cred,
int flags, struct thread *td)
diff --git a/sys/kern/vfs_lookup.c b/sys/kern/vfs_lookup.c
--- a/sys/kern/vfs_lookup.c
+++ b/sys/kern/vfs_lookup.c
@@ -360,8 +360,10 @@
if (cnp->cn_flags & AUDITVNODE2)
AUDIT_ARG_ATFD2(ndp->ni_dirfd);
/*
- * Effectively inlined fgetvp_rights, because we need to
- * inspect the file as well as grabbing the vnode.
+ * Effectively inlined fgetvp_rights, because
+ * we need to inspect the file as well as
+ * grabbing the vnode. O_PATH files are deliberately
+ * not filtered out here, to implement their semantics.
*/
error = fget_cap(td, ndp->ni_dirfd, &rights,
&dfp, &ndp->ni_filecaps);
@@ -378,7 +380,7 @@
error = ENOTDIR;
} else {
*dpp = dfp->f_vnode;
- vrefact(*dpp);
+ vref(*dpp);
if ((dfp->f_flag & FSEARCH) != 0)
cnp->cn_flags |= NOEXECCHECK;
diff --git a/sys/kern/vfs_syscalls.c b/sys/kern/vfs_syscalls.c
--- a/sys/kern/vfs_syscalls.c
+++ b/sys/kern/vfs_syscalls.c
@@ -373,7 +373,7 @@
int error;
AUDIT_ARG_FD(fd);
- error = getvnode(td, fd, &cap_fstatfs_rights, &fp);
+ error = getvnode_path(td, fd, &cap_fstatfs_rights, &fp);
if (error != 0)
return (error);
vp = fp->f_vnode;
@@ -889,12 +889,12 @@
int error;
AUDIT_ARG_FD(uap->fd);
- error = getvnode(td, uap->fd, &cap_fchdir_rights,
+ error = getvnode_path(td, uap->fd, &cap_fchdir_rights,
&fp);
if (error != 0)
return (error);
vp = fp->f_vnode;
- vrefact(vp);
+ vref(vp);
fdrop(fp, td);
vn_lock(vp, LK_SHARED | LK_RETRY);
AUDIT_ARG_VNODE1(vp);
@@ -1021,9 +1021,10 @@
static __inline void
flags_to_rights(int flags, cap_rights_t *rightsp)
{
-
if (flags & O_EXEC) {
cap_rights_set_one(rightsp, CAP_FEXECVE);
+ if (flags & O_PATH)
+ return;
} else {
switch ((flags & O_ACCMODE)) {
case O_RDONLY:
@@ -1110,11 +1111,15 @@
AUDIT_ARG_MODE(mode);
cap_rights_init_one(&rights, CAP_LOOKUP);
flags_to_rights(flags, &rights);
+
/*
* Only one of the O_EXEC, O_RDONLY, O_WRONLY and O_RDWR flags
- * may be specified.
+ * may be specified. On the other hand, for O_PATH any mode
+ * except O_EXEC is ignored.
*/
- if (flags & O_EXEC) {
+ if ((flags & O_PATH) != 0) {
+ flags &= ~(O_CREAT | O_ACCMODE);
+ } else if ((flags & O_EXEC) != 0) {
if (flags & O_ACCMODE)
return (EINVAL);
} else if ((flags & O_ACCMODE) == O_ACCMODE) {
@@ -1174,14 +1179,22 @@
* files that switched type in the cdevsw fdopen() method.
*/
fp->f_vnode = vp;
+
/*
* If the file wasn't claimed by devfs bind it to the normal
* vnode operations here.
*/
if (fp->f_ops == &badfileops) {
- KASSERT(vp->v_type != VFIFO,
+ KASSERT(vp->v_type != VFIFO || (flags & O_PATH) != 0,
("Unexpected fifo fp %p vp %p", fp, vp));
- finit_vnode(fp, flags, NULL, &vnops);
+ if ((flags & O_PATH) != 0) {
+ finit(fp, (flags & FMASK) | (fp->f_flag & FKQALLOWED),
+ DTYPE_VNODE, NULL, &path_fileops);
+ vhold(vp);
+ vunref(vp);
+ } else {
+ finit_vnode(fp, flags, NULL, &vnops);
+ }
}
VOP_UNLOCK(vp);
@@ -1862,7 +1875,7 @@
fp = NULL;
if (fd != FD_NONE) {
- error = getvnode(td, fd, &cap_no_rights, &fp);
+ error = getvnode_path(td, fd, &cap_no_rights, &fp);
if (error != 0)
return (error);
}
@@ -1881,8 +1894,8 @@
if (vp->v_type == VDIR && oldinum == 0) {
error = EPERM; /* POSIX */
} else if (oldinum != 0 &&
- ((error = VOP_STAT(vp, &sb, td->td_ucred, NOCRED, td)) == 0) &&
- sb.st_ino != oldinum) {
+ ((error = VOP_STAT(vp, &sb, td->td_ucred, NOCRED, td)) == 0) &&
+ sb.st_ino != oldinum) {
error = EIDRM; /* Identifier removed */
} else if (fp != NULL && fp->f_vnode != vp) {
if (VN_IS_DOOMED(fp->f_vnode))
@@ -3818,8 +3831,8 @@
fp = NULL;
if (fd != FD_NONE) {
- error = getvnode(td, fd, cap_rights_init_one(&rights, CAP_LOOKUP),
- &fp);
+ error = getvnode(td, fd, cap_rights_init_one(&rights,
+ CAP_LOOKUP), &fp);
if (error != 0)
return (error);
}
@@ -4230,12 +4243,13 @@
}
/*
- * Convert a user file descriptor to a kernel file entry and check that, if it
- * is a capability, the correct rights are present. A reference on the file
- * entry is held upon returning.
+ * This variant of getvnode() allows O_PATH files. The caller must
+ * ensure that the returned file and vnode are only used in ways
+ * compatible with O_PATH semantics.
*/
int
-getvnode(struct thread *td, int fd, cap_rights_t *rightsp, struct file **fpp)
+getvnode_path(struct thread *td, int fd, cap_rights_t *rightsp,
+ struct file **fpp)
{
struct file *fp;
int error;
@@ -4260,10 +4274,35 @@
fdrop(fp, td);
return (EINVAL);
}
+
*fpp = fp;
return (0);
}
+/*
+ * Convert a user file descriptor to a kernel file entry and check
+ * that, if it is a capability, the correct rights are present.
+ * A reference on the file entry is held upon returning.
+ */
+int
+getvnode(struct thread *td, int fd, cap_rights_t *rightsp, struct file **fpp)
+{
+ int error;
+
+ error = getvnode_path(td, fd, rightsp, fpp);
+
+ /*
+ * Filter out O_PATH file descriptors, because most getvnode()
+ * callers do not call fo_ methods.
+ */
+ if (error == 0 && (*fpp)->f_ops == &path_fileops) {
+ fdrop(*fpp, td);
+ error = EBADF;
+ }
+
+ return (error);
+}
+
/*
* Get an (NFS) file handle.
*/
diff --git a/sys/kern/vfs_vnops.c b/sys/kern/vfs_vnops.c
--- a/sys/kern/vfs_vnops.c
+++ b/sys/kern/vfs_vnops.c
@@ -102,7 +102,6 @@
static fo_ioctl_t vn_ioctl;
static fo_poll_t vn_poll;
static fo_kqfilter_t vn_kqfilter;
-static fo_stat_t vn_statfile;
static fo_close_t vn_closefile;
static fo_mmap_t vn_mmap;
static fo_fallocate_t vn_fallocate;
@@ -386,31 +385,38 @@
accmode_t accmode;
int error;
- if (vp->v_type == VLNK)
- return (EMLINK);
+ if (vp->v_type == VLNK) {
+ if ((fmode & O_PATH) == 0 || (fmode & FEXEC) != 0)
+ return (EMLINK);
+ }
if (vp->v_type == VSOCK)
return (EOPNOTSUPP);
if (vp->v_type != VDIR && fmode & O_DIRECTORY)
return (ENOTDIR);
+
accmode = 0;
- if (fmode & (FWRITE | O_TRUNC)) {
- if (vp->v_type == VDIR)
- return (EISDIR);
- accmode |= VWRITE;
+ if ((fmode & O_PATH) == 0) {
+ if ((fmode & (FWRITE | O_TRUNC)) != 0) {
+ if (vp->v_type == VDIR)
+ return (EISDIR);
+ accmode |= VWRITE;
+ }
+ if ((fmode & FREAD) != 0)
+ accmode |= VREAD;
+ if ((fmode & O_APPEND) && (fmode & FWRITE))
+ accmode |= VAPPEND;
+#ifdef MAC
+ if ((fmode & O_CREAT) != 0)
+ accmode |= VCREAT;
+#endif
}
- if (fmode & FREAD)
- accmode |= VREAD;
- if (fmode & FEXEC)
+ if ((fmode & FEXEC) != 0)
accmode |= VEXEC;
- if ((fmode & O_APPEND) && (fmode & FWRITE))
- accmode |= VAPPEND;
#ifdef MAC
- if (fmode & O_CREAT)
- accmode |= VCREAT;
- if (fmode & O_VERIFY)
+ if ((fmode & O_VERIFY) != 0)
accmode |= VVERIFY;
error = mac_vnode_check_open(cred, vp, accmode);
- if (error)
+ if (error != 0)
return (error);
accmode &= ~(VCREAT | VVERIFY);
@@ -420,6 +426,13 @@
if (error != 0)
return (error);
}
+ if ((fmode & O_PATH) != 0) {
+ error = VOP_ACCESS(vp, VREAD, cred, td);
+ if (error == 0)
+ fp->f_flag |= FKQALLOWED;
+ return (0);
+ }
+
if (vp->v_type == VFIFO && VOP_ISLOCKED(vp) != LK_EXCLUSIVE)
vn_lock(vp, LK_UPGRADE | LK_RETRY);
error = VOP_OPEN(vp, fmode, cred, td, fp);
@@ -1616,7 +1629,7 @@
/*
* File table vnode stat routine.
*/
-static int
+int
vn_statfile(struct file *fp, struct stat *sb, struct ucred *active_cred,
struct thread *td)
{
@@ -1775,7 +1788,7 @@
vp = fp->f_vnode;
fp->f_ops = &badfileops;
- ref= (fp->f_flag & FHASLOCK) != 0 && fp->f_type == DTYPE_VNODE;
+ ref = (fp->f_flag & FHASLOCK) != 0 && fp->f_type == DTYPE_VNODE;
error = vn_close1(vp, fp->f_flag, fp->f_cred, td, ref);
@@ -2130,6 +2143,14 @@
return (VOP_KQFILTER(fp->f_vnode, kn));
}
+int
+vn_kqfilter_opath(struct file *fp, struct knote *kn)
+{
+ if ((fp->f_flag & FKQALLOWED) == 0)
+ return (EBADF);
+ return (vn_kqfilter(fp, kn));
+}
+
/*
* Simplified in-kernel wrapper calls for extended attribute access.
* Both calls pass in a NULL credential, authorizing as "kernel" access.
diff --git a/sys/sys/fcntl.h b/sys/sys/fcntl.h
--- a/sys/sys/fcntl.h
+++ b/sys/sys/fcntl.h
@@ -135,7 +135,7 @@
#if __BSD_VISIBLE
#define O_VERIFY 0x00200000 /* open only after verification */
-/* #define O_UNUSED1 0x00400000 */ /* Was O_BENEATH */
+#define O_PATH 0x00400000 /* fd is only a path */
#define O_RESOLVE_BENEATH 0x00800000 /* Do not allow name resolution to walk
out of cwd */
#endif
@@ -153,13 +153,17 @@
#define FREVOKE O_VERIFY
/* Only for fo_close() from half-succeeded open */
#define FOPENFAILED O_TTY_INIT
+/* Only for O_PATH files which passed the VOP_ACCESS(VREAD) check on open */
+#define FKQALLOWED O_RESOLVE_BENEATH
/* convert from open() flags to/from fflags; convert O_RD/WR to FREAD/FWRITE */
#define FFLAGS(oflags) ((oflags) & O_EXEC ? (oflags) : (oflags) + 1)
-#define OFLAGS(fflags) ((fflags) & O_EXEC ? (fflags) : (fflags) - 1)
+#define OFLAGS(fflags) \
+ (((fflags) & (O_EXEC | O_PATH)) != 0 ? (fflags) : (fflags) - 1)
/* bits to save after open */
-#define FMASK (FREAD|FWRITE|FAPPEND|FASYNC|FFSYNC|FDSYNC|FNONBLOCK|O_DIRECT|FEXEC)
+#define FMASK (FREAD|FWRITE|FAPPEND|FASYNC|FFSYNC|FDSYNC|FNONBLOCK| \
+ O_DIRECT|FEXEC|O_PATH)
/* bits settable by fcntl(F_SETFL, ...) */
#define FCNTLFLAGS (FAPPEND|FASYNC|FFSYNC|FDSYNC|FNONBLOCK|FRDAHEAD|O_DIRECT)
diff --git a/sys/sys/file.h b/sys/sys/file.h
--- a/sys/sys/file.h
+++ b/sys/sys/file.h
@@ -239,6 +239,7 @@
extern struct fileops vnops;
extern struct fileops badfileops;
+extern struct fileops path_fileops;
extern struct fileops socketops;
extern int maxfiles; /* kernel limit on number of open files */
extern int maxfilesperproc; /* per process limit on number of open files */
@@ -262,10 +263,11 @@
fo_chmod_t invfo_chmod;
fo_chown_t invfo_chown;
fo_sendfile_t invfo_sendfile;
-
+fo_stat_t vn_statfile;
fo_sendfile_t vn_sendfile;
fo_seek_t vn_seek;
fo_fill_kinfo_t vn_fill_kinfo;
+fo_kqfilter_t vn_kqfilter_opath;
int vn_fill_kinfo_vnode(struct vnode *vp, struct kinfo_file *kif);
void finit(struct file *, u_int, short, void *, struct fileops *);
diff --git a/sys/sys/filedesc.h b/sys/sys/filedesc.h
--- a/sys/sys/filedesc.h
+++ b/sys/sys/filedesc.h
@@ -265,6 +265,8 @@
struct filedesc *fdp, struct proc *leader);
int getvnode(struct thread *td, int fd, cap_rights_t *rightsp,
struct file **fpp);
+int getvnode_path(struct thread *td, int fd, cap_rights_t *rightsp,
+ struct file **fpp);
void mountcheckdirs(struct vnode *olddp, struct vnode *newdp);
int fget_cap_locked(struct filedesc *fdp, int fd, cap_rights_t *needrightsp,

File Metadata

Mime Type
text/plain
Expires
Mon, Jan 27, 2:48 AM (16 m, 19 s)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
16185559
Default Alt Text
D29323.id87084.diff (14 KB)

Event Timeline