Page Menu
Home
FreeBSD
Search
Configure Global Search
Log In
Files
F108540721
D29323.id87084.diff
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Flag For Later
Award Token
Size
14 KB
Referenced Files
None
Subscribers
None
D29323.id87084.diff
View Options
diff --git a/lib/libc/sys/open.2 b/lib/libc/sys/open.2
--- a/lib/libc/sys/open.2
+++ b/lib/libc/sys/open.2
@@ -28,7 +28,7 @@
.\" @(#)open.2 8.2 (Berkeley) 11/16/93
.\" $FreeBSD$
.\"
-.Dd February 23, 2021
+.Dd March 18, 2021
.Dt OPEN 2
.Os
.Sh NAME
@@ -168,6 +168,7 @@
O_CLOEXEC set FD_CLOEXEC upon open
O_VERIFY verify the contents of the file
O_RESOLVE_BENEATH path resolution must not cross the fd directory
+O_PATH record only the target path in the opened descriptor
.Ed
.Pp
Opening a file with
@@ -316,6 +317,50 @@
.Fn *at
family of functions.
.Pp
+.Dv O_PATH
+returns a file descriptor that can be used as a directory file descriptor for
+.Xr openat 2
+and other system calls taking a file descriptor argument, like
+.Xr fstatat 2
+and others.
+The other functionality of the returned file descriptor is limited to
+the descriptor-level operations.
+It can be used for
+.Bl -tag -width SCM_RIGHTS -offset indent -compact
+.It Xr fcntl 2
+but advisory locking is not allowed
+.It Xr dup 2
+.It Xr close 2
+.It Xr fstat 2
+.It Xr fexecve 2
+requires that
+.Dv O_EXEC
+was also specified at open time
+.It Dv SCM_RIGHTS
+can be passed over a
+.Xr unix 4
+socket using a
+.Dv SCM_RIGHTS
+message
+.It Xr kqueue 2
+using for
+.Dv EVFILT_VNODE
+.El
+But operations like
+.Xr read 2 ,
+.Xr ftruncate 2 ,
+and any other that operate on file and not on file descriptor (except
+.Xr fstat 2 ),
+are not allowed.
+File opened with the
+.Dv O_PATH
+flag does not prevent non-forced unmount of the volume it belongs to.
+See also the description of
+.Dv AT_EMPTY_PATH
+flag for
+.Xr fstatat 2
+and related syscalls.
+.Pp
If successful,
.Fn open
returns a non-negative integer, termed a file descriptor.
diff --git a/sys/kern/kern_descrip.c b/sys/kern/kern_descrip.c
--- a/sys/kern/kern_descrip.c
+++ b/sys/kern/kern_descrip.c
@@ -61,6 +61,7 @@
#include <sys/mutex.h>
#include <sys/namei.h>
#include <sys/selinfo.h>
+#include <sys/poll.h>
#include <sys/priv.h>
#include <sys/proc.h>
#include <sys/protosw.h>
@@ -610,7 +611,7 @@
error = fget_unlocked(fdp, fd, &cap_flock_rights, &fp);
if (error != 0)
break;
- if (fp->f_type != DTYPE_VNODE) {
+ if (fp->f_type != DTYPE_VNODE || fp->f_ops == &path_fileops) {
error = EBADF;
fdrop(fp, td);
break;
@@ -715,7 +716,7 @@
error = fget_unlocked(fdp, fd, &cap_flock_rights, &fp);
if (error != 0)
break;
- if (fp->f_type != DTYPE_VNODE) {
+ if (fp->f_type != DTYPE_VNODE || fp->f_ops == &path_fileops) {
error = EBADF;
fdrop(fp, td);
break;
@@ -3424,7 +3425,7 @@
error = EINVAL;
} else {
*vpp = fp->f_vnode;
- vrefact(*vpp);
+ vref(*vpp);
}
fdrop(fp, td);
@@ -3460,7 +3461,7 @@
*havecaps = caps;
*vpp = fp->f_vnode;
- vrefact(*vpp);
+ vref(*vpp);
fdrop(fp, td);
return (0);
@@ -3544,7 +3545,7 @@
error = fget(td, uap->fd, &cap_flock_rights, &fp);
if (error != 0)
return (error);
- if (fp->f_type != DTYPE_VNODE) {
+ if (fp->f_type != DTYPE_VNODE || fp->f_ops == &path_fileops) {
fdrop(fp, td);
return (EOPNOTSUPP);
}
@@ -4960,6 +4961,38 @@
.fo_fill_kinfo = badfo_fill_kinfo,
};
+static int
+path_poll(struct file *fp, int events, struct ucred *active_cred,
+ struct thread *td)
+{
+ return (POLLNVAL);
+}
+
+static int
+path_close(struct file *fp, struct thread *td)
+{
+ MPASS(fp->f_type == DTYPE_VNODE);
+ fp->f_ops = &badfileops;
+ vdrop(fp->f_vnode);
+ return (0);
+}
+
+struct fileops path_fileops = {
+ .fo_read = badfo_readwrite,
+ .fo_write = badfo_readwrite,
+ .fo_truncate = badfo_truncate,
+ .fo_ioctl = badfo_ioctl,
+ .fo_poll = path_poll,
+ .fo_kqfilter = vn_kqfilter_opath,
+ .fo_stat = vn_statfile,
+ .fo_close = path_close,
+ .fo_chmod = badfo_chmod,
+ .fo_chown = badfo_chown,
+ .fo_sendfile = badfo_sendfile,
+ .fo_fill_kinfo = vn_fill_kinfo,
+ .fo_flags = DFLAG_PASSABLE,
+};
+
int
invfo_rdwr(struct file *fp, struct uio *uio, struct ucred *active_cred,
int flags, struct thread *td)
diff --git a/sys/kern/vfs_lookup.c b/sys/kern/vfs_lookup.c
--- a/sys/kern/vfs_lookup.c
+++ b/sys/kern/vfs_lookup.c
@@ -360,8 +360,10 @@
if (cnp->cn_flags & AUDITVNODE2)
AUDIT_ARG_ATFD2(ndp->ni_dirfd);
/*
- * Effectively inlined fgetvp_rights, because we need to
- * inspect the file as well as grabbing the vnode.
+ * Effectively inlined fgetvp_rights, because
+ * we need to inspect the file as well as
+ * grabbing the vnode. No check for O_PATH,
+ * files to implement its semantic.
*/
error = fget_cap(td, ndp->ni_dirfd, &rights,
&dfp, &ndp->ni_filecaps);
@@ -378,7 +380,7 @@
error = ENOTDIR;
} else {
*dpp = dfp->f_vnode;
- vrefact(*dpp);
+ vref(*dpp);
if ((dfp->f_flag & FSEARCH) != 0)
cnp->cn_flags |= NOEXECCHECK;
diff --git a/sys/kern/vfs_syscalls.c b/sys/kern/vfs_syscalls.c
--- a/sys/kern/vfs_syscalls.c
+++ b/sys/kern/vfs_syscalls.c
@@ -373,7 +373,7 @@
int error;
AUDIT_ARG_FD(fd);
- error = getvnode(td, fd, &cap_fstatfs_rights, &fp);
+ error = getvnode_path(td, fd, &cap_fstatfs_rights, &fp);
if (error != 0)
return (error);
vp = fp->f_vnode;
@@ -889,12 +889,12 @@
int error;
AUDIT_ARG_FD(uap->fd);
- error = getvnode(td, uap->fd, &cap_fchdir_rights,
+ error = getvnode_path(td, uap->fd, &cap_fchdir_rights,
&fp);
if (error != 0)
return (error);
vp = fp->f_vnode;
- vrefact(vp);
+ vref(vp);
fdrop(fp, td);
vn_lock(vp, LK_SHARED | LK_RETRY);
AUDIT_ARG_VNODE1(vp);
@@ -1021,9 +1021,10 @@
static __inline void
flags_to_rights(int flags, cap_rights_t *rightsp)
{
-
if (flags & O_EXEC) {
cap_rights_set_one(rightsp, CAP_FEXECVE);
+ if (flags & O_PATH)
+ return;
} else {
switch ((flags & O_ACCMODE)) {
case O_RDONLY:
@@ -1110,11 +1111,15 @@
AUDIT_ARG_MODE(mode);
cap_rights_init_one(&rights, CAP_LOOKUP);
flags_to_rights(flags, &rights);
+
/*
* Only one of the O_EXEC, O_RDONLY, O_WRONLY and O_RDWR flags
- * may be specified.
+ * may be specified. On the other hand, for O_PATH any mode
+ * except O_EXEC is ignored.
*/
- if (flags & O_EXEC) {
+ if ((flags & O_PATH) != 0) {
+ flags &= ~(O_CREAT | O_ACCMODE);
+ } else if ((flags & O_EXEC) != 0) {
if (flags & O_ACCMODE)
return (EINVAL);
} else if ((flags & O_ACCMODE) == O_ACCMODE) {
@@ -1174,14 +1179,22 @@
* files that switched type in the cdevsw fdopen() method.
*/
fp->f_vnode = vp;
+
/*
* If the file wasn't claimed by devfs bind it to the normal
* vnode operations here.
*/
if (fp->f_ops == &badfileops) {
- KASSERT(vp->v_type != VFIFO,
+ KASSERT(vp->v_type != VFIFO || (flags & O_PATH) != 0,
("Unexpected fifo fp %p vp %p", fp, vp));
- finit_vnode(fp, flags, NULL, &vnops);
+ if ((flags & O_PATH) != 0) {
+ finit(fp, (flags & FMASK) | (fp->f_flag & FKQALLOWED),
+ DTYPE_VNODE, NULL, &path_fileops);
+ vhold(vp);
+ vunref(vp);
+ } else {
+ finit_vnode(fp, flags, NULL, &vnops);
+ }
}
VOP_UNLOCK(vp);
@@ -1862,7 +1875,7 @@
fp = NULL;
if (fd != FD_NONE) {
- error = getvnode(td, fd, &cap_no_rights, &fp);
+ error = getvnode_path(td, fd, &cap_no_rights, &fp);
if (error != 0)
return (error);
}
@@ -1881,8 +1894,8 @@
if (vp->v_type == VDIR && oldinum == 0) {
error = EPERM; /* POSIX */
} else if (oldinum != 0 &&
- ((error = VOP_STAT(vp, &sb, td->td_ucred, NOCRED, td)) == 0) &&
- sb.st_ino != oldinum) {
+ ((error = VOP_STAT(vp, &sb, td->td_ucred, NOCRED, td)) == 0) &&
+ sb.st_ino != oldinum) {
error = EIDRM; /* Identifier removed */
} else if (fp != NULL && fp->f_vnode != vp) {
if (VN_IS_DOOMED(fp->f_vnode))
@@ -3818,8 +3831,8 @@
fp = NULL;
if (fd != FD_NONE) {
- error = getvnode(td, fd, cap_rights_init_one(&rights, CAP_LOOKUP),
- &fp);
+ error = getvnode(td, fd, cap_rights_init_one(&rights,
+ CAP_LOOKUP), &fp);
if (error != 0)
return (error);
}
@@ -4230,12 +4243,13 @@
}
/*
- * Convert a user file descriptor to a kernel file entry and check that, if it
- * is a capability, the correct rights are present. A reference on the file
- * entry is held upon returning.
+ * This variant of getvnode() allows O_PATH files. Caller should
+ * ensure that returned file and vnode are only used for compatible
+ * semantics.
*/
int
-getvnode(struct thread *td, int fd, cap_rights_t *rightsp, struct file **fpp)
+getvnode_path(struct thread *td, int fd, cap_rights_t *rightsp,
+ struct file **fpp)
{
struct file *fp;
int error;
@@ -4260,10 +4274,35 @@
fdrop(fp, td);
return (EINVAL);
}
+
*fpp = fp;
return (0);
}
+/*
+ * Convert a user file descriptor to a kernel file entry and check
+ * that, if it is a capability, the correct rights are present.
+ * A reference on the file entry is held upon returning.
+ */
+int
+getvnode(struct thread *td, int fd, cap_rights_t *rightsp, struct file **fpp)
+{
+ int error;
+
+ error = getvnode_path(td, fd, rightsp, fpp);
+
+ /*
+ * Filter out O_PATH file descriptors, most getvnode() callers
+ * do not call fo_ methods.
+ */
+ if (error == 0 && (*fpp)->f_ops == &path_fileops) {
+ fdrop(*fpp, td);
+ error = EBADF;
+ }
+
+ return (error);
+}
+
/*
* Get an (NFS) file handle.
*/
diff --git a/sys/kern/vfs_vnops.c b/sys/kern/vfs_vnops.c
--- a/sys/kern/vfs_vnops.c
+++ b/sys/kern/vfs_vnops.c
@@ -102,7 +102,6 @@
static fo_ioctl_t vn_ioctl;
static fo_poll_t vn_poll;
static fo_kqfilter_t vn_kqfilter;
-static fo_stat_t vn_statfile;
static fo_close_t vn_closefile;
static fo_mmap_t vn_mmap;
static fo_fallocate_t vn_fallocate;
@@ -386,31 +385,38 @@
accmode_t accmode;
int error;
- if (vp->v_type == VLNK)
- return (EMLINK);
+ if (vp->v_type == VLNK) {
+ if ((fmode & O_PATH) == 0 || (fmode & FEXEC) != 0)
+ return (EMLINK);
+ }
if (vp->v_type == VSOCK)
return (EOPNOTSUPP);
if (vp->v_type != VDIR && fmode & O_DIRECTORY)
return (ENOTDIR);
+
accmode = 0;
- if (fmode & (FWRITE | O_TRUNC)) {
- if (vp->v_type == VDIR)
- return (EISDIR);
- accmode |= VWRITE;
+ if ((fmode & O_PATH) == 0) {
+ if ((fmode & (FWRITE | O_TRUNC)) != 0) {
+ if (vp->v_type == VDIR)
+ return (EISDIR);
+ accmode |= VWRITE;
+ }
+ if ((fmode & FREAD) != 0)
+ accmode |= VREAD;
+ if ((fmode & O_APPEND) && (fmode & FWRITE))
+ accmode |= VAPPEND;
+#ifdef MAC
+ if ((fmode & O_CREAT) != 0)
+ accmode |= VCREAT;
+#endif
}
- if (fmode & FREAD)
- accmode |= VREAD;
- if (fmode & FEXEC)
+ if ((fmode & FEXEC) != 0)
accmode |= VEXEC;
- if ((fmode & O_APPEND) && (fmode & FWRITE))
- accmode |= VAPPEND;
#ifdef MAC
- if (fmode & O_CREAT)
- accmode |= VCREAT;
- if (fmode & O_VERIFY)
+ if ((fmode & O_VERIFY) != 0)
accmode |= VVERIFY;
error = mac_vnode_check_open(cred, vp, accmode);
- if (error)
+ if (error != 0)
return (error);
accmode &= ~(VCREAT | VVERIFY);
@@ -420,6 +426,13 @@
if (error != 0)
return (error);
}
+ if ((fmode & O_PATH) != 0) {
+ error = VOP_ACCESS(vp, VREAD, cred, td);
+ if (error == 0)
+ fp->f_flag |= FKQALLOWED;
+ return (0);
+ }
+
if (vp->v_type == VFIFO && VOP_ISLOCKED(vp) != LK_EXCLUSIVE)
vn_lock(vp, LK_UPGRADE | LK_RETRY);
error = VOP_OPEN(vp, fmode, cred, td, fp);
@@ -1616,7 +1629,7 @@
/*
* File table vnode stat routine.
*/
-static int
+int
vn_statfile(struct file *fp, struct stat *sb, struct ucred *active_cred,
struct thread *td)
{
@@ -1775,7 +1788,7 @@
vp = fp->f_vnode;
fp->f_ops = &badfileops;
- ref= (fp->f_flag & FHASLOCK) != 0 && fp->f_type == DTYPE_VNODE;
+ ref = (fp->f_flag & FHASLOCK) != 0 && fp->f_type == DTYPE_VNODE;
error = vn_close1(vp, fp->f_flag, fp->f_cred, td, ref);
@@ -2130,6 +2143,14 @@
return (VOP_KQFILTER(fp->f_vnode, kn));
}
+int
+vn_kqfilter_opath(struct file *fp, struct knote *kn)
+{
+ if ((fp->f_flag & FKQALLOWED) == 0)
+ return (EBADF);
+ return (vn_kqfilter(fp, kn));
+}
+
/*
* Simplified in-kernel wrapper calls for extended attribute access.
* Both calls pass in a NULL credential, authorizing as "kernel" access.
diff --git a/sys/sys/fcntl.h b/sys/sys/fcntl.h
--- a/sys/sys/fcntl.h
+++ b/sys/sys/fcntl.h
@@ -135,7 +135,7 @@
#if __BSD_VISIBLE
#define O_VERIFY 0x00200000 /* open only after verification */
-/* #define O_UNUSED1 0x00400000 */ /* Was O_BENEATH */
+#define O_PATH 0x00400000 /* fd is only a path */
#define O_RESOLVE_BENEATH 0x00800000 /* Do not allow name resolution to walk
out of cwd */
#endif
@@ -153,13 +153,17 @@
#define FREVOKE O_VERIFY
/* Only for fo_close() from half-succeeded open */
#define FOPENFAILED O_TTY_INIT
+/* Only for O_PATH files which passed ACCESS FREAD check on open */
+#define FKQALLOWED O_RESOLVE_BENEATH
/* convert from open() flags to/from fflags; convert O_RD/WR to FREAD/FWRITE */
#define FFLAGS(oflags) ((oflags) & O_EXEC ? (oflags) : (oflags) + 1)
-#define OFLAGS(fflags) ((fflags) & O_EXEC ? (fflags) : (fflags) - 1)
+#define OFLAGS(fflags) \
+ (((fflags) & (O_EXEC | O_PATH)) != 0 ? (fflags) : (fflags) - 1)
/* bits to save after open */
-#define FMASK (FREAD|FWRITE|FAPPEND|FASYNC|FFSYNC|FDSYNC|FNONBLOCK|O_DIRECT|FEXEC)
+#define FMASK (FREAD|FWRITE|FAPPEND|FASYNC|FFSYNC|FDSYNC|FNONBLOCK| \
+ O_DIRECT|FEXEC|O_PATH)
/* bits settable by fcntl(F_SETFL, ...) */
#define FCNTLFLAGS (FAPPEND|FASYNC|FFSYNC|FDSYNC|FNONBLOCK|FRDAHEAD|O_DIRECT)
diff --git a/sys/sys/file.h b/sys/sys/file.h
--- a/sys/sys/file.h
+++ b/sys/sys/file.h
@@ -239,6 +239,7 @@
extern struct fileops vnops;
extern struct fileops badfileops;
+extern struct fileops path_fileops;
extern struct fileops socketops;
extern int maxfiles; /* kernel limit on number of open files */
extern int maxfilesperproc; /* per process limit on number of open files */
@@ -262,10 +263,11 @@
fo_chmod_t invfo_chmod;
fo_chown_t invfo_chown;
fo_sendfile_t invfo_sendfile;
-
+fo_stat_t vn_statfile;
fo_sendfile_t vn_sendfile;
fo_seek_t vn_seek;
fo_fill_kinfo_t vn_fill_kinfo;
+fo_kqfilter_t vn_kqfilter_opath;
int vn_fill_kinfo_vnode(struct vnode *vp, struct kinfo_file *kif);
void finit(struct file *, u_int, short, void *, struct fileops *);
diff --git a/sys/sys/filedesc.h b/sys/sys/filedesc.h
--- a/sys/sys/filedesc.h
+++ b/sys/sys/filedesc.h
@@ -265,6 +265,8 @@
struct filedesc *fdp, struct proc *leader);
int getvnode(struct thread *td, int fd, cap_rights_t *rightsp,
struct file **fpp);
+int getvnode_path(struct thread *td, int fd, cap_rights_t *rightsp,
+ struct file **fpp);
void mountcheckdirs(struct vnode *olddp, struct vnode *newdp);
int fget_cap_locked(struct filedesc *fdp, int fd, cap_rights_t *needrightsp,
File Metadata
Details
Attached
Mime Type
text/plain
Expires
Mon, Jan 27, 2:48 AM (16 m, 19 s)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
16185559
Default Alt Text
D29323.id87084.diff (14 KB)
Attached To
Mode
D29323: Implement O_PATH
Attached
Detach File
Event Timeline
Log In to Comment