Changeset View
Standalone View
sys/kern/vfs_syscalls.c
Show First 20 Lines • Show All 367 Lines • ▼ Show 20 Lines | |||||
kern_fstatfs(struct thread *td, int fd, struct statfs *buf) | kern_fstatfs(struct thread *td, int fd, struct statfs *buf) | ||||
{ | { | ||||
struct file *fp; | struct file *fp; | ||||
struct mount *mp; | struct mount *mp; | ||||
struct vnode *vp; | struct vnode *vp; | ||||
int error; | int error; | ||||
AUDIT_ARG_FD(fd); | AUDIT_ARG_FD(fd); | ||||
error = getvnode(td, fd, &cap_fstatfs_rights, &fp); | error = getvnode_path(td, fd, &cap_fstatfs_rights, &fp); | ||||
if (error != 0) | if (error != 0) | ||||
return (error); | return (error); | ||||
vp = fp->f_vnode; | vp = fp->f_vnode; | ||||
#ifdef AUDIT | #ifdef AUDIT | ||||
if (AUDITING_TD(td)) { | if (AUDITING_TD(td)) { | ||||
vn_lock(vp, LK_SHARED | LK_RETRY); | vn_lock(vp, LK_SHARED | LK_RETRY); | ||||
AUDIT_ARG_VNODE1(vp); | AUDIT_ARG_VNODE1(vp); | ||||
VOP_UNLOCK(vp); | VOP_UNLOCK(vp); | ||||
▲ Show 20 Lines • Show All 499 Lines • ▼ Show 20 Lines | |||||
sys_fchdir(struct thread *td, struct fchdir_args *uap) | sys_fchdir(struct thread *td, struct fchdir_args *uap) | ||||
{ | { | ||||
struct vnode *vp, *tdp; | struct vnode *vp, *tdp; | ||||
struct mount *mp; | struct mount *mp; | ||||
struct file *fp; | struct file *fp; | ||||
int error; | int error; | ||||
AUDIT_ARG_FD(uap->fd); | AUDIT_ARG_FD(uap->fd); | ||||
error = getvnode(td, uap->fd, &cap_fchdir_rights, | error = getvnode_path(td, uap->fd, &cap_fchdir_rights, | ||||
&fp); | &fp); | ||||
if (error != 0) | if (error != 0) | ||||
return (error); | return (error); | ||||
vp = fp->f_vnode; | vp = fp->f_vnode; | ||||
vrefact(vp); | vref(vp); | ||||
fdrop(fp, td); | fdrop(fp, td); | ||||
vn_lock(vp, LK_SHARED | LK_RETRY); | vn_lock(vp, LK_SHARED | LK_RETRY); | ||||
AUDIT_ARG_VNODE1(vp); | AUDIT_ARG_VNODE1(vp); | ||||
error = change_dir(vp, td); | error = change_dir(vp, td); | ||||
while (!error && (mp = vp->v_mountedhere) != NULL) { | while (!error && (mp = vp->v_mountedhere) != NULL) { | ||||
if (vfs_busy(mp, 0)) | if (vfs_busy(mp, 0)) | ||||
continue; | continue; | ||||
error = VFS_ROOT(mp, LK_SHARED, &tdp); | error = VFS_ROOT(mp, LK_SHARED, &tdp); | ||||
▲ Show 20 Lines • Show All 110 Lines • ▼ Show 20 Lines | if (error != 0) | ||||
return (error); | return (error); | ||||
#endif | #endif | ||||
return (VOP_ACCESS(vp, VEXEC, td->td_ucred, td)); | return (VOP_ACCESS(vp, VEXEC, td->td_ucred, td)); | ||||
} | } | ||||
static __inline void | static __inline void | ||||
flags_to_rights(int flags, cap_rights_t *rightsp) | flags_to_rights(int flags, cap_rights_t *rightsp) | ||||
{ | { | ||||
if (flags & O_EXEC) { | if (flags & O_EXEC) { | ||||
cap_rights_set_one(rightsp, CAP_FEXECVE); | cap_rights_set_one(rightsp, CAP_FEXECVE); | ||||
if (flags & O_PATH) | |||||
return; | |||||
markj: Or handle it here
```
if (flags & O_EXEC) {
cap_rights_set_one(rightsp, CAP_FEXECVE)… | |||||
} else { | } else { | ||||
switch ((flags & O_ACCMODE)) { | switch ((flags & O_ACCMODE)) { | ||||
case O_RDONLY: | case O_RDONLY: | ||||
cap_rights_set_one(rightsp, CAP_READ); | cap_rights_set_one(rightsp, CAP_READ); | ||||
break; | break; | ||||
case O_RDWR: | case O_RDWR: | ||||
cap_rights_set_one(rightsp, CAP_READ); | cap_rights_set_one(rightsp, CAP_READ); | ||||
/* FALLTHROUGH */ | /* FALLTHROUGH */ | ||||
▲ Show 20 Lines • Show All 69 Lines • ▼ Show 20 Lines | kern_openat(struct thread *td, int fd, const char *path, enum uio_seg pathseg, | ||||
indx = -1; | indx = -1; | ||||
fdp = p->p_fd; | fdp = p->p_fd; | ||||
pdp = p->p_pd; | pdp = p->p_pd; | ||||
AUDIT_ARG_FFLAGS(flags); | AUDIT_ARG_FFLAGS(flags); | ||||
AUDIT_ARG_MODE(mode); | AUDIT_ARG_MODE(mode); | ||||
cap_rights_init_one(&rights, CAP_LOOKUP); | cap_rights_init_one(&rights, CAP_LOOKUP); | ||||
flags_to_rights(flags, &rights); | flags_to_rights(flags, &rights); | ||||
Done Inline ActionsIs it correct? For a file opened with O_PATH | O_EXEC, we indeed want the CAP_FEXECVE right, no? Linux documentation states that O_PATH descriptors may be used with fexecve(). markj: Is it correct? For a file opened with O_PATH | O_EXEC, we indeed want the CAP_FEXECVE right, no? | |||||
Done Inline ActionsI did not know that, I intended to disable fexecve(). Tried to handle O_EXEC then. kib: I did not know that, I intended to disable fexecve(). Tried to handle O_EXEC then. | |||||
/* | /* | ||||
* Only one of the O_EXEC, O_RDONLY, O_WRONLY and O_RDWR flags | * Only one of the O_EXEC, O_RDONLY, O_WRONLY and O_RDWR flags | ||||
* may be specified. | * may be specified. On the other hand, for O_PATH any mode | ||||
* except O_EXEC is ignored. | |||||
Done Inline ActionsWe should not allow O_CREAT either, I suspect. markj: We should not allow O_CREAT either, I suspect. | |||||
*/ | */ | ||||
if (flags & O_EXEC) { | if ((flags & O_PATH) != 0) { | ||||
flags &= ~(O_CREAT | O_ACCMODE); | |||||
} else if ((flags & O_EXEC) != 0) { | |||||
if (flags & O_ACCMODE) | if (flags & O_ACCMODE) | ||||
return (EINVAL); | return (EINVAL); | ||||
} else if ((flags & O_ACCMODE) == O_ACCMODE) { | } else if ((flags & O_ACCMODE) == O_ACCMODE) { | ||||
return (EINVAL); | return (EINVAL); | ||||
} else { | } else { | ||||
flags = FFLAGS(flags); | flags = FFLAGS(flags); | ||||
} | } | ||||
Show All 13 Lines | kern_openat(struct thread *td, int fd, const char *path, enum uio_seg pathseg, | ||||
error = vn_open(&nd, &flags, cmode, fp); | error = vn_open(&nd, &flags, cmode, fp); | ||||
if (error != 0) { | if (error != 0) { | ||||
/* | /* | ||||
* If the vn_open replaced the method vector, something | * If the vn_open replaced the method vector, something | ||||
* wonderous happened deep below and we just pass it up | * wonderous happened deep below and we just pass it up | ||||
* pretending we know what we do. | * pretending we know what we do. | ||||
*/ | */ | ||||
if (error == ENXIO && fp->f_ops != &badfileops) | if (error == ENXIO && fp->f_ops != &badfileops) | ||||
goto success; | goto success; | ||||
Done Inline ActionsPerhaps assert that O_PATH was not specified in this case. markj: Perhaps assert that O_PATH was not specified in this case. | |||||
/* | /* | ||||
* Handle special fdopen() case. bleh. | * Handle special fdopen() case. bleh. | ||||
* | * | ||||
* Don't do this for relative (capability) lookups; we don't | * Don't do this for relative (capability) lookups; we don't | ||||
* understand exactly what would happen, and we don't think | * understand exactly what would happen, and we don't think | ||||
* that it ever should. | * that it ever should. | ||||
*/ | */ | ||||
Show All 13 Lines | kern_openat(struct thread *td, int fd, const char *path, enum uio_seg pathseg, | ||||
vp = nd.ni_vp; | vp = nd.ni_vp; | ||||
/* | /* | ||||
* Store the vnode, for any f_type. Typically, the vnode use | * Store the vnode, for any f_type. Typically, the vnode use | ||||
* count is decremented by direct call to vn_closefile() for | * count is decremented by direct call to vn_closefile() for | ||||
* files that switched type in the cdevsw fdopen() method. | * files that switched type in the cdevsw fdopen() method. | ||||
*/ | */ | ||||
fp->f_vnode = vp; | fp->f_vnode = vp; | ||||
/* | /* | ||||
* If the file wasn't claimed by devfs bind it to the normal | * If the file wasn't claimed by devfs bind it to the normal | ||||
* vnode operations here. | * vnode operations here. | ||||
*/ | */ | ||||
if (fp->f_ops == &badfileops) { | if (fp->f_ops == &badfileops) { | ||||
KASSERT(vp->v_type != VFIFO, | flags |= fp->f_flag & FKQALLOWED; | ||||
KASSERT(vp->v_type != VFIFO || (flags & O_PATH) != 0, | |||||
("Unexpected fifo fp %p vp %p", fp, vp)); | ("Unexpected fifo fp %p vp %p", fp, vp)); | ||||
if ((flags & O_PATH) != 0) { | |||||
finit_vnode(fp, flags, NULL, &path_fileops); | |||||
vhold(vp); | |||||
vunref(vp); | |||||
} else { | |||||
finit_vnode(fp, flags, NULL, &vnops); | finit_vnode(fp, flags, NULL, &vnops); | ||||
} | } | ||||
} | |||||
VOP_UNLOCK(vp); | VOP_UNLOCK(vp); | ||||
if (flags & O_TRUNC) { | if (flags & O_TRUNC) { | ||||
error = fo_truncate(fp, 0, td->td_ucred, td); | error = fo_truncate(fp, 0, td->td_ucred, td); | ||||
if (error != 0) | if (error != 0) | ||||
goto bad; | goto bad; | ||||
} | } | ||||
success: | success: | ||||
▲ Show 20 Lines • Show All 663 Lines • ▼ Show 20 Lines | kern_funlinkat(struct thread *td, int dfd, const char *path, int fd, | ||||
struct file *fp; | struct file *fp; | ||||
struct vnode *vp; | struct vnode *vp; | ||||
struct nameidata nd; | struct nameidata nd; | ||||
struct stat sb; | struct stat sb; | ||||
int error; | int error; | ||||
fp = NULL; | fp = NULL; | ||||
if (fd != FD_NONE) { | if (fd != FD_NONE) { | ||||
error = getvnode(td, fd, &cap_no_rights, &fp); | error = getvnode_path(td, fd, &cap_no_rights, &fp); | ||||
if (error != 0) | if (error != 0) | ||||
return (error); | return (error); | ||||
} | } | ||||
restart: | restart: | ||||
bwillwrite(); | bwillwrite(); | ||||
NDINIT_ATRIGHTS(&nd, DELETE, LOCKPARENT | LOCKLEAF | AUDITVNODE1 | | NDINIT_ATRIGHTS(&nd, DELETE, LOCKPARENT | LOCKLEAF | AUDITVNODE1 | | ||||
at2cnpflags(flag, AT_RESOLVE_BENEATH), | at2cnpflags(flag, AT_RESOLVE_BENEATH), | ||||
pathseg, path, dfd, &cap_unlinkat_rights, td); | pathseg, path, dfd, &cap_unlinkat_rights, td); | ||||
if ((error = namei(&nd)) != 0) { | if ((error = namei(&nd)) != 0) { | ||||
if (error == EINVAL) | if (error == EINVAL) | ||||
error = EPERM; | error = EPERM; | ||||
goto fdout; | goto fdout; | ||||
} | } | ||||
vp = nd.ni_vp; | vp = nd.ni_vp; | ||||
if (vp->v_type == VDIR && oldinum == 0) { | if (vp->v_type == VDIR && oldinum == 0) { | ||||
error = EPERM; /* POSIX */ | error = EPERM; /* POSIX */ | ||||
} else if (oldinum != 0 && | } else if (oldinum != 0 && | ||||
((error = VOP_STAT(vp, &sb, td->td_ucred, NOCRED, td)) == 0) && | ((error = VOP_STAT(vp, &sb, td->td_ucred, NOCRED, td)) == 0) && | ||||
sb.st_ino != oldinum) { | sb.st_ino != oldinum) { | ||||
error = EIDRM; /* Identifier removed */ | error = EIDRM; /* Identifier removed */ | ||||
} else if (fp != NULL && fp->f_vnode != vp) { | } else if (fp != NULL && fp->f_vnode != vp) { | ||||
if (VN_IS_DOOMED(fp->f_vnode)) | if (VN_IS_DOOMED(fp->f_vnode)) | ||||
error = EBADF; | error = EBADF; | ||||
else | else | ||||
error = EDEADLK; | error = EDEADLK; | ||||
} else { | } else { | ||||
/* | /* | ||||
▲ Show 20 Lines • Show All 1,566 Lines • ▼ Show 20 Lines | |||||
kern_fsync(struct thread *td, int fd, bool fullsync) | kern_fsync(struct thread *td, int fd, bool fullsync) | ||||
{ | { | ||||
struct vnode *vp; | struct vnode *vp; | ||||
struct mount *mp; | struct mount *mp; | ||||
struct file *fp; | struct file *fp; | ||||
int error, lock_flags; | int error, lock_flags; | ||||
AUDIT_ARG_FD(fd); | AUDIT_ARG_FD(fd); | ||||
error = getvnode(td, fd, &cap_fsync_rights, &fp); | error = getvnode(td, fd, &cap_fsync_rights, &fp); | ||||
Done Inline ActionsLinux does not permit fsync/fdatasync with O_PATH descriptors. I can't really see why it would be useful to diverge there. markj: Linux does not permit fsync/fdatasync with O_PATH descriptors. I can't really see why it would… | |||||
Done Inline ActionsBut the user can always call sync(2), so what would be the point of disallowing fsync(2)? kib: But the user can always call sync(2), so what would be the point of disallowing fsync(2)?
I can but it… | |||||
Done Inline ActionsI do not have a strong argument, it just seems more in line with expected semantics for O_PATH, and it is hard to imagine a scenario where something would want to call fsync() on an O_PATH fd. Having a minimal set of permitted interfaces for O_PATH descriptors makes it easier to think about their properties and how they interact with the rest of the system. In this case I agree it's a minor point and don't insist on anything. Capsicum even permits sync(2), though that seems strange to me. markj: I do not have a strong argument, it just seems more in line with expected semantics for O_PATH… | |||||
if (error != 0) | if (error != 0) | ||||
return (error); | return (error); | ||||
vp = fp->f_vnode; | vp = fp->f_vnode; | ||||
#if 0 | #if 0 | ||||
if (!fullsync) | if (!fullsync) | ||||
/* XXXKIB: compete outstanding aio writes */; | /* XXXKIB: compete outstanding aio writes */; | ||||
#endif | #endif | ||||
retry: | retry: | ||||
▲ Show 20 Lines • Show All 336 Lines • ▼ Show 20 Lines | kern_frmdirat(struct thread *td, int dfd, const char *path, int fd, | ||||
struct vnode *vp; | struct vnode *vp; | ||||
struct file *fp; | struct file *fp; | ||||
struct nameidata nd; | struct nameidata nd; | ||||
cap_rights_t rights; | cap_rights_t rights; | ||||
int error; | int error; | ||||
fp = NULL; | fp = NULL; | ||||
if (fd != FD_NONE) { | if (fd != FD_NONE) { | ||||
error = getvnode(td, fd, cap_rights_init_one(&rights, CAP_LOOKUP), | error = getvnode(td, fd, cap_rights_init_one(&rights, | ||||
&fp); | CAP_LOOKUP), &fp); | ||||
if (error != 0) | if (error != 0) | ||||
return (error); | return (error); | ||||
} | } | ||||
restart: | restart: | ||||
bwillwrite(); | bwillwrite(); | ||||
NDINIT_ATRIGHTS(&nd, DELETE, LOCKPARENT | LOCKLEAF | AUDITVNODE1 | | NDINIT_ATRIGHTS(&nd, DELETE, LOCKPARENT | LOCKLEAF | AUDITVNODE1 | | ||||
at2cnpflags(flag, AT_RESOLVE_BENEATH), | at2cnpflags(flag, AT_RESOLVE_BENEATH), | ||||
▲ Show 20 Lines • Show All 394 Lines • ▼ Show 20 Lines | #endif | ||||
if (devfs_usecount(vp) > 0) | if (devfs_usecount(vp) > 0) | ||||
VOP_REVOKE(vp, REVOKEALL); | VOP_REVOKE(vp, REVOKEALL); | ||||
out: | out: | ||||
vput(vp); | vput(vp); | ||||
return (error); | return (error); | ||||
} | } | ||||
/* | /* | ||||
* Convert a user file descriptor to a kernel file entry and check that, if it | * This variant of getvnode() allows O_PATH files. Caller should | ||||
* is a capability, the correct rights are present. A reference on the file | * ensure that returned file and vnode are only used for compatible | ||||
* entry is held upon returning. | * semantics. | ||||
*/ | */ | ||||
int | int | ||||
getvnode(struct thread *td, int fd, cap_rights_t *rightsp, struct file **fpp) | getvnode_path(struct thread *td, int fd, cap_rights_t *rightsp, | ||||
struct file **fpp) | |||||
{ | { | ||||
struct file *fp; | struct file *fp; | ||||
int error; | int error; | ||||
error = fget_unlocked(td->td_proc->p_fd, fd, rightsp, &fp); | error = fget_unlocked(td->td_proc->p_fd, fd, rightsp, &fp); | ||||
if (error != 0) | if (error != 0) | ||||
return (error); | return (error); | ||||
/* | /* | ||||
* The file could be not of the vnode type, or it may be not | * The file could be not of the vnode type, or it may be not | ||||
* yet fully initialized, in which case the f_vnode pointer | * yet fully initialized, in which case the f_vnode pointer | ||||
* may be set, but f_ops is still badfileops. E.g., | * may be set, but f_ops is still badfileops. E.g., | ||||
* devfs_open() transiently create such situation to | * devfs_open() transiently create such situation to | ||||
* facilitate csw d_fdopen(). | * facilitate csw d_fdopen(). | ||||
* | * | ||||
* Dupfdopen() handling in kern_openat() installs the | * Dupfdopen() handling in kern_openat() installs the | ||||
* half-baked file into the process descriptor table, allowing | * half-baked file into the process descriptor table, allowing | ||||
* other thread to dereference it. Guard against the race by | * other thread to dereference it. Guard against the race by | ||||
* checking f_ops. | * checking f_ops. | ||||
*/ | */ | ||||
if (fp->f_vnode == NULL || fp->f_ops == &badfileops) { | if (fp->f_vnode == NULL || fp->f_ops == &badfileops) { | ||||
fdrop(fp, td); | fdrop(fp, td); | ||||
return (EINVAL); | return (EINVAL); | ||||
} | } | ||||
*fpp = fp; | *fpp = fp; | ||||
return (0); | return (0); | ||||
} | |||||
/* | |||||
* Convert a user file descriptor to a kernel file entry and check | |||||
* that, if it is a capability, the correct rights are present. | |||||
* A reference on the file entry is held upon returning. | |||||
Done Inline ActionsIs there a reason we can't just do this check in getvnode()? That way, there is no extra flag parameter. It is just a suggestion. markj: Is there a reason we can't just do this check in `getvnode()`? That way, there is no extra flag… | |||||
Done Inline ActionsA minor issue is that *fpp is written even when error != 0, but it should be fine. Usually the compiler inlines this kind of call, converting the argument into code. Anyway, I removed getvnode_int(). kib: A minor issue is that *fpp is written even when error != 0, but it should be fine.
Usually compiler… | |||||
*/ | |||||
int | |||||
getvnode(struct thread *td, int fd, cap_rights_t *rightsp, struct file **fpp) | |||||
{ | |||||
int error; | |||||
error = getvnode_path(td, fd, rightsp, fpp); | |||||
/* | |||||
* Filter out O_PATH file descriptors, most getvnode() callers | |||||
* do not call fo_ methods. | |||||
*/ | |||||
if (error == 0 && (*fpp)->f_ops == &path_fileops) { | |||||
fdrop(*fpp, td); | |||||
error = EBADF; | |||||
} | |||||
return (error); | |||||
} | } | ||||
/* | /* | ||||
* Get an (NFS) file handle. | * Get an (NFS) file handle. | ||||
*/ | */ | ||||
#ifndef _SYS_SYSPROTO_H_ | #ifndef _SYS_SYSPROTO_H_ | ||||
struct lgetfh_args { | struct lgetfh_args { | ||||
char *fname; | char *fname; | ||||
▲ Show 20 Lines • Show All 653 Lines • Show Last 20 Lines |
Or handle it here