Index: lib/libc/sys/open.2 =================================================================== --- lib/libc/sys/open.2 +++ lib/libc/sys/open.2 @@ -75,8 +75,14 @@ .Fn open function except in the case where the .Fa path -specifies a relative path. -In this case the file to be opened is determined relative to the directory +specifies a relative path, or the +.Dv O_BENEATH +flag is provided. +For +.Fn openat +and relative +.Fa path , +the file to be opened is determined relative to the directory associated with the file descriptor .Fa fd instead of the current working directory. @@ -95,6 +101,26 @@ and the behavior is identical to a call to .Fn open . .Pp +When +.Fn openat +is called with an absolute +.Fa path +without the +.Dv O_BENEATH +flag, it ignores the +.Fa fd +argument. +When +.Dv O_BENEATH +is specified with an absolute +.Fa path , +a directory passed by the +.Fa fd +argument is used as the starting point for the resolution. +See the definition of the +.Dv O_BENEATH +flag below. +.Pp In .Xr capsicum 4 capability mode, @@ -109,14 +135,28 @@ as defined in .Pa sys/kern/vfs_lookup.c . .Fa path -must not be an absolute path and must not contain ".." components. +must not be an absolute path and must not contain ".." components +which cause the path resolution to escape the directory hierarhy +starting at +.Fa fd . Additionally, no symbolic link in .Fa path -may contain ".." components either. +may contain escaping ".." components. .Fa fd must not be .Dv AT_FDCWD . .Pp +If the +.Dv vfs.lookup_cap_dotdot +.Xr sysctl 2 +MIB is set to zero, ".." components in the paths, +used in capability mode, or with the +.Dv O_BENEATH +flag, are completely disabled. +If the +.Dv vfs.lookup_cap_dotdot_nonlocal +MIB is set to zero, ".." is not allowed if found on non-local filesystem. +.Pp The flags specified are formed by .Em or Ns 'ing the following values @@ -273,19 +313,21 @@ .Dv O_BENEATH returns .Er ENOTCAPABLE -if the specified path, after resolving all symlinks and ".." references -in it, does not reside in the directory hierarchy of children beneath -the starting directory, or is an absolute path. +if the specified relative path, after resolving all symlinks and ".." +references in it, does not reside in the directory hierarchy of +children beneath the starting directory. Starting directory is the process current directory if relative .Fa path is used for .Fn open , and the directory referenced by the .Fa fd -argument when specifying relative -.Fa path -for +argument when using .Fn openat . +If the specified path is absolute, +.Dv O_BENEATH +allows arbitrary prefix that ends up in the starting directory, +after which all further resolved components must be under it. .Pp If successful, .Fn open Index: sys/kern/vfs_lookup.c =================================================================== --- sys/kern/vfs_lookup.c +++ sys/kern/vfs_lookup.c @@ -177,6 +177,13 @@ if ((ndp->ni_lcf & NI_LCF_CAP_DOTDOT) == 0 || dp->v_type != VDIR) return; + if ((ndp->ni_lcf & (NI_LCF_BENEATH_ABS | NI_LCF_BENEATH_LATCHED)) == + NI_LCF_BENEATH_ABS) { + MPASS((ndp->ni_lcf & NI_LCF_LATCH) != 0); + if (dp != ndp->ni_beneath_latch) + return; + ndp->ni_lcf |= NI_LCF_BENEATH_LATCHED; + } nt = uma_zalloc(nt_zone, M_WAITOK); vhold(dp); nt->dp = dp; @@ -222,6 +229,10 @@ if (dp == nt->dp) return (0); } + if ((ndp->ni_lcf & NI_LCF_BENEATH_ABS) != 0) { + ndp->ni_lcf &= ~NI_LCF_BENEATH_LATCHED; + nameicap_cleanup(ndp); + } return (ENOTCAPABLE); } @@ -236,20 +247,33 @@ #endif } +static void +namei_cleanup_vp(struct nameidata *ndp) +{ + + vput(ndp->ni_vp); + ndp->ni_vp = NULL; + vrele(ndp->ni_dvp); +} + static int namei_handle_root(struct nameidata *ndp, struct vnode **dpp) { struct componentname *cnp; cnp = &ndp->ni_cnd; - if ((ndp->ni_lcf & NI_LCF_STRICTRELATIVE) != 0 || - (cnp->cn_flags & BENEATH) != 0) { + if ((ndp->ni_lcf & NI_LCF_STRICTRELATIVE) != 0) { #ifdef KTRACE if (KTRPOINT(curthread, KTR_CAPFAIL)) ktrcapfail(CAPFAIL_LOOKUP, NULL, NULL); #endif return (ENOTCAPABLE); } + if ((cnp->cn_flags & BENEATH) != 0) { + ndp->ni_lcf |= NI_LCF_BENEATH_ABS; + ndp->ni_lcf &= ~NI_LCF_BENEATH_LATCHED; + nameicap_cleanup(ndp); + } while (*(cnp->cn_nameptr) == '/') { cnp->cn_nameptr++; ndp->ni_pathlen--; @@ -290,6 +314,7 @@ struct thread *td; struct proc *p; cap_rights_t rights; + struct filecaps dirfd_caps; struct uio auio; int error, linklen, startdir_used; @@ -427,6 +452,23 @@ if (error == 0 && dp->v_type != VDIR) error = ENOTDIR; } + if (error == 0 && (ndp->ni_lcf & NI_LCF_BENEATH_ABS) != 0) { + if (ndp->ni_dirfd == AT_FDCWD) { + ndp->ni_beneath_latch = fdp->fd_cdir; + vrefact(ndp->ni_beneath_latch); + } else { + rights = ndp->ni_rightsneeded; + cap_rights_set(&rights, CAP_LOOKUP); + error = fgetvp_rights(td, ndp->ni_dirfd, &rights, + &dirfd_caps, &ndp->ni_beneath_latch); + if (error == 0 && dp->v_type != VDIR) { + vrele(ndp->ni_beneath_latch); + error = ENOTDIR; + } + } + if (error == 0) + ndp->ni_lcf |= NI_LCF_LATCH; + } FILEDESC_SUNLOCK(fdp); if (ndp->ni_startdir != NULL && !startdir_used) vrele(ndp->ni_startdir); @@ -456,9 +498,14 @@ namei_cleanup_cnp(cnp); } else cnp->cn_flags |= HASBUF; + if ((ndp->ni_lcf & (NI_LCF_BENEATH_ABS | + NI_LCF_BENEATH_LATCHED)) == NI_LCF_BENEATH_ABS) + error = ENOTCAPABLE; nameicap_cleanup(ndp); + if ((ndp->ni_lcf & NI_LCF_LATCH) != 0) + vrele(ndp->ni_beneath_latch); SDT_PROBE2(vfs, namei, lookup, return, 0, ndp->ni_vp); - return (0); + return (error); } if (ndp->ni_loopcnt++ >= MAXSYMLINKS) { error = ELOOP; @@ -524,13 +571,18 @@ goto out; } } - vput(ndp->ni_vp); - ndp->ni_vp = NULL; - vrele(ndp->ni_dvp); + namei_cleanup_vp(ndp); out: vrele(ndp->ni_rootdir); + if (error == 0 && (ndp->ni_lcf & (NI_LCF_BENEATH_ABS | + NI_LCF_BENEATH_LATCHED)) == NI_LCF_BENEATH_ABS) { + namei_cleanup_vp(ndp); + error = ENOTCAPABLE; + } namei_cleanup_cnp(cnp); nameicap_cleanup(ndp); + if ((ndp->ni_lcf & NI_LCF_LATCH) != 0) + vrele(ndp->ni_beneath_latch); SDT_PROBE2(vfs, namei, lookup, return, error, NULL); return (error); } Index: sys/sys/namei.h =================================================================== --- sys/sys/namei.h +++ sys/sys/namei.h @@ -100,6 +100,7 @@ */ struct componentname ni_cnd; struct nameicap_tracker_head ni_cap_tracker; + struct vnode *ni_beneath_latch; }; #ifdef _KERNEL @@ -163,6 +164,9 @@ */ #define NI_LCF_STRICTRELATIVE 0x0001 /* relative lookup only */ #define NI_LCF_CAP_DOTDOT 0x0002 /* ".." in strictrelative case */ +#define NI_LCF_BENEATH_ABS 0x0004 /* BENEATH with absolute path */ +#define NI_LCF_BENEATH_LATCHED 0x0008 /* BENEATH_ABS traversed starting dir */ +#define NI_LCF_LATCH 0x0010 /* ni_beneath_latch valid */ /* * Initialization of a nameidata structure.