Index: sys/kern/vfs_lookup.c
===================================================================
--- sys/kern/vfs_lookup.c
+++ sys/kern/vfs_lookup.c
@@ -82,16 +82,38 @@
  */
 static struct vnode *vp_crossmp;
 
+/*
+ * Record of one directory vnode visited by a strictly relative lookup.
+ * nm_link chains it on the per-lookup list ni_cap_tracker; glob_link
+ * chains it on the global list nt_glob, which nameicap_renamed() scans.
+ * renamed is set when dp was reported as moved while being tracked.
+ */
+struct nameicap_tracker {
+	struct vnode *dp;
+	bool renamed;
+	TAILQ_ENTRY(nameicap_tracker) nm_link;
+	TAILQ_ENTRY(nameicap_tracker) glob_link;
+};
+
+static TAILQ_HEAD(, nameicap_tracker) nt_glob =
+    TAILQ_HEAD_INITIALIZER(nt_glob);
+static struct mtx nt_glob_lock;
+static uma_zone_t nt_zone;
+
 static void
 nameiinit(void *dummy __unused)
 {
 
 	namei_zone = uma_zcreate("NAMEI", MAXPATHLEN, NULL, NULL, NULL, NULL,
 	    UMA_ALIGN_PTR, 0);
+	/* UMA expects an alignment mask, not a byte count. */
+	nt_zone = uma_zcreate("rentr", sizeof(struct nameicap_tracker),
+	    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
 	getnewvnode("crossmp", NULL, &dead_vnodeops, &vp_crossmp);
 	vn_lock(vp_crossmp, LK_EXCLUSIVE);
 	VN_LOCK_ASHARE(vp_crossmp);
 	VOP_UNLOCK(vp_crossmp, 0);
+	mtx_init(&nt_glob_lock, "rentr", NULL, MTX_DEF);
 }
 SYSINIT(vfs, SI_SUB_VFS, SI_ORDER_SECOND, nameiinit, NULL);
 
@@ -100,8 +122,81 @@
     "Enables/Disables shared locks for path name translation");
 
+/*
+ * Remember the directory dp, reached by the lookup described by ndp, so
+ * that a later move of dp can be noticed.  Does nothing unless the lookup
+ * is strictly relative and dp is a directory.  dp is held until untracked.
+ */
 static void
+nameicap_tracker_add(struct nameidata *ndp, struct vnode *dp)
+{
+	struct nameicap_tracker *nt;
+
+	ASSERT_VOP_LOCKED(dp, "tracker add");
+	if (ndp->ni_strictrelative == 0 || dp->v_type != VDIR)
+		return;
+	nt = uma_zalloc(nt_zone, M_WAITOK);
+	nt->renamed = false;
+	vhold(dp);
+	nt->dp = dp;
+	TAILQ_INSERT_TAIL(&ndp->ni_cap_tracker, nt, nm_link);
+	mtx_lock(&nt_glob_lock);
+	TAILQ_INSERT_TAIL(&nt_glob, nt, glob_link);
+	mtx_unlock(&nt_glob_lock);
+}
+
+/*
+ * Drop every tracker recorded for ndp and release the vnode holds.
+ * Returns true if any tracked directory was flagged by
+ * nameicap_renamed() while it was on the global list.
+ */
+static bool
+nameicap_untrack(struct nameidata *ndp)
+{
+	struct nameicap_tracker *nt, *nt1;
+	bool renamed;
+
+	KASSERT(ndp->ni_strictrelative != 0, ("not strictrelative"));
+	/* Unhook from the global list first; the flags are then stable. */
+	mtx_lock(&nt_glob_lock);
+	TAILQ_FOREACH(nt, &ndp->ni_cap_tracker, nm_link)
+		TAILQ_REMOVE(&nt_glob, nt, glob_link);
+	mtx_unlock(&nt_glob_lock);
+	renamed = false;
+	TAILQ_FOREACH_SAFE(nt, &ndp->ni_cap_tracker, nm_link, nt1) {
+		if (nt->renamed)
+			renamed = true;
+		TAILQ_REMOVE(&ndp->ni_cap_tracker, nt, nm_link);
+		vdrop(nt->dp);
+		uma_zfree(nt_zone, nt);
+	}
+	return (renamed);
+}
+
+/*
+ * Flag every tracker on the global list that refers to the directory
+ * dp.  The UFS rename code calls this when the source vnode is moved
+ * to a different parent directory.
+ */
+void
+nameicap_renamed(struct vnode *dp)
+{
+	struct nameicap_tracker *nt;
+
+	ASSERT_VOP_LOCKED(dp, "tracker renamed");
+	if (dp->v_type != VDIR)
+		return;
+	mtx_lock(&nt_glob_lock);
+	TAILQ_FOREACH(nt, &nt_glob, glob_link) {
+		if (nt->dp == dp)
+			nt->renamed = true;
+	}
+	mtx_unlock(&nt_glob_lock);
+}
+
+static void
 namei_cleanup_cnp(struct componentname *cnp)
 {
+
 	uma_zfree(namei_zone, cnp->cn_pnbuf);
 #ifdef DIAGNOSTIC
 	cnp->cn_pnbuf = NULL;
@@ -158,12 +253,16 @@
 	char *cp;			/* pointer into pathname argument */
 	struct vnode *dp;		/* the directory we are searching */
 	struct iovec aiov;		/* uio for reading symbolic links */
+	struct componentname *cnp;
+	struct thread *td;
+	struct proc *p;
+	cap_rights_t rights;
 	struct uio auio;
 	int error, linklen, startdir_used;
-	struct componentname *cnp = &ndp->ni_cnd;
-	struct thread *td = cnp->cn_thread;
-	struct proc *p = td->td_proc;
 
+	cnp = &ndp->ni_cnd;
+	td = cnp->cn_thread;
+	p = td->td_proc;
 	ndp->ni_cnd.cn_cred = ndp->ni_cnd.cn_thread->td_ucred;
 	KASSERT(cnp->cn_cred && p, ("namei: bad cred/proc"));
 	KASSERT((cnp->cn_nameiop & (~OPMASK)) == 0,
@@ -178,6 +277,7 @@
 
 	/* We will set this ourselves if we need it. */
 	cnp->cn_flags &= ~TRAILINGSLASH;
+	ndp->ni_capdir = NULL;
 
 	/*
 	 * Get a buffer for the name to be translated, and copy the
@@ -200,9 +300,10 @@
 
 #ifdef CAPABILITY_MODE
 	/*
-	 * In capability mode, lookups must be "strictly relative" (i.e.
-	 * not an absolute path, and not containing '..' components) to
+	 * In capability mode, lookups must be "strictly relative" to
 	 * a real file descriptor, not the pseudo-descriptor AT_FDCWD.
+	 * We ensure that the passed path is not absolute, and that no
+	 * '..' component escapes the relative root.
 	 */
 	if (error == 0 && IN_CAPABILITY_MODE(td) &&
 	    (cnp->cn_flags & NOCAPCHECK) == 0) {
@@ -258,8 +359,6 @@
 			dp = fdp->fd_cdir;
 			VREF(dp);
 		} else {
-			cap_rights_t rights;
-
 			rights = ndp->ni_rightsneeded;
 			cap_rights_set(&rights, CAP_LOOKUP);
 
@@ -299,6 +398,12 @@
 	}
 	SDT_PROBE3(vfs, namei, lookup, entry, dp, cnp->cn_pnbuf,
 	    cnp->cn_flags);
+	if (ndp->ni_strictrelative != 0) {
+		ndp->ni_capdir = dp;
+		vref(dp);
+		TAILQ_INIT(&ndp->ni_cap_tracker);
+		ndp->ni_dotdot = false;
+	}
 	for (;;) {
 		ndp->ni_startdir = dp;
 		error = lookup(ndp);
@@ -308,11 +413,30 @@
 		 * If not a symbolic link, we're done.
 		 */
 		if ((cnp->cn_flags & ISSYMLINK) == 0) {
+			if (ndp->ni_strictrelative && nameicap_untrack(ndp) &&
+			    ndp->ni_dotdot) {
+				/*
+				 * A directory we walked through was moved
+				 * while ".." was in use, so the result may
+				 * lie outside ni_capdir.  Release what
+				 * lookup() returned and take the common
+				 * error exit, which also frees the path
+				 * buffer and fires the return probe.
+				 */
+				NDFREE(ndp, NDF_NO_FREE_PNBUF);
+				error = EAGAIN;
+				goto out;
+			}
+
 			vrele(ndp->ni_rootdir);
 			if ((cnp->cn_flags & (SAVENAME | SAVESTART)) == 0) {
 				namei_cleanup_cnp(cnp);
 			} else
 				cnp->cn_flags |= HASBUF;
+			if (ndp->ni_capdir != NULL) {
+				vrele(ndp->ni_capdir);
+				ndp->ni_capdir = NULL;
+			}
 
 			SDT_PROBE2(vfs, namei, lookup, return, 0, ndp->ni_vp);
 			return (0);
@@ -387,6 +511,11 @@
 out:
 	vrele(ndp->ni_rootdir);
 	namei_cleanup_cnp(cnp);
+	if (ndp->ni_capdir != NULL) {
+		vrele(ndp->ni_capdir);
+		ndp->ni_capdir = NULL;
+		nameicap_untrack(ndp);
+	}
 	SDT_PROBE2(vfs, namei, lookup, return, error, NULL);
 	return (error);
 }
@@ -608,6 +737,7 @@
 		else if (cnp->cn_flags & AUDITVNODE2)
 			AUDIT_ARG_VNODE2(dp);
 
+		nameicap_tracker_add(ndp, dp);
 		if (!(cnp->cn_flags & (LOCKPARENT | LOCKLEAF)))
 			VOP_UNLOCK(dp, 0);
 		/* XXX This should probably move to the top of function. */
@@ -618,9 +748,10 @@
 
 	/*
 	 * Handle "..": five special cases.
-	 * 0. If doing a capability lookup, return ENOTCAPABLE (this is a
-	 *    fairly conservative design choice, but it's the only one that we
-	 *    are satisfied guarantees the property we're looking for).
+	 * 0. If doing a capability lookup, return ENOTCAPABLE when the
+	 *    lookup would escape from the initial file descriptor
+	 *    directory.  This check is done before the other ones, to
+	 *    have cleaner semantics for e.g. jail roots or mp traversals.
 	 * 1. Return an error if this is the last component of
 	 *    the name and the operation is DELETE or RENAME.
 	 * 2. If at root directory (e.g. after chroot)
@@ -635,12 +766,15 @@
 	 */
 	if (cnp->cn_flags & ISDOTDOT) {
 		if (ndp->ni_strictrelative != 0) {
+			if (dp == ndp->ni_capdir) {
 #ifdef KTRACE
-			if (KTRPOINT(curthread, KTR_CAPFAIL))
-				ktrcapfail(CAPFAIL_LOOKUP, NULL, NULL);
+				if (KTRPOINT(curthread, KTR_CAPFAIL))
+					ktrcapfail(CAPFAIL_LOOKUP, NULL, NULL);
 #endif
-			error = ENOTCAPABLE;
-			goto bad;
+				error = ENOTCAPABLE;
+				goto bad;
+			}
+			ndp->ni_dotdot = true;
 		}
 		if ((cnp->cn_flags & ISLASTCN) != 0 &&
 		    (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME)) {
@@ -676,6 +810,7 @@
 			vn_lock(dp,
 			    compute_cn_lkflags(dp->v_mount, cnp->cn_lkflags |
 			    LK_RETRY, ISDOTDOT));
+			nameicap_tracker_add(ndp, dp);
 		}
 	}
 
@@ -783,6 +918,7 @@
 	printf("found\n");
 #endif
 	dp = ndp->ni_vp;
+	nameicap_tracker_add(ndp, dp);
 
 	/*
 	 * Check to see if the vnode has been mounted on;
@@ -809,6 +945,7 @@
 			goto bad2;
 		}
 		ndp->ni_vp = dp = tdp;
+		nameicap_tracker_add(ndp, dp);
 	}
 
 	/*
Index: sys/sys/namei.h
===================================================================
--- sys/sys/namei.h
+++ sys/sys/namei.h
@@ -71,6 +71,7 @@
 	struct	vnode *ni_startdir;	/* starting directory */
 	struct	vnode *ni_rootdir;	/* logical root directory */
 	struct	vnode *ni_topdir;	/* logical top directory */
+	struct	vnode *ni_capdir;	/* logical top dir for cap lookups */
 	int	ni_dirfd;		/* starting directory for *at functions */
 	int	ni_strictrelative;	/* relative lookup only; no '..' */
 	/*
@@ -94,6 +95,8 @@
 	 * through the VOP interface.
 	 */
 	struct componentname ni_cnd;
+	TAILQ_HEAD(, nameicap_tracker) ni_cap_tracker;
+	_Bool	ni_dotdot;
 };
 
 #ifdef _KERNEL
@@ -183,6 +186,7 @@
 int	lookup(struct nameidata *ndp);
 int	relookup(struct vnode *dvp, struct vnode **vpp,
 	    struct componentname *cnp);
+void	nameicap_renamed(struct vnode *dp);
 #endif
 
 /*
Index: sys/ufs/ufs/ufs_vnops.c
===================================================================
--- sys/ufs/ufs/ufs_vnops.c
+++ sys/ufs/ufs/ufs_vnops.c
@@ -1517,6 +1517,8 @@
 	if (tvp)
 		cache_purge(tvp);
 	cache_purge_negative(tdvp);
+	if (fdvp != tdvp)
+		nameicap_renamed(fvp);
 
 unlockout:
 	vput(fdvp);