diff --git a/sys/kern/vfs_default.c b/sys/kern/vfs_default.c --- a/sys/kern/vfs_default.c +++ b/sys/kern/vfs_default.c @@ -276,49 +276,56 @@ } /* - * Check if a named file exists in a given directory vnode. + * Check if a named file exists in a given directory vnode + * + * Returns 0 if the file exists, ENOENT if it doesn't, or errors returned by + * vn_dir_next_dirent(). */ static int dirent_exists(struct vnode *vp, const char *dirname, struct thread *td) { - char *dirbuf, *cpos; - int error, eofflag, dirbuflen, len, found; + char *dirbuf; + int error, eofflag; + size_t dirbuflen, len; off_t off; struct dirent *dp; struct vattr va; - KASSERT(VOP_ISLOCKED(vp), ("vp %p is not locked", vp)); + ASSERT_VOP_LOCKED(vp, "vnode not locked"); KASSERT(vp->v_type == VDIR, ("vp %p is not a directory", vp)); - found = 0; - error = VOP_GETATTR(vp, &va, td->td_ucred); - if (error) - return (found); + if (error != 0) + return (error); - dirbuflen = DEV_BSIZE; + dirbuflen = MAX(DEV_BSIZE, GENERIC_MAXDIRSIZ); if (dirbuflen < va.va_blocksize) dirbuflen = va.va_blocksize; - dirbuf = (char *)malloc(dirbuflen, M_TEMP, M_WAITOK); + dirbuf = malloc(dirbuflen, M_TEMP, M_WAITOK); - off = 0; len = 0; - do { - error = vn_dir_next_dirent(vp, &dp, dirbuf, dirbuflen, &off, - &cpos, &len, &eofflag, td); - if (error) + off = 0; + eofflag = 0; + + for (;;) { + error = vn_dir_next_dirent(vp, td, dirbuf, dirbuflen, + &dp, &len, &off, &eofflag); + if (error != 0) goto out; + if (len == 0) + break; + if (dp->d_type != DT_WHT && dp->d_fileno != 0 && - strcmp(dp->d_name, dirname) == 0) { - found = 1; + strcmp(dp->d_name, dirname) == 0) goto out; - } - } while (len > 0 || !eofflag); + } + + error = ENOENT; out: free(dirbuf, M_TEMP); - return (found); + return (error); } int @@ -672,27 +679,24 @@ int vop_stdvptocnp(struct vop_vptocnp_args *ap) { - struct vnode *vp = ap->a_vp; - struct vnode **dvp = ap->a_vpp; - struct ucred *cred; + struct vnode *const vp = ap->a_vp; + struct vnode **const dvp = ap->a_vpp; char *buf = ap->a_buf; size_t *buflen = ap->a_buflen; - char *dirbuf, *cpos; - int i, error, eofflag, dirbuflen, flags, locked, len, covered; + char *dirbuf; + int i = *buflen; + int error = 0, covered = 0; + int eofflag, flags, locked; + size_t dirbuflen, len; off_t off; ino_t fileno; struct vattr va; struct nameidata nd; - struct thread *td; + struct thread *const td = curthread; + struct ucred *const cred = td->td_ucred; struct dirent *dp; struct vnode *mvp; - i = *buflen; - error = 0; - covered = 0; - td = curthread; - cred = td->td_ucred; - if (vp->v_type != VDIR) return (ENOENT); @@ -729,7 +733,7 @@ fileno = va.va_fileid; - dirbuflen = DEV_BSIZE; + dirbuflen = MAX(DEV_BSIZE, GENERIC_MAXDIRSIZ); if (dirbuflen < va.va_blocksize) dirbuflen = va.va_blocksize; dirbuf = (char *)malloc(dirbuflen, M_TEMP, M_WAITOK); @@ -739,21 +743,26 @@ goto out; } - off = 0; len = 0; - do { + off = 0; + eofflag = 0; + + for (;;) { /* call VOP_READDIR of parent */ - error = vn_dir_next_dirent(*dvp, &dp, dirbuf, dirbuflen, &off, - &cpos, &len, &eofflag, td); - if (error) + error = vn_dir_next_dirent(*dvp, td, + dirbuf, dirbuflen, &dp, &len, &off, &eofflag); + if (error != 0) goto out; + if (len == 0) + break; + if ((dp->d_type != DT_WHT) && (dp->d_fileno == fileno)) { if (covered) { VOP_UNLOCK(*dvp); vn_lock(mvp, LK_SHARED | LK_RETRY); - if (dirent_exists(mvp, dp->d_name, td)) { + if (dirent_exists(mvp, dp->d_name, td) == 0) { error = ENOENT; VOP_UNLOCK(mvp); vn_lock(*dvp, LK_SHARED | LK_RETRY); @@ -776,7 +785,7 @@ } goto out; } - } while (len > 0 || !eofflag); + } error = ENOENT; out: diff --git a/sys/kern/vfs_vnops.c b/sys/kern/vfs_vnops.c --- a/sys/kern/vfs_vnops.c +++ b/sys/kern/vfs_vnops.c @@ -3740,22 +3740,112 @@ return (error); } -#define DIRENT_MINSIZE (sizeof(struct dirent) - (MAXNAMLEN+1) + 4) +/* + * Keep this assert as long as sizeof(struct dirent) is used as the maximum + * entry size. + */ +_Static_assert(_GENERIC_MAXDIRSIZ == sizeof(struct dirent), + "'struct dirent' size must be a multiple of its alignment " + "(see _GENERIC_DIRLEN())"); +/* + * Returns successive directory entries through some caller's provided buffer + * + * This function automatically refills the provided buffer with calls to + * VOP_READDIR() (after MAC permission checks). + * + * 'td' is used for credentials and passed to uiomove(). 'dirbuf' is the + * caller's buffer to fill and 'dirbuflen' its allocated size. 'dirbuf' must be + * properly aligned to access 'struct dirent' structures and 'dirbuflen' must + * be greater than GENERIC_MAXDIRSIZ to avoid VOP_READDIR() returning EINVAL + * (the latter is not a strong guarantee (yet); but EINVAL will always be + * returned if this requirement is not verified). '*dpp' points to the current + * directory entry in the buffer and '*len' contains the remaining valid bytes + * in 'dirbuf' after 'dpp' (including the pointed entry). + * + * At first call (or when restarting the read), '*len' must have been set to 0, + * '*off' to 0 (or any valid start offset) and '*eofflag' to 0. There are no + * more entries as soon as '*len' is 0 after a call that returned 0. Calling + * again this function after such a condition is considered an error and EINVAL + * will be returned. Other possible error codes are those of VOP_READDIR(), + * EINTEGRITY if the returned entries do not pass coherency tests, or EINVAL + * (bad call). All errors are unrecoverable, i.e., the state ('*len', '*off' + * and '*eofflag') must be re-initialized before a subsequent call. On error or + * at end of directory, '*dpp' is reset to NULL. + * + * '*len', '*off' and '*eofflag' are internal state the caller should not + * tamper with except as explained above. '*off' is the next directory offset + * to read from to refill the buffer. '*eofflag' is set to 0 or 1 by the last + * internal call to VOP_READDIR() that returned without error, indicating + * whether it reached the end of the directory, and to 2 by this function after + * all entries have been read. + */ int -vn_dir_next_dirent(struct vnode *vp, struct dirent **dpp, char *dirbuf, - int dirbuflen, off_t *off, char **cpos, int *len, - int *eofflag, struct thread *td) +vn_dir_next_dirent(struct vnode *vp, struct thread *td, + char *dirbuf, size_t dirbuflen, + struct dirent **dpp, size_t *len, off_t *off, int *eofflag) { - int error, reclen; + struct dirent *dp = NULL; + int reclen; + int error; struct uio uio; struct iovec iov; - struct dirent *dp; - KASSERT(VOP_ISLOCKED(vp), ("vp %p is not locked", vp)); - KASSERT(vp->v_type == VDIR, ("vp %p is not a directory", vp)); + ASSERT_VOP_LOCKED(vp, "vnode not locked"); + VNASSERT(vp->v_type == VDIR, vp, ("vnode is not a directory")); + MPASS2((uintptr_t)dirbuf < (uintptr_t)dirbuf + dirbuflen, + "Address space overflow"); + + if (__predict_false(dirbuflen < GENERIC_MAXDIRSIZ)) { + /* Don't take any chances in this case */ + error = EINVAL; + goto out; + } + + if (*len != 0) { + dp = *dpp; + + /* + * The caller continued to call us after an error (we set dp to + * NULL in a previous iteration). Bail out right now. + */ + if (__predict_false(dp == NULL)) + return (EINVAL); + + MPASS(*len <= dirbuflen); + MPASS2((uintptr_t)dirbuf <= (uintptr_t)dp && + (uintptr_t)dp + *len <= (uintptr_t)dirbuf + dirbuflen, + "Filled range not inside buffer"); + + reclen = dp->d_reclen; + if (reclen >= *len) { + /* End of buffer reached */ + *len = 0; + } else { + dp = (struct dirent *)((char *)dp + reclen); + *len -= reclen; + } + } if (*len == 0) { + dp = NULL; + + /* Have to refill. */ + switch (*eofflag) { + case 0: + break; + + case 1: + /* Nothing more to read. */ + *eofflag = 2; /* Remember the caller reached EOF. */ + goto success; + + default: + /* The caller didn't test for EOF. */ + error = EINVAL; + goto out; + } + iov.iov_base = dirbuf; iov.iov_len = dirbuflen; @@ -3767,40 +3857,50 @@ uio.uio_rw = UIO_READ; uio.uio_td = td; - *eofflag = 0; - #ifdef MAC error = mac_vnode_check_readdir(td->td_ucred, vp); if (error == 0) #endif error = VOP_READDIR(vp, &uio, td->td_ucred, eofflag, - NULL, NULL); - if (error) - return (error); + NULL, NULL); + if (error != 0) + goto out; + *len = dirbuflen - uio.uio_resid; *off = uio.uio_offset; - *cpos = dirbuf; - *len = (dirbuflen - uio.uio_resid); + if (*len == 0) { + /* Sanity check on INVARIANTS. */ + MPASS(*eofflag != 0); + *eofflag = 1; + goto success; + } - if (*len == 0) - return (ENOENT); + /* + * Normalize the flag returned by VOP_READDIR(), since we use 2 + * as a sentinel value. + */ + if (*eofflag != 0) + *eofflag = 1; + + dp = (struct dirent *)dirbuf; } - dp = (struct dirent *)(*cpos); - reclen = dp->d_reclen; + if (__predict_false((*len < GENERIC_MINDIRSIZ) || + (dp->d_reclen < GENERIC_MINDIRSIZ))) { + error = EINTEGRITY; + dp = NULL; + goto out; + } + +success: + error = 0; +out: *dpp = dp; - - /* check for malformed directory.. */ - if (reclen < DIRENT_MINSIZE) - return (EINVAL); - - *cpos += reclen; - *len -= reclen; - - return (0); + return (error); } + static u_long vn_lock_pair_pause_cnt; SYSCTL_ULONG(_debug, OID_AUTO, vn_lock_pair_pause, CTLFLAG_RD, &vn_lock_pair_pause_cnt, 0, diff --git a/sys/sys/dirent.h b/sys/sys/dirent.h --- a/sys/sys/dirent.h +++ b/sys/sys/dirent.h @@ -122,11 +122,14 @@ #define _GENERIC_DIRLEN(namlen) \ ((__offsetof(struct dirent, d_name) + (namlen) + 1 + 7) & ~7) #define _GENERIC_DIRSIZ(dp) _GENERIC_DIRLEN((dp)->d_namlen) +#define _GENERIC_MINDIRSIZ _GENERIC_DIRLEN(1) /* Name must not be empty */ +#define _GENERIC_MAXDIRSIZ _GENERIC_DIRLEN(MAXNAMLEN) #endif /* __BSD_VISIBLE */ #ifdef _KERNEL #define GENERIC_DIRSIZ(dp) _GENERIC_DIRSIZ(dp) - +#define GENERIC_MINDIRSIZ _GENERIC_MINDIRSIZ +#define GENERIC_MAXDIRSIZ _GENERIC_MAXDIRSIZ /* * Ensure that padding bytes are zeroed and that the name is NUL-terminated. */ diff --git a/sys/sys/vnode.h b/sys/sys/vnode.h --- a/sys/sys/vnode.h +++ b/sys/sys/vnode.h @@ -1099,9 +1099,9 @@ int vfs_kqfilter(struct vop_kqfilter_args *); struct dirent; -int vn_dir_next_dirent(struct vnode *vp, struct dirent **dpp, char *dirbuf, - int dirbuflen, off_t *off, char **cpos, int *len, - int *eofflag, struct thread *td); +int vn_dir_next_dirent(struct vnode *vp, struct thread *td, + char *dirbuf, size_t dirbuflen, + struct dirent **dpp, size_t *len, off_t *off, int *eofflag); int vfs_read_dirent(struct vop_readdir_args *ap, struct dirent *dp, off_t off); int vfs_emptydir(struct vnode *vp);