diff --git a/sys/kern/vfs_default.c b/sys/kern/vfs_default.c --- a/sys/kern/vfs_default.c +++ b/sys/kern/vfs_default.c @@ -75,15 +75,9 @@ static int vop_nolookup(struct vop_lookup_args *); static int vop_norename(struct vop_rename_args *); static int vop_nostrategy(struct vop_strategy_args *); -static int get_next_dirent(struct vnode *vp, struct dirent **dpp, - char *dirbuf, int dirbuflen, off_t *off, - char **cpos, int *len, int *eofflag, - struct thread *td); static int dirent_exists(struct vnode *vp, const char *dirname, struct thread *td); -#define DIRENT_MINSIZE (sizeof(struct dirent) - (MAXNAMLEN+1) + 4) - static int vop_stdis_text(struct vop_is_text_args *ap); static int vop_stdunset_text(struct vop_unset_text_args *ap); static int vop_stdadd_writecount(struct vop_add_writecount_args *ap); @@ -281,73 +275,18 @@ return (EOPNOTSUPP); } -static int -get_next_dirent(struct vnode *vp, struct dirent **dpp, char *dirbuf, - int dirbuflen, off_t *off, char **cpos, int *len, - int *eofflag, struct thread *td) -{ - int error, reclen; - struct uio uio; - struct iovec iov; - struct dirent *dp; - - KASSERT(VOP_ISLOCKED(vp), ("vp %p is not locked", vp)); - KASSERT(vp->v_type == VDIR, ("vp %p is not a directory", vp)); - - if (*len == 0) { - iov.iov_base = dirbuf; - iov.iov_len = dirbuflen; - - uio.uio_iov = &iov; - uio.uio_iovcnt = 1; - uio.uio_offset = *off; - uio.uio_resid = dirbuflen; - uio.uio_segflg = UIO_SYSSPACE; - uio.uio_rw = UIO_READ; - uio.uio_td = td; - - *eofflag = 0; - -#ifdef MAC - error = mac_vnode_check_readdir(td->td_ucred, vp); - if (error == 0) -#endif - error = VOP_READDIR(vp, &uio, td->td_ucred, eofflag, - NULL, NULL); - if (error) - return (error); - - *off = uio.uio_offset; - - *cpos = dirbuf; - *len = (dirbuflen - uio.uio_resid); - - if (*len == 0) - return (ENOENT); - } - - dp = (struct dirent *)(*cpos); - reclen = dp->d_reclen; - *dpp = dp; - - /* check for malformed directory.. 
*/ - if (reclen < DIRENT_MINSIZE) - return (EINVAL); - - *cpos += reclen; - *len -= reclen; - - return (0); -} - /* - * Check if a named file exists in a given directory vnode. + * Check if a named file exists in a given directory vnode + * + * Returns 0 if the file exists, ENOENT if it doesn't, or errors returned by + * vfs_next_dirent(). */ static int dirent_exists(struct vnode *vp, const char *dirname, struct thread *td) { - char *dirbuf, *cpos; - int error, eofflag, dirbuflen, len, found; + char *dirbuf; + int error, eofflag; + size_t dirbuflen, len; off_t off; struct dirent *dp; struct vattr va; @@ -355,35 +294,38 @@ KASSERT(VOP_ISLOCKED(vp), ("vp %p is not locked", vp)); KASSERT(vp->v_type == VDIR, ("vp %p is not a directory", vp)); - found = 0; - error = VOP_GETATTR(vp, &va, td->td_ucred); - if (error) - return (found); + if (error != 0) + return (error); - dirbuflen = DEV_BSIZE; + dirbuflen = MAX(DEV_BSIZE, GENERIC_MAXDIRSIZ); if (dirbuflen < va.va_blocksize) dirbuflen = va.va_blocksize; dirbuf = (char *)malloc(dirbuflen, M_TEMP, M_WAITOK); - off = 0; len = 0; - do { - error = get_next_dirent(vp, &dp, dirbuf, dirbuflen, &off, - &cpos, &len, &eofflag, td); - if (error) + off = 0; + eofflag = 0; + + for (;;) { + error = vfs_next_dirent(vp, td, dirbuf, dirbuflen, + &dp, &len, &off, &eofflag); + if (error != 0) goto out; + if (len == 0) + break; + if (dp->d_type != DT_WHT && dp->d_fileno != 0 && - strcmp(dp->d_name, dirname) == 0) { - found = 1; + strcmp(dp->d_name, dirname) == 0) goto out; - } - } while (len > 0 || !eofflag); + } + + error = ENOENT; out: free(dirbuf, M_TEMP); - return (found); + return (error); } int @@ -737,27 +679,24 @@ int vop_stdvptocnp(struct vop_vptocnp_args *ap) { - struct vnode *vp = ap->a_vp; - struct vnode **dvp = ap->a_vpp; - struct ucred *cred; + struct vnode *const vp = ap->a_vp; + struct vnode **const dvp = ap->a_vpp; char *buf = ap->a_buf; size_t *buflen = ap->a_buflen; - char *dirbuf, *cpos; - int i, error, eofflag, dirbuflen, 
flags, locked, len, covered; + char *dirbuf; + int i = *buflen; + int error = 0, covered = 0; + int eofflag, flags, locked; + size_t dirbuflen, len; off_t off; ino_t fileno; struct vattr va; struct nameidata nd; - struct thread *td; + struct thread *const td = curthread; + struct ucred *const cred = td->td_ucred; struct dirent *dp; struct vnode *mvp; - i = *buflen; - error = 0; - covered = 0; - td = curthread; - cred = td->td_ucred; - if (vp->v_type != VDIR) return (ENOENT); @@ -794,7 +733,7 @@ fileno = va.va_fileid; - dirbuflen = DEV_BSIZE; + dirbuflen = MAX(DEV_BSIZE, GENERIC_MAXDIRSIZ); if (dirbuflen < va.va_blocksize) dirbuflen = va.va_blocksize; dirbuf = (char *)malloc(dirbuflen, M_TEMP, M_WAITOK); @@ -804,21 +743,26 @@ goto out; } - off = 0; len = 0; - do { + off = 0; + eofflag = 0; + + for (;;) { /* call VOP_READDIR of parent */ - error = get_next_dirent(*dvp, &dp, dirbuf, dirbuflen, &off, - &cpos, &len, &eofflag, td); + error = vfs_next_dirent(*dvp, td, + dirbuf, dirbuflen, &dp, &len, &off, &eofflag); if (error) goto out; + if (len == 0) + break; + if ((dp->d_type != DT_WHT) && (dp->d_fileno == fileno)) { if (covered) { VOP_UNLOCK(*dvp); vn_lock(mvp, LK_SHARED | LK_RETRY); - if (dirent_exists(mvp, dp->d_name, td)) { + if (dirent_exists(mvp, dp->d_name, td) == 0) { error = ENOENT; VOP_UNLOCK(mvp); vn_lock(*dvp, LK_SHARED | LK_RETRY); @@ -841,7 +785,7 @@ } goto out; } - } while (len > 0 || !eofflag); + } error = ENOENT; out: diff --git a/sys/kern/vfs_subr.c b/sys/kern/vfs_subr.c --- a/sys/kern/vfs_subr.c +++ b/sys/kern/vfs_subr.c @@ -6382,6 +6382,142 @@ return (res); } +/* Keep this assert as long as sizeof(struct dirent) is used as the maximum + * entry size. 
*/
+_Static_assert(_GENERIC_MAXDIRSIZ == sizeof(struct dirent),
+    "'struct dirent' size must be a multiple of its alignment "
+    "(see _GENERIC_DIRLEN())");
+
+/*
+ * Returns successive directory entries through some caller's provided buffer.
+ *
+ * This function automatically refills the provided buffer with calls to
+ * VOP_READDIR() (after MAC permission checks).
+ *
+ * 'td' is used for credentials and passed to uiomove(). 'dirbuf' is the
+ * caller's buffer to fill and 'dirbuflen' its allocated size. 'dirbuf' must be
+ * properly aligned to access 'struct dirent' structures and 'dirbuflen' must
+ * be at least GENERIC_MAXDIRSIZ to avoid VOP_READDIR() returning EINVAL
+ * (the latter is not a strong guarantee (yet); but EINVAL will always be
+ * returned if this requirement is not verified). '*dpp' points to the current
+ * directory entry in the buffer and '*len' contains the remaining valid bytes
+ * in 'dirbuf' after 'dpp' (including this entry).
+ *
+ * At first call (or when restarting the read), '*len' must have been set to 0,
+ * '*off' to 0 (or any valid start offset) and '*eofflag' to 0. There are no
+ * more entries as soon as '*len' is 0 after a call that returned 0. Calling
+ * this function again after such a condition is considered an error and EINVAL
+ * will be returned. Other possible error codes are those of VOP_READDIR() or
+ * EINTEGRITY if a returned entry is shorter than GENERIC_MINDIRSIZ.
+ *
+ * '*off' and '*eofflag' are internal state the caller should not tamper with,
+ * except as explained in the previous paragraph. '*off' is the next directory
+ * offset to read from to refill the buffer. '*eofflag' is set to 0 or 1 by the
+ * last internal call to VOP_READDIR() that returned without error, indicating
+ * whether it reached the end of the directory, and to 2 by this function after
+ * all entries have been read.
+ */
+int
+vfs_next_dirent(struct vnode *vp, struct thread *td,
+    char *dirbuf, size_t dirbuflen,
+    struct dirent **dpp, size_t *len, off_t *off, int *eofflag)
+{
+	struct dirent *dp;
+	int reclen;
+	int error;
+	struct uio uio;
+	struct iovec iov;
+
+	VNASSERT(VOP_ISLOCKED(vp), vp, ("vnode not locked"));
+	VNASSERT(vp->v_type == VDIR, vp, ("vnode is not a directory"));
+	MPASS2(dirbuf < dirbuf + dirbuflen, "Address space overflow");
+
+	if (__predict_false(dirbuflen < GENERIC_MAXDIRSIZ))
+		/* Don't take any chances in this case */
+		return (EINVAL);
+
+	if (*len != 0) {
+		dp = *dpp;
+
+		MPASS(*len <= dirbuflen);
+		MPASS2((uintptr_t)dirbuf <= (uintptr_t)dp &&
+		    (uintptr_t)dp + *len <= (uintptr_t)dirbuf + dirbuflen,
+		    "Filled range not inside buffer");
+
+		reclen = dp->d_reclen;
+		if (reclen >= *len)
+			/* End of buffer reached */
+			*len = 0;
+		else {
+			dp = (struct dirent *)((char *)dp + reclen);
+			*len -= reclen;
+		}
+	}
+
+	if (*len == 0) {
+		/* Have to refill */
+		switch (*eofflag) {
+		case 0:
+			break;
+
+		case 1:
+			/* Nothing more to read. */
+			*eofflag = 2; /* Remember the caller reached EOF. */
+			return (0);
+
+		default:
+			/* The caller didn't test for EOF */
+			return (EINVAL);
+		}
+
+		iov.iov_base = dirbuf;
+		iov.iov_len = dirbuflen;
+
+		uio.uio_iov = &iov;
+		uio.uio_iovcnt = 1;
+		uio.uio_offset = *off;
+		uio.uio_resid = dirbuflen;
+		uio.uio_segflg = UIO_SYSSPACE;
+		uio.uio_rw = UIO_READ;
+		uio.uio_td = td;
+
+#ifdef MAC
+		error = mac_vnode_check_readdir(td->td_ucred, vp);
+		if (error == 0)
+#endif
+			error = VOP_READDIR(vp, &uio, td->td_ucred, eofflag,
+			    NULL, NULL);
+		if (error != 0)
+			return (error);
+
+		*len = dirbuflen - uio.uio_resid;
+		*off = uio.uio_offset;
+
+		if (*len == 0) {
+			/* Sanity check on INVARIANTS */
+			MPASS(*eofflag != 0);
+			*eofflag = 1;
+			dp = NULL;
+			goto end;
+		}
+
+		/* Normalize the flag returned by VOP_READDIR() */
+		if (*eofflag != 0)
+			*eofflag = 1;
+
+		dp = (struct dirent *)dirbuf;
+	}
+
+	/* Enforced without INVARIANTS too: a null 'd_reclen' never advances. */
+	if (__predict_false(*len < GENERIC_MINDIRSIZ ||
+	    dp->d_reclen < GENERIC_MINDIRSIZ))
+		return (EINTEGRITY);
+
+end:
+	*dpp = dp;
+	return (0);
+}
+
 /*
 * Returns whether the directory is empty or not. 
* If it is empty, the return value is 0; otherwise diff --git a/sys/sys/dirent.h b/sys/sys/dirent.h --- a/sys/sys/dirent.h +++ b/sys/sys/dirent.h @@ -65,7 +65,7 @@ struct dirent { ino_t d_fileno; /* file number of entry */ - off_t d_off; /* directory offset of entry */ + off_t d_off; /* directory offset of next entry */ __uint16_t d_reclen; /* length of this record */ __uint8_t d_type; /* file type, see below */ __uint8_t d_pad0; @@ -122,11 +122,14 @@ #define _GENERIC_DIRLEN(namlen) \ ((__offsetof(struct dirent, d_name) + (namlen) + 1 + 7) & ~7) #define _GENERIC_DIRSIZ(dp) _GENERIC_DIRLEN((dp)->d_namlen) +#define _GENERIC_MINDIRSIZ _GENERIC_DIRLEN(1) /* Name must not be empty */ +#define _GENERIC_MAXDIRSIZ _GENERIC_DIRLEN(MAXNAMLEN) #endif /* __BSD_VISIBLE */ #ifdef _KERNEL #define GENERIC_DIRSIZ(dp) _GENERIC_DIRSIZ(dp) - +#define GENERIC_MINDIRSIZ _GENERIC_MINDIRSIZ +#define GENERIC_MAXDIRSIZ _GENERIC_MAXDIRSIZ /* * Ensure that padding bytes are zeroed and that the name is NUL-terminated. */ diff --git a/sys/sys/vnode.h b/sys/sys/vnode.h --- a/sys/sys/vnode.h +++ b/sys/sys/vnode.h @@ -1100,6 +1100,9 @@ int vfs_kqfilter(struct vop_kqfilter_args *); struct dirent; int vfs_read_dirent(struct vop_readdir_args *ap, struct dirent *dp, off_t off); +int vfs_next_dirent(struct vnode *vp, struct thread *td, + char *dirbuf, size_t dirbuflen, + struct dirent **dpp, size_t *len, off_t *off, int *eofflag); int vfs_emptydir(struct vnode *vp); int vfs_unixify_accmode(accmode_t *accmode);