diff --git a/sys/fs/ext2fs/ext2_vnops.c b/sys/fs/ext2fs/ext2_vnops.c --- a/sys/fs/ext2fs/ext2_vnops.c +++ b/sys/fs/ext2fs/ext2_vnops.c @@ -2207,8 +2207,9 @@ * Maybe this should be above the vnode op call, but so long as * file servers have no limits, I don't think it matters. */ - if (vn_rlimit_fsize(vp, uio, uio->uio_td)) - return (EFBIG); + error = vn_rlimit_fsize(vp, uio, uio->uio_td); + if (error != 0) + return (error); resid = uio->uio_resid; osize = ip->i_size; diff --git a/sys/fs/fuse/fuse_io.c b/sys/fs/fuse/fuse_io.c --- a/sys/fs/fuse/fuse_io.c +++ b/sys/fs/fuse/fuse_io.c @@ -338,8 +338,9 @@ if (ioflag & IO_APPEND) uio_setoffset(uio, filesize); - if (vn_rlimit_fsize(vp, uio, uio->uio_td)) - return (EFBIG); + err = vn_rlimit_fsize(vp, uio, uio->uio_td); + if (err != 0) + return (err); fdisp_init(&fdi, 0); @@ -493,8 +494,9 @@ if (ioflag & IO_APPEND) uio_setoffset(uio, filesize); - if (vn_rlimit_fsize(vp, uio, uio->uio_td)) - return (EFBIG); + err = vn_rlimit_fsize(vp, uio, uio->uio_td); + if (err != 0) + return (err); do { bool direct_append, extending; diff --git a/sys/fs/msdosfs/msdosfs_vnops.c b/sys/fs/msdosfs/msdosfs_vnops.c --- a/sys/fs/msdosfs/msdosfs_vnops.c +++ b/sys/fs/msdosfs/msdosfs_vnops.c @@ -456,6 +456,9 @@ */ break; } + error = vn_rlimit_trunc(vap->va_size, td); + if (error != 0) + return (error); error = detrunc(dep, vap->va_size, 0, cred); if (error) return error; @@ -611,7 +614,7 @@ { int n; int croffset; - ssize_t resid; + ssize_t resid, r; u_long osize; int error = 0; u_long count; @@ -656,15 +659,15 @@ /* * The caller is supposed to ensure that * uio->uio_offset >= 0 and uio->uio_resid >= 0. - */ - if ((uoff_t)uio->uio_offset + uio->uio_resid > MSDOSFS_FILESIZE_MAX) - return (EFBIG); - - /* + * * If they've exceeded their filesize limit, tell them about it. 
*/ - if (vn_rlimit_fsize(vp, uio, uio->uio_td)) - return (EFBIG); + error = vn_rlimit_fsizex(vp, uio, MSDOSFS_FILESIZE_MAX, &r, + uio->uio_td); + if (error != 0) { + vn_rlimit_fsizex_res(uio, r); + return (error); + } /* * If the offset we are starting the write at is beyond the end of @@ -674,8 +677,10 @@ */ if (uio->uio_offset > dep->de_FileSize) { error = deextend(dep, uio->uio_offset, cred); - if (error) + if (error != 0) { + vn_rlimit_fsizex_res(uio, r); return (error); + } } /* @@ -818,6 +823,7 @@ } } else if (ioflag & IO_SYNC) error = deupdat(dep, 1); + vn_rlimit_fsizex_res(uio, r); return (error); } diff --git a/sys/fs/nfsclient/nfs_clbio.c b/sys/fs/nfsclient/nfs_clbio.c --- a/sys/fs/nfsclient/nfs_clbio.c +++ b/sys/fs/nfsclient/nfs_clbio.c @@ -1034,8 +1034,9 @@ * Maybe this should be above the vnode op call, but so long as * file servers have no limits, i don't think it matters */ - if (vn_rlimit_fsize(vp, uio, td)) - return (EFBIG); + error = vn_rlimit_fsize(vp, uio, td); + if (error != 0) + return (error); save2 = curthread_pflags2_set(TDP2_SBPAGES); biosize = vp->v_bufobj.bo_bsize; diff --git a/sys/fs/smbfs/smbfs_io.c b/sys/fs/smbfs/smbfs_io.c --- a/sys/fs/smbfs/smbfs_io.c +++ b/sys/fs/smbfs/smbfs_io.c @@ -285,8 +285,9 @@ if (uiop->uio_resid == 0) return 0; - if (vn_rlimit_fsize(vp, uiop, td)) - return (EFBIG); + error = vn_rlimit_fsize(vp, uiop, td); + if (error != 0) + return (error); scred = smbfs_malloc_scred(); smb_makescred(scred, td, cred); diff --git a/sys/fs/tmpfs/tmpfs_subr.c b/sys/fs/tmpfs/tmpfs_subr.c --- a/sys/fs/tmpfs/tmpfs_subr.c +++ b/sys/fs/tmpfs/tmpfs_subr.c @@ -120,7 +120,8 @@ */ if (vp == NULL) { KASSERT((object->flags & OBJ_TMPFS_VREF) == 0, - ("object %p with OBJ_TMPFS_VREF but without vnode", object)); + ("object %p with OBJ_TMPFS_VREF but without vnode", + object)); VM_OBJECT_WUNLOCK(object); return; } @@ -131,7 +132,8 @@ VNPASS(vp->v_usecount > 0, vp); } else { VNASSERT((object->flags & OBJ_TMPFS_VREF) != 0, vp, - ("object with 
writable mappings does not have a reference")); + ("object with writable mappings does not " + "have a reference")); } if (old == new) { @@ -534,11 +536,13 @@ symlink = NULL; if (!tmp->tm_nonc) { - symlink = cache_symlink_alloc(nnode->tn_size + 1, M_WAITOK); + symlink = cache_symlink_alloc(nnode->tn_size + 1, + M_WAITOK); symlink_smr = true; } if (symlink == NULL) { - symlink = malloc(nnode->tn_size + 1, M_TMPFSNAME, M_WAITOK); + symlink = malloc(nnode->tn_size + 1, M_TMPFSNAME, + M_WAITOK); symlink_smr = false; } memcpy(symlink, target, nnode->tn_size + 1); @@ -550,14 +554,15 @@ * 1. nnode is not yet visible to the world * 2. both tn_link_target and tn_link_smr get populated * 3. release fence publishes their content - * 4. tn_link_target content is immutable until node destruction, - * where the pointer gets set to NULL + * 4. tn_link_target content is immutable until node + * destruction, where the pointer gets set to NULL * 5. tn_link_smr is never changed once set * - * As a result it is sufficient to issue load consume on the node - * pointer to also get the above content in a stable manner. - * Worst case tn_link_smr flag may be set to true despite being stale, - * while the target buffer is already cleared out. + * As a result it is sufficient to issue load consume + * on the node pointer to also get the above content + * in a stable manner. Worst case tn_link_smr flag + * may be set to true despite being stale, while the + * target buffer is already cleared out. */ atomic_store_ptr(&nnode->tn_link_target, symlink); atomic_store_char((char *)&nnode->tn_link_smr, symlink_smr); @@ -635,9 +640,10 @@ MPASS((node->tn_vpstate & TMPFS_VNODE_ALLOCATING) == 0); /* - * Make sure this is a node type we can deal with. Everything is explicitly - * enumerated without the 'default' clause so the compiler can throw an - * error in case a new type is added. + * Make sure this is a node type we can deal with. 
Everything + * is explicitly enumerated without the 'default' clause so + * the compiler can throw an error in case a new type is + * added. */ switch (node->tn_type) { case VBLK: @@ -651,17 +657,16 @@ case VNON: case VBAD: case VMARKER: - panic("%s: bad type %d for node %p", __func__, (int)node->tn_type, node); + panic("%s: bad type %d for node %p", __func__, + (int)node->tn_type, node); } #endif switch (node->tn_type) { case VREG: uobj = node->tn_reg.tn_aobj; - if (uobj != NULL) { - if (uobj->size != 0) - atomic_subtract_long(&tmp->tm_pages_used, uobj->size); - } + if (uobj != NULL && uobj->size != 0) + atomic_subtract_long(&tmp->tm_pages_used, uobj->size); tmpfs_free_tmp(tmp); @@ -1882,7 +1887,7 @@ */ int tmpfs_chflags(struct vnode *vp, u_long flags, struct ucred *cred, - struct thread *p) + struct thread *td) { int error; struct tmpfs_node *node; @@ -1905,7 +1910,7 @@ * Callers may only modify the file flags on objects they * have VADMIN rights for. */ - if ((error = VOP_ACCESS(vp, VADMIN, cred, p))) + if ((error = VOP_ACCESS(vp, VADMIN, cred, td))) return (error); /* * Unprivileged processes are not permitted to unset system @@ -1938,7 +1943,8 @@ * The vnode must be locked on entry and remain locked on exit. */ int -tmpfs_chmod(struct vnode *vp, mode_t mode, struct ucred *cred, struct thread *p) +tmpfs_chmod(struct vnode *vp, mode_t mode, struct ucred *cred, + struct thread *td) { int error; struct tmpfs_node *node; @@ -1961,7 +1967,7 @@ * To modify the permissions on a file, must possess VADMIN * for that file. */ - if ((error = VOP_ACCESS(vp, VADMIN, cred, p))) + if ((error = VOP_ACCESS(vp, VADMIN, cred, td))) return (error); /* @@ -1999,7 +2005,7 @@ */ int tmpfs_chown(struct vnode *vp, uid_t uid, gid_t gid, struct ucred *cred, - struct thread *p) + struct thread *td) { int error; struct tmpfs_node *node; @@ -2032,7 +2038,7 @@ * To modify the ownership of a file, must possess VADMIN for that * file. 
*/ - if ((error = VOP_ACCESS(vp, VADMIN, cred, p))) + if ((error = VOP_ACCESS(vp, VADMIN, cred, td))) return (error); /* @@ -2053,7 +2059,8 @@ node->tn_status |= TMPFS_NODE_CHANGED; - if ((node->tn_mode & (S_ISUID | S_ISGID)) && (ouid != uid || ogid != gid)) { + if ((node->tn_mode & (S_ISUID | S_ISGID)) != 0 && + (ouid != uid || ogid != gid)) { if (priv_check_cred(cred, PRIV_VFS_RETAINSUGID)) { newmode = node->tn_mode & ~(S_ISUID | S_ISGID); atomic_store_short(&node->tn_mode, newmode); @@ -2072,7 +2079,7 @@ */ int tmpfs_chsize(struct vnode *vp, u_quad_t size, struct ucred *cred, - struct thread *p) + struct thread *td) { int error; struct tmpfs_node *node; @@ -2113,6 +2120,10 @@ if (node->tn_flags & (IMMUTABLE | APPEND)) return (EPERM); + error = vn_rlimit_trunc(size, td); + if (error != 0) + return (error); + error = tmpfs_truncate(vp, size); /* * tmpfs_truncate will raise the NOTE_EXTEND and NOTE_ATTRIB kevents @@ -2131,7 +2142,7 @@ */ int tmpfs_chtimes(struct vnode *vp, struct vattr *vap, - struct ucred *cred, struct thread *l) + struct ucred *cred, struct thread *td) { int error; struct tmpfs_node *node; @@ -2148,21 +2159,17 @@ if (node->tn_flags & (IMMUTABLE | APPEND)) return (EPERM); - error = vn_utimes_perm(vp, vap, cred, l); + error = vn_utimes_perm(vp, vap, cred, td); if (error != 0) return (error); if (vap->va_atime.tv_sec != VNOVAL) node->tn_accessed = true; - if (vap->va_mtime.tv_sec != VNOVAL) node->tn_status |= TMPFS_NODE_MODIFIED; - if (vap->va_birthtime.tv_sec != VNOVAL) node->tn_status |= TMPFS_NODE_MODIFIED; - tmpfs_itimes(vp, &vap->va_atime, &vap->va_mtime); - if (vap->va_birthtime.tv_sec != VNOVAL) node->tn_birthtime = vap->va_birthtime; ASSERT_VOP_ELOCKED(vp, "chtimes2"); diff --git a/sys/fs/tmpfs/tmpfs_vnops.c b/sys/fs/tmpfs/tmpfs_vnops.c --- a/sys/fs/tmpfs/tmpfs_vnops.c +++ b/sys/fs/tmpfs/tmpfs_vnops.c @@ -639,6 +639,7 @@ struct uio *uio; struct tmpfs_node *node; off_t oldsize; + ssize_t r; int error, ioflag; mode_t newmode; @@ -655,11 
+656,13 @@ return (0); if (ioflag & IO_APPEND) uio->uio_offset = node->tn_size; - if (uio->uio_offset + uio->uio_resid > - VFS_TO_TMPFS(vp->v_mount)->tm_maxfilesize) - return (EFBIG); - if (vn_rlimit_fsize(vp, uio, uio->uio_td)) - return (EFBIG); + error = vn_rlimit_fsizex(vp, uio, VFS_TO_TMPFS(vp->v_mount)-> + tm_maxfilesize, &r, uio->uio_td); + if (error != 0) { + vn_rlimit_fsizex_res(uio, r); + return (error); + } + if (uio->uio_offset + uio->uio_resid > node->tn_size) { error = tmpfs_reg_resize(vp, uio->uio_offset + uio->uio_resid, FALSE); @@ -685,6 +688,7 @@ MPASS(IMPLIES(error == 0, uio->uio_resid == 0)); MPASS(IMPLIES(error != 0, oldsize == node->tn_size)); + vn_rlimit_fsizex_res(uio, r); return (error); } diff --git a/sys/kern/vfs_vnops.c b/sys/kern/vfs_vnops.c --- a/sys/kern/vfs_vnops.c +++ b/sys/kern/vfs_vnops.c @@ -2372,42 +2372,124 @@ return (error); } +static void +vn_send_sigxfsz(struct proc *p) +{ + PROC_LOCK(p); + kern_psignal(p, SIGXFSZ); + PROC_UNLOCK(p); +} + int -vn_rlimit_fsize(const struct vnode *vp, const struct uio *uio, - struct thread *td) +vn_rlimit_trunc(u_quad_t size, struct thread *td) +{ + if (size <= lim_cur(td, RLIMIT_FSIZE)) + return (0); + vn_send_sigxfsz(td->td_proc); + return (EFBIG); +} + +static int +vn_rlimit_fsizex1(const struct vnode *vp, struct uio *uio, off_t maxfsz, + bool adj, struct thread *td) { off_t lim; bool ktr_write; - if (td == NULL) + if (vp->v_type != VREG) return (0); /* - * There are conditions where the limit is to be ignored. - * However, since it is almost never reached, check it first. + * Handle file system maximum file size. + */ + if (maxfsz != 0 && uio->uio_offset + uio->uio_resid > maxfsz) { + if (!adj || uio->uio_offset >= maxfsz) + return (EFBIG); + uio->uio_resid = maxfsz - uio->uio_offset; + } + + /* + * This is kernel write (e.g. vnode_pager) or accounting + * write, ignore limit. + */ + if (td == NULL || (td->td_pflags2 & TDP2_ACCT) != 0) + return (0); + + /* + * Calculate file size limit. 
*/ ktr_write = (td->td_pflags & TDP_INKTRACE) != 0; - lim = lim_cur(td, RLIMIT_FSIZE); - if (__predict_false(ktr_write)) - lim = td->td_ktr_io_lim; + lim = __predict_false(ktr_write) ? td->td_ktr_io_lim : + lim_cur(td, RLIMIT_FSIZE); + + /* + * Is the limit reached? + */ if (__predict_true((uoff_t)uio->uio_offset + uio->uio_resid <= lim)) return (0); /* - * The limit is reached. + * Prepared filesystems can handle writes truncated to the + * file size limit. */ - if (vp->v_type != VREG || - (td->td_pflags2 & TDP2_ACCT) != 0) + if (adj && (uoff_t)uio->uio_offset < lim) { + uio->uio_resid = lim - (uoff_t)uio->uio_offset; return (0); - - if (!ktr_write || ktr_filesize_limit_signal) { - PROC_LOCK(td->td_proc); - kern_psignal(td->td_proc, SIGXFSZ); - PROC_UNLOCK(td->td_proc); } + + if (!ktr_write || ktr_filesize_limit_signal) + vn_send_sigxfsz(td->td_proc); return (EFBIG); } +/* + * Helper for VOP_WRITE() implementations, the common code to + * handle maximum supported file size on the filesystem, and + * RLIMIT_FSIZE, except for special writes from accounting subsystem + * and ktrace. + * + * For maximum file size (maxfsz argument): + * - return EFBIG if uio_offset is beyond it + * - otherwise, clamp uio_resid if write would extend file beyond maxfsz. + * + * For RLIMIT_FSIZE: + * - return EFBIG and send SIGXFSZ if uio_offset is beyond the limit + * - otherwise, clamp uio_resid if write would extend file beyond limit. + * + * If clamping occurred, the adjustment for uio_resid is stored in + * *resid_adj, to be re-applied by vn_rlimit_fsizex_res() on return + * from the VOP.
+ */ +int +vn_rlimit_fsizex(const struct vnode *vp, struct uio *uio, off_t maxfsz, + ssize_t *resid_adj, struct thread *td) +{ + ssize_t resid_orig; + int error; + bool adj; + + resid_orig = uio->uio_resid; + adj = resid_adj != NULL; + error = vn_rlimit_fsizex1(vp, uio, maxfsz, adj, td); + if (adj) + *resid_adj = resid_orig - uio->uio_resid; + return (error); +} + +void +vn_rlimit_fsizex_res(struct uio *uio, ssize_t resid_adj) +{ + uio->uio_resid += resid_adj; +} + +int +vn_rlimit_fsize(const struct vnode *vp, const struct uio *uio, + struct thread *td) +{ + return (vn_rlimit_fsizex(vp, __DECONST(struct uio *, uio), 0, NULL, + td)); +} + int vn_chmod(struct file *fp, mode_t mode, struct ucred *active_cred, struct thread *td) @@ -3220,8 +3302,6 @@ io.uio_offset = *outoffp; io.uio_resid = len; error = vn_rlimit_fsize(outvp, &io, fsize_td); - if (error != 0) - error = EFBIG; } if (VOP_PATHCONF(outvp, _PC_MIN_HOLE_SIZE, &holeout) != 0) holeout = 0; diff --git a/sys/sys/vnode.h b/sys/sys/vnode.h --- a/sys/sys/vnode.h +++ b/sys/sys/vnode.h @@ -786,6 +786,10 @@ int vn_read_from_obj(struct vnode *vp, struct uio *uio); int vn_rlimit_fsize(const struct vnode *vp, const struct uio *uio, struct thread *td); +int vn_rlimit_fsizex(const struct vnode *vp, struct uio *uio, + off_t maxfsz, ssize_t *resid_adj, struct thread *td); +void vn_rlimit_fsizex_res(struct uio *uio, ssize_t resid_adj); +int vn_rlimit_trunc(u_quad_t size, struct thread *td); int vn_start_write(struct vnode *vp, struct mount **mpp, int flags); int vn_start_secondary_write(struct vnode *vp, struct mount **mpp, int flags); diff --git a/sys/ufs/ffs/ffs_vnops.c b/sys/ufs/ffs/ffs_vnops.c --- a/sys/ufs/ffs/ffs_vnops.c +++ b/sys/ufs/ffs/ffs_vnops.c @@ -839,7 +839,7 @@ struct buf *bp; ufs_lbn_t lbn; off_t osize; - ssize_t resid; + ssize_t resid, r; int seqcount; int blkoffset, error, flags, ioflag, size, xfersize; @@ -888,14 +888,17 @@ KASSERT(uio->uio_resid >= 0, ("ffs_write: uio->uio_resid < 0")); 
KASSERT(uio->uio_offset >= 0, ("ffs_write: uio->uio_offset < 0")); fs = ITOFS(ip); - if ((uoff_t)uio->uio_offset + uio->uio_resid > fs->fs_maxfilesize) - return (EFBIG); + /* * Maybe this should be above the vnode op call, but so long as * file servers have no limits, I don't think it matters. */ - if (vn_rlimit_fsize(vp, uio, uio->uio_td)) - return (EFBIG); + error = vn_rlimit_fsizex(vp, uio, fs->fs_maxfilesize, &r, + uio->uio_td); + if (error != 0) { + vn_rlimit_fsizex_res(uio, r); + return (error); + } resid = uio->uio_resid; osize = ip->i_size; @@ -1036,6 +1039,7 @@ if (ffs_fsfail_cleanup(VFSTOUFS(vp->v_mount), error)) error = ENXIO; } + vn_rlimit_fsizex_res(uio, r); return (error); } diff --git a/sys/ufs/ufs/ufs_vnops.c b/sys/ufs/ufs/ufs_vnops.c --- a/sys/ufs/ufs/ufs_vnops.c +++ b/sys/ufs/ufs/ufs_vnops.c @@ -755,6 +755,9 @@ */ return (0); } + error = vn_rlimit_trunc(vap->va_size, td); + if (error != 0) + return (error); if ((error = UFS_TRUNCATE(vp, vap->va_size, IO_NORMAL | ((vap->va_vaflags & VA_SYNC) != 0 ? IO_SYNC : 0), cred)) != 0)