Index: lib/libc/sys/fspacectl.2 =================================================================== --- lib/libc/sys/fspacectl.2 +++ lib/libc/sys/fspacectl.2 @@ -24,7 +24,7 @@ .\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF .\" SUCH DAMAGE. .\" -.Dd Feb 08, 2021 +.Dd February 8, 2021 .Dt FSPACECTL 2 .Os .Sh NAME @@ -61,6 +61,16 @@ .Fa cmd argument may be one of: .Bl -tag -width SPACECTL_DEALLOC +.It Dv SPACECTL_ALLOC +Allocate file system space to back the region of the file specified by the +.Fa range +argument. +Subsequent operations in the region will not fail due to lack of free space in +the file system. +This operation is used to implement +.Xr posix_fallocate 2 . +The content of non-hole regions remains unchanged. +Holes are converted to non-hole regions filled with zeroes. .It Dv SPACECTL_DEALLOC Zero a region in the file specified by the .Fa range @@ -71,7 +81,18 @@ .Pp The .Fa flags -argument needs be the value 0 currently. +argument may include one or more of the following: +.Bl -tag -width SPACECTL_F_CANEXTEND +.It Dv SPACECTL_F_CANEXTEND +The operation can extend the file size if +.Fa range->r_offset ++ +.Fa range->r_len +is greater than the current file size. +This flag is supported by the +.Dv SPACECTL_ALLOC +operation. +.El .Pp If signal is delivered to a thread calling this system call, the system call would either finish as if nothing has happened, Index: share/man/man9/VOP_ALLOCATE.9 =================================================================== --- share/man/man9/VOP_ALLOCATE.9 +++ share/man/man9/VOP_ALLOCATE.9 @@ -37,7 +37,7 @@ .In sys/param.h .In sys/vnode.h .Ft int -.Fn VOP_ALLOCATE "struct vnode *vp" "off_t *offset" "off_t *len" +.Fn VOP_ALLOCATE "struct vnode *vp" "off_t *offset" "off_t *len" "int flags" "struct ucred *cred" .Sh DESCRIPTION This call allocates storage for a range of offsets in a file. It is used to implement the @@ -52,6 +52,8 @@ The start of the range to allocate storage for in the file.
.It Fa len The length of the range to allocate storage for in the file. +.It Fa flags +The parameter to control the behavior of this call. .El .Pp The Index: sys/fs/nfsclient/nfs_clvnops.c =================================================================== --- sys/fs/nfsclient/nfs_clvnops.c +++ sys/fs/nfsclient/nfs_clvnops.c @@ -3614,6 +3614,12 @@ struct nfsmount *nmp; int attrflag, error, ret; + if ((ap->a_flags & SPACECTL_F_CANEXTEND) == 0) + *ap->a_len = omax(0, omin(*ap->a_len, + (off_t)VTONFS(vp)->n_size - *ap->a_offset)); + if (*ap->a_len == 0) /* nothing left to allocate */ + return (0); + attrflag = 0; nmp = VFSTONFS(vp->v_mount); mtx_lock(&nmp->nm_mtx); @@ -3627,7 +3633,7 @@ error = ncl_flush(vp, MNT_WAIT, td, 1, 0); if (error == 0) error = nfsrpc_allocate(vp, *ap->a_offset, *ap->a_len, - &nfsva, &attrflag, td->td_ucred, td, NULL); + &nfsva, &attrflag, ap->a_cred, td, NULL); if (error == 0) { *ap->a_offset += *ap->a_len; *ap->a_len = 0; Index: sys/fs/nfsserver/nfs_nfsdport.c =================================================================== --- sys/fs/nfsserver/nfs_nfsdport.c +++ sys/fs/nfsserver/nfs_nfsdport.c @@ -6335,7 +6335,8 @@ */ trycnt = 0; while (error == 0 && len > 0 && trycnt++ < 20) - error = VOP_ALLOCATE(vp, &off, &len); + error = VOP_ALLOCATE(vp, &off, &len, SPACECTL_F_CANEXTEND, + cred); if (error == 0 && len > 0) error = NFSERR_IO; NFSEXITCODE(error); Index: sys/kern/sys_generic.c =================================================================== --- sys/kern/sys_generic.c +++ sys/kern/sys_generic.c @@ -855,8 +855,9 @@ goto out; } - error = fo_fallocate(fp, offset, len, td); - out: + error = fo_fspacectl(fp, SPACECTL_ALLOC, offset, len, + SPACECTL_F_CANEXTEND, td->td_ucred, td); +out: fdrop(fp, td); return (error); } @@ -889,7 +890,7 @@ AUDIT_ARG_CMD(cmd); AUDIT_ARG_FFLAGS(flags); - if ((cmd != SPACECTL_DEALLOC) || + if ((cmd != SPACECTL_ALLOC && cmd != SPACECTL_DEALLOC) || (range->r_offset < 0 || range->r_len < 0) || (flags & ~SPACECTL_F_SUPPORTED)) return (EINVAL); Index:
sys/kern/uipc_shm.c =================================================================== --- sys/kern/uipc_shm.c +++ sys/kern/uipc_shm.c @@ -145,7 +145,6 @@ static fo_mmap_t shm_mmap; static fo_get_seals_t shm_get_seals; static fo_add_seals_t shm_add_seals; -static fo_fallocate_t shm_fallocate; static fo_fspacectl_t shm_fspacectl; /* File descriptor operations. */ @@ -166,7 +165,6 @@ .fo_mmap = shm_mmap, .fo_get_seals = shm_get_seals, .fo_add_seals = shm_add_seals, - .fo_fallocate = shm_fallocate, .fo_fspacectl = shm_fspacectl, .fo_flags = DFLAG_PASSABLE | DFLAG_SEEKABLE, }; @@ -1932,9 +1930,9 @@ shmfd = fp->f_data; size = offset + len; - if (cmd != SPACECTL_DEALLOC) + if (cmd != SPACECTL_ALLOC && cmd != SPACECTL_DEALLOC) return (EINVAL); - if (offset < 0 || len < 0 || flags != 0) + if (offset < 0 || len < 0 || (flags & ~SPACECTL_F_SUPPORTED) != 0) return (EINVAL); if (len == 0) /* Degenerated case */ @@ -1952,6 +1950,17 @@ rl_cookie = rangelock_wlock(&shmfd->shm_rl, offset, size, &shmfd->shm_mtx); switch (cmd) { + case SPACECTL_ALLOC: + if (size > shmfd->shm_size && flags & SPACECTL_F_CANEXTEND) { + error = shm_dotruncate_cookie(shmfd, size, rl_cookie); + /* + * Translate to posix_fallocate(2) return value as + * needed. + */ + if (error == ENOMEM) + error = ENOSPC; + } + break; case SPACECTL_DEALLOC: error = shm_deallocate(shmfd, offset, len, flags, rl_cookie, td); break; @@ -1962,40 +1971,6 @@ return (error); } - -static int -shm_fallocate(struct file *fp, off_t offset, off_t len, struct thread *td) -{ - void *rl_cookie; - struct shmfd *shmfd; - size_t size; - int error; - - /* This assumes that the caller already checked for overflow. */ - error = 0; - shmfd = fp->f_data; - size = offset + len; - - /* - * Just grab the rangelock for the range that we may be attempting to - * grow, rather than blocking read/write for regions we won't be - * touching while this (potential) resize is in progress. 
Other - * attempts to resize the shmfd will have to take a write lock from 0 to - * OFF_MAX, so this being potentially beyond the current usable range of - * the shmfd is not necessarily a concern. If other mechanisms are - * added to grow a shmfd, this may need to be re-evaluated. - */ - rl_cookie = rangelock_wlock(&shmfd->shm_rl, offset, size, - &shmfd->shm_mtx); - if (size > shmfd->shm_size) - error = shm_dotruncate_cookie(shmfd, size, rl_cookie); - rangelock_unlock(&shmfd->shm_rl, rl_cookie, &shmfd->shm_mtx); - /* Translate to posix_fallocate(2) return value as needed. */ - if (error == ENOMEM) - error = ENOSPC; - return (error); -} - static int sysctl_posix_shm_list(SYSCTL_HANDLER_ARGS) { Index: sys/kern/vfs_default.c =================================================================== --- sys/kern/vfs_default.c +++ sys/kern/vfs_default.c @@ -953,6 +953,7 @@ uint8_t *buf; struct thread *td; struct vnode *vp; + struct ucred *cred; size_t iosize; int error; @@ -963,8 +964,9 @@ vp = ap->a_vp; len = *ap->a_len; offset = *ap->a_offset; + cred = ap->a_cred; - error = VOP_GETATTR(vp, vap, td->td_ucred); + error = VOP_GETATTR(vp, vap, cred); if (error != 0) goto out; fsize = vap->va_size; @@ -988,29 +990,36 @@ if (error != 0) goto out; if (maxfilesize) { - if (offset > maxfilesize || len > maxfilesize || - offset + len > maxfilesize) { + if ((offset > maxfilesize || len > maxfilesize || + offset + len > maxfilesize) && + (ap->a_flags & SPACECTL_F_CANEXTEND) == 0) { error = EFBIG; goto out; } } else #endif - if (offset + len > vap->va_size) { + if ((u_quad_t)offset + len > vap->va_size && + ap->a_flags & SPACECTL_F_CANEXTEND) { /* * Test offset + len against the filesystem's maxfilesize. 
*/ VATTR_NULL(vap); vap->va_size = offset + len; - error = VOP_SETATTR(vp, vap, td->td_ucred); + error = VOP_SETATTR(vp, vap, cred); if (error != 0) goto out; VATTR_NULL(vap); vap->va_size = fsize; - error = VOP_SETATTR(vp, vap, td->td_ucred); + error = VOP_SETATTR(vp, vap, cred); if (error != 0) goto out; } + if ((ap->a_flags & SPACECTL_F_CANEXTEND) == 0) + len = omax(0, omin(len, (off_t)vap->va_size - offset)); + if (len == 0) /* nothing left to allocate */ + goto out; + for (;;) { /* * Read and write back anything below the nominal file @@ -1032,7 +1041,7 @@ auio.uio_segflg = UIO_SYSSPACE; auio.uio_rw = UIO_READ; auio.uio_td = td; - error = VOP_READ(vp, &auio, 0, td->td_ucred); + error = VOP_READ(vp, &auio, 0, cred); if (error != 0) break; if (auio.uio_resid > 0) { @@ -1053,7 +1062,7 @@ auio.uio_rw = UIO_WRITE; auio.uio_td = td; - error = VOP_WRITE(vp, &auio, 0, td->td_ucred); + error = VOP_WRITE(vp, &auio, 0, cred); if (error != 0) break; Index: sys/kern/vfs_vnops.c =================================================================== --- sys/kern/vfs_vnops.c +++ sys/kern/vfs_vnops.c @@ -105,7 +105,6 @@ static fo_stat_t vn_statfile; static fo_close_t vn_closefile; static fo_mmap_t vn_mmap; -static fo_fallocate_t vn_fallocate; static fo_fspacectl_t vn_fspacectl; struct fileops vnops = { @@ -123,7 +122,6 @@ .fo_seek = vn_seek, .fo_fill_kinfo = vn_fill_kinfo, .fo_mmap = vn_mmap, - .fo_fallocate = vn_fallocate, .fo_fspacectl = vn_fspacectl, .fo_flags = DFLAG_PASSABLE | DFLAG_SEEKABLE }; @@ -3313,7 +3311,8 @@ } static int -vn_fallocate(struct file *fp, off_t offset, off_t len, struct thread *td) +vn_fallocate(struct file *fp, off_t offset, off_t len, int flags, + struct ucred *active_cred, struct thread *td) { struct mount *mp; struct vnode *vp; @@ -3324,9 +3323,15 @@ #endif vp = fp->f_vnode; + error = 0; + + if (offset < 0 || len <= 0 || (flags & ~SPACECTL_F_SUPPORTED) != 0) + return (EINVAL); if (vp->v_type != VREG) return (ENODEV); + len = omin(len, OFF_MAX - offset); + /* Allocating blocks may take a long
time, so iterate. */ for (;;) { olen = len; @@ -3349,17 +3354,14 @@ } #endif #ifdef MAC - error = mac_vnode_check_write(td->td_ucred, fp->f_cred, vp); + error = mac_vnode_check_write(active_cred, fp->f_cred, vp); if (error == 0) #endif - error = VOP_ALLOCATE(vp, &offset, &len); + error = VOP_ALLOCATE(vp, &offset, &len, flags, + active_cred); VOP_UNLOCK(vp); vn_finished_write(mp); - if (olen + ooffset != offset + len) { - panic("offset + len changed from %jx/%jx to %jx/%jx", - ooffset, olen, offset, len); - } if (error != 0 || len == 0) break; KASSERT(olen > len, ("Iteration did not make progress?")); @@ -3471,10 +3473,13 @@ vp = fp->f_vnode; - if (cmd != SPACECTL_DEALLOC) + if (cmd != SPACECTL_ALLOC && cmd != SPACECTL_DEALLOC) return (EINVAL); switch (cmd) { + case SPACECTL_ALLOC: + error = vn_fallocate(fp, offset, len, flags, active_cred, td); + break; case SPACECTL_DEALLOC: error = vn_deallocate_impl(vp, offset, len, flags, 0, true, active_cred, fp->f_cred, td); Index: sys/kern/vnode_if.src =================================================================== --- sys/kern/vnode_if.src +++ sys/kern/vnode_if.src @@ -703,6 +703,8 @@ IN struct vnode *vp; INOUT off_t *offset; INOUT off_t *len; + IN int flags; + IN struct ucred *cred; }; Index: sys/sys/fcntl.h =================================================================== --- sys/sys/fcntl.h +++ sys/sys/fcntl.h @@ -355,11 +355,13 @@ * Commands for fspacectl(2) */ #define SPACECTL_DEALLOC 0 /* deallocate space */ +#define SPACECTL_ALLOC 1 /* allocate space */ /* * fspacectl(2) flags */ -#define SPACECTL_F_SUPPORTED 0 +#define SPACECTL_F_CANEXTEND (1) +#define SPACECTL_F_SUPPORTED (SPACECTL_F_CANEXTEND) #endif #ifndef _KERNEL Index: sys/sys/file.h =================================================================== --- sys/sys/file.h +++ sys/sys/file.h @@ -128,8 +128,6 @@ typedef int fo_aio_queue_t(struct file *fp, struct kaiocb *job); typedef int fo_add_seals_t(struct file *fp, int flags); typedef int 
fo_get_seals_t(struct file *fp, int *flags); -typedef int fo_fallocate_t(struct file *fp, off_t offset, off_t len, - struct thread *td); typedef int fo_fspacectl_t(struct file *fp, int cmd, off_t offset, off_t len, int flags, struct ucred *active_cred, struct thread *td); @@ -153,7 +151,6 @@ fo_aio_queue_t *fo_aio_queue; fo_add_seals_t *fo_add_seals; fo_get_seals_t *fo_get_seals; - fo_fallocate_t *fo_fallocate; fo_fspacectl_t *fo_fspacectl; fo_flags_t fo_flags; /* DFLAG_* below */ }; @@ -466,15 +463,6 @@ return ((*fp->f_ops->fo_get_seals)(fp, seals)); } -static __inline int -fo_fallocate(struct file *fp, off_t offset, off_t len, struct thread *td) -{ - - if (fp->f_ops->fo_fallocate == NULL) - return (ENODEV); - return ((*fp->f_ops->fo_fallocate)(fp, offset, len, td)); -} - static __inline int fo_fspacectl(struct file *fp, int cmd, off_t offset, off_t len, int flags, struct ucred *active_cred, struct thread *td) { Index: tests/sys/file/fspacectl_test.c =================================================================== --- tests/sys/file/fspacectl_test.c +++ tests/sys/file/fspacectl_test.c @@ -188,31 +188,31 @@ dataoff = lseek(fd, alloc_start, SEEK_DATA); if (dataoff == -1) - return (-1); + return (1); holeoff = lseek(fd, alloc_start, SEEK_HOLE); if (holeoff == -1) - return (-1); + return (1); /* * Check if the start offset of allocated region within file size is * legit */ if (dataoff != alloc_start && dataoff < file_sz) - return (-1); + return (1); /* * Check if the end offset of allocated region within file size is * legit */ if (holeoff < alloc_start + alloc_len && holeoff < file_sz) - return (-1); + return (1); /* * Check file size matches with expected file size. 
*/ if (fstat(fd, &statbuf) == -1) - return (-1); + return (1); if (statbuf.st_size != file_sz) - return (-1); + return (1); return (0); } @@ -357,6 +357,149 @@ ATF_REQUIRE(close(fd) == 0); } +/* + * Check aligned allocation + */ +ATF_TC_WITHOUT_HEAD(aligned_alloc); +ATF_TC_BODY(aligned_alloc, tc) +{ + struct spacectl_range range; + blksize_t blocksize; + int fd; + + ATF_REQUIRE((blocksize = fd_get_blksize()) != -1); + range.r_offset = blocksize; + range.r_len = (file_max_blocks - 1) * blocksize - range.r_offset; + + ATF_REQUIRE((fd = open("sys_fspacectl_testfile", + O_CREAT | O_RDWR | O_TRUNC, 0600)) != -1); + ATF_REQUIRE(ftruncate(fd, file_max_blocks * blocksize) == 0); + ATF_CHECK(fspacectl(fd, SPACECTL_ALLOC, &range, 0) == 0); + ATF_CHECK(check_hole_alloc(fd, range.r_offset, range.r_len, + file_max_blocks * blocksize) == 0); + ATF_REQUIRE(close(fd) == 0); +} + +/* + * Check unaligned allocation + */ +ATF_TC_WITHOUT_HEAD(unaligned_alloc); +ATF_TC_BODY(unaligned_alloc, tc) +{ + struct spacectl_range range; + blksize_t blocksize; + int fd; + + ATF_REQUIRE((blocksize = fd_get_blksize()) != -1); + range.r_offset = blocksize / 2; + range.r_len = (file_max_blocks - 1) * blocksize + blocksize / 2 - + range.r_offset; + + ATF_REQUIRE((fd = open("sys_fspacectl_testfile", + O_CREAT | O_RDWR | O_TRUNC, 0600)) != -1); + ATF_REQUIRE(ftruncate(fd, file_max_blocks * blocksize) == 0); + ATF_CHECK(fspacectl(fd, SPACECTL_ALLOC, &range, 0) == 0); + ATF_CHECK(check_hole_alloc(fd, range.r_offset, range.r_len, + file_max_blocks * blocksize) == 0); + ATF_REQUIRE(close(fd) == 0); +} + +/* + * Check aligned allocation and extending + */ +ATF_TC_WITHOUT_HEAD(aligned_alloc_canextend); +ATF_TC_BODY(aligned_alloc_canextend, tc) +{ + struct spacectl_range range; + blksize_t blocksize; + int fd; + + ATF_REQUIRE((blocksize = fd_get_blksize()) != -1); + range.r_offset = blocksize; + range.r_len = (file_max_blocks + 1) * blocksize - range.r_offset; + + ATF_REQUIRE((fd = 
open("sys_fspacectl_testfile", + O_CREAT | O_RDWR | O_TRUNC, 0600)) != -1); + ATF_REQUIRE(ftruncate(fd, file_max_blocks * blocksize) == 0); + ATF_CHECK( + fspacectl(fd, SPACECTL_ALLOC, &range, SPACECTL_F_CANEXTEND) == 0); + ATF_CHECK(check_hole_alloc(fd, range.r_offset, range.r_len, + (file_max_blocks + 1) * blocksize) == 0); + ATF_REQUIRE(close(fd) == 0); +} + +/* + * Check unaligned allocation and extending + */ +ATF_TC_WITHOUT_HEAD(unaligned_alloc_canextend); +ATF_TC_BODY(unaligned_alloc_canextend, tc) +{ + struct spacectl_range range; + blksize_t blocksize; + int fd; + + ATF_REQUIRE((blocksize = fd_get_blksize()) != -1); + range.r_offset = blocksize / 2; + range.r_len = file_max_blocks * blocksize + blocksize / 2 - + range.r_offset; + + ATF_REQUIRE((fd = open("sys_fspacectl_testfile", + O_CREAT | O_RDWR | O_TRUNC, 0600)) != -1); + ATF_REQUIRE(ftruncate(fd, file_max_blocks * blocksize) == 0); + ATF_CHECK( + fspacectl(fd, SPACECTL_ALLOC, &range, SPACECTL_F_CANEXTEND) == 0); + ATF_CHECK(check_hole_alloc(fd, range.r_offset, range.r_len, + file_max_blocks * blocksize + blocksize / 2) == 0); + ATF_REQUIRE(close(fd) == 0); +} + +/* + * Check aligned allocation around EOF + */ +ATF_TC_WITHOUT_HEAD(aligned_alloc_no_canextend); +ATF_TC_BODY(aligned_alloc_no_canextend, tc) +{ + struct spacectl_range range; + blksize_t blocksize; + int fd; + + ATF_REQUIRE((blocksize = fd_get_blksize()) != -1); + range.r_offset = blocksize; + range.r_len = (file_max_blocks + 1) * blocksize - range.r_offset; + + ATF_REQUIRE((fd = open("sys_fspacectl_testfile", + O_CREAT | O_RDWR | O_TRUNC, 0600)) != -1); + ATF_REQUIRE(ftruncate(fd, file_max_blocks * blocksize) == 0); + ATF_CHECK(fspacectl(fd, SPACECTL_ALLOC, &range, 0) == 0); + ATF_CHECK(check_hole_alloc(fd, range.r_offset, range.r_len, + file_max_blocks * blocksize) == 0); + ATF_REQUIRE(close(fd) == 0); +} + +/* + * Check unaligned allocation around EOF + */ +ATF_TC_WITHOUT_HEAD(unaligned_alloc_no_canextend); 
+ATF_TC_BODY(unaligned_alloc_no_canextend, tc) +{ + struct spacectl_range range; + blksize_t blocksize; + int fd; + + ATF_REQUIRE((blocksize = fd_get_blksize()) != -1); + range.r_offset = blocksize / 2; + range.r_len = file_max_blocks * blocksize + blocksize / 2 - + range.r_offset; + + ATF_REQUIRE((fd = open("sys_fspacectl_testfile", + O_CREAT | O_RDWR | O_TRUNC, 0600)) != -1); + ATF_REQUIRE(ftruncate(fd, file_max_blocks * blocksize) == 0); + ATF_CHECK(fspacectl(fd, SPACECTL_ALLOC, &range, 0) == 0); + ATF_CHECK(check_hole_alloc(fd, range.r_offset, range.r_len, + file_max_blocks * blocksize) == 0); + ATF_REQUIRE(close(fd) == 0); +} + ATF_TP_ADD_TCS(tp) { ATF_TP_ADD_TC(tp, aligned_dealloc); @@ -366,5 +509,12 @@ ATF_TP_ADD_TC(tp, aligned_dealloc_offmax); ATF_TP_ADD_TC(tp, unaligned_dealloc_offmax); + ATF_TP_ADD_TC(tp, aligned_alloc); + ATF_TP_ADD_TC(tp, unaligned_alloc); + ATF_TP_ADD_TC(tp, aligned_alloc_canextend); + ATF_TP_ADD_TC(tp, unaligned_alloc_canextend); + ATF_TP_ADD_TC(tp, aligned_alloc_no_canextend); + ATF_TP_ADD_TC(tp, unaligned_alloc_no_canextend); + return atf_no_error(); }