Index: lib/libc/sys/fspacectl.2 =================================================================== --- lib/libc/sys/fspacectl.2 +++ lib/libc/sys/fspacectl.2 @@ -61,6 +61,14 @@ .Fa cmd argument may be one of: .Bl -tag -width SPACECTL_DEALLOC +.It Dv SPACECTL_ALLOC +Back a region in the file specified by the +.Fa range +argument with file system space. +This operation is used to implement +.Xr posix_fallocate 2 . +The content of non-hole regions is left unchanged. +Holes are converted to non-hole regions filled with zeroes. .It Dv SPACECTL_DEALLOC Zero a region in the file specified by the .Fa range Index: share/man/man9/VOP_ALLOCATE.9 =================================================================== --- share/man/man9/VOP_ALLOCATE.9 +++ share/man/man9/VOP_ALLOCATE.9 @@ -37,7 +37,7 @@ .In sys/param.h .In sys/vnode.h .Ft int -.Fn VOP_ALLOCATE "struct vnode *vp" "off_t *offset" "off_t *len" +.Fn VOP_ALLOCATE "struct vnode *vp" "off_t *offset" "off_t *len" "int flags" "struct ucred *cred" .Sh DESCRIPTION This call allocates storage for a range of offsets in a file. It is used to implement the @@ -52,6 +52,8 @@ The start of the range to allocate storage for in the file. .It Fa len The length of the range to allocate storage for in the file. +.It Fa flags +Flags that control the behavior of this call. 
.El .Pp The Index: sys/fs/nfsclient/nfs_clvnops.c =================================================================== --- sys/fs/nfsclient/nfs_clvnops.c +++ sys/fs/nfsclient/nfs_clvnops.c @@ -3583,6 +3583,12 @@ struct nfsmount *nmp; int attrflag, error, ret; + if ((ap->a_flags & SPACECTL_F_CANEXTEND) == 0) + *ap->a_len = omin( + *ap->a_len, VTONFS(vp)->n_size - *ap->a_offset); + if (*ap->a_len == 0) + return (0); + attrflag = 0; nmp = VFSTONFS(vp->v_mount); mtx_lock(&nmp->nm_mtx); @@ -3596,7 +3602,7 @@ error = ncl_flush(vp, MNT_WAIT, td, 1, 0); if (error == 0) error = nfsrpc_allocate(vp, *ap->a_offset, *ap->a_len, - &nfsva, &attrflag, td->td_ucred, td, NULL); + &nfsva, &attrflag, ap->a_cred, td, NULL); if (error == 0) { *ap->a_offset += *ap->a_len; *ap->a_len = 0; Index: sys/fs/nfsserver/nfs_nfsdport.c =================================================================== --- sys/fs/nfsserver/nfs_nfsdport.c +++ sys/fs/nfsserver/nfs_nfsdport.c @@ -6335,7 +6335,8 @@ */ trycnt = 0; while (error == 0 && len > 0 && trycnt++ < 20) - error = VOP_ALLOCATE(vp, &off, &len); + error = VOP_ALLOCATE(vp, &off, &len, SPACECTL_F_CANEXTEND, + cred); if (error == 0 && len > 0) error = NFSERR_IO; NFSEXITCODE(error); Index: sys/kern/sys_generic.c =================================================================== --- sys/kern/sys_generic.c +++ sys/kern/sys_generic.c @@ -855,8 +855,9 @@ goto out; } - error = fo_fallocate(fp, offset, len, td); - out: + error = fo_fspacectl(fp, SPACECTL_ALLOC, offset, len, + SPACECTL_F_CANEXTEND, td->td_ucred, td); +out: fdrop(fp, td); return (error); } @@ -889,7 +890,7 @@ AUDIT_ARG_CMD(cmd); AUDIT_ARG_FFLAGS(flags); - if ((cmd != SPACECTL_DEALLOC) || + if ((cmd != SPACECTL_ALLOC && cmd != SPACECTL_DEALLOC) || (range->r_offset < 0 || range->r_len < 0) || (flags & ~SPACECTL_F_SUPPORTED)) return (EINVAL); Index: sys/kern/uipc_shm.c =================================================================== --- sys/kern/uipc_shm.c +++ sys/kern/uipc_shm.c @@ -145,7 
+145,7 @@ static fo_mmap_t shm_mmap; static fo_get_seals_t shm_get_seals; static fo_add_seals_t shm_add_seals; -static fo_fallocate_t shm_fallocate; +static fo_fspacectl_t shm_fspacectl; /* File descriptor operations. */ struct fileops shm_ops = { @@ -165,7 +165,7 @@ .fo_mmap = shm_mmap, .fo_get_seals = shm_get_seals, .fo_add_seals = shm_add_seals, - .fo_fallocate = shm_fallocate, + .fo_fspacectl = shm_fspacectl, .fo_flags = DFLAG_PASSABLE | DFLAG_SEEKABLE, }; @@ -1875,7 +1875,56 @@ } static int -shm_fallocate(struct file *fp, off_t offset, off_t len, struct thread *td) +shm_deallocate(struct shmfd *shmfd, off_t offset, off_t len, int flags, + void *rl_cookie, struct thread *td) +{ + vm_pindex_t start, start2, end; + vm_ooffset_t size; + vm_page_t m; + int error; + + start = OFF_TO_IDX(offset); + start2 = OFF_TO_IDX(offset + PAGE_MASK); + end = OFF_TO_IDX(offset + len); + size = offset + len; + error = 0; + + VM_OBJECT_WLOCK(shmfd->shm_object); + + if (start2 < end) + vm_object_page_remove(shmfd->shm_object, start2, end, 0); + if (len > OFF_MAX - offset) + len = OFF_MAX - offset; + + if ((offset & PAGE_MASK) != offset) { + m = vm_page_grab(shmfd->shm_object, start, VM_ALLOC_NOCREAT); + if (m != NULL) { + pmap_zero_page_area(m, offset & PAGE_MASK, + PAGE_SIZE - (offset & PAGE_MASK)); + vm_page_set_dirty(m); + vm_page_xunbusy(m); + } + } + if ((size & PAGE_MASK) != size) { + m = vm_page_grab(shmfd->shm_object, end, VM_ALLOC_NOCREAT); + if (m != NULL) { + pmap_zero_page_area(m, 0, offset + len & PAGE_MASK); + vm_page_set_dirty(m); + vm_page_xunbusy(m); + } + } + if (size > shmfd->shm_size && flags & SPACECTL_F_CANEXTEND) + error = shm_largepage(shmfd) ? 
shm_dotruncate_largepage(shmfd, + size, rl_cookie) : shm_dotruncate_locked(shmfd, size, + rl_cookie); + + VM_OBJECT_WUNLOCK(shmfd->shm_object); + return (error); +} + +static int +shm_fspacectl(struct file *fp, int cmd, off_t offset, off_t len, int flags, + struct ucred *active_cred, struct thread *td) { void *rl_cookie; struct shmfd *shmfd; @@ -1887,6 +1936,14 @@ shmfd = fp->f_data; size = offset + len; + if (cmd != SPACECTL_ALLOC && cmd != SPACECTL_DEALLOC) + return (EINVAL); + if (offset < 0 || len < 0 || (flags & ~SPACECTL_F_SUPPORTED) != 0) + return (EINVAL); + if (len == 0) + /* Degenerated case */ + return (0); + /* * Just grab the rangelock for the range that we may be attempting to * grow, rather than blocking read/write for regions we won't be @@ -1898,8 +1955,17 @@ */ rl_cookie = rangelock_wlock(&shmfd->shm_rl, offset, size, &shmfd->shm_mtx); - if (size > shmfd->shm_size) - error = shm_dotruncate_cookie(shmfd, size, rl_cookie); + switch (cmd) { + case SPACECTL_ALLOC: + if (size > shmfd->shm_size && flags & SPACECTL_F_CANEXTEND) + error = shm_dotruncate_cookie(shmfd, size, rl_cookie); + break; + case SPACECTL_DEALLOC: + error = shm_deallocate(shmfd, offset, len, flags, rl_cookie, td); + break; + default: + panic("%s: unknown cmd %d", __func__, cmd); + } rangelock_unlock(&shmfd->shm_rl, rl_cookie, &shmfd->shm_mtx); /* Translate to posix_fallocate(2) return value as needed. 
*/ if (error == ENOMEM) Index: sys/kern/vfs_default.c =================================================================== --- sys/kern/vfs_default.c +++ sys/kern/vfs_default.c @@ -976,6 +976,7 @@ uint8_t *buf; struct thread *td; struct vnode *vp; + struct ucred *cred; size_t iosize; int error; @@ -986,8 +987,9 @@ vp = ap->a_vp; len = *ap->a_len; offset = *ap->a_offset; + cred = ap->a_cred; - error = VOP_GETATTR(vp, vap, td->td_ucred); + error = VOP_GETATTR(vp, vap, cred); if (error != 0) goto out; fsize = vap->va_size; @@ -1011,29 +1013,36 @@ if (error != 0) goto out; if (maxfilesize) { - if (offset > maxfilesize || len > maxfilesize || - offset + len > maxfilesize) { + if ((offset > maxfilesize || len > maxfilesize || + offset + len > maxfilesize) && + (ap->a_flags & SPACECTL_F_CANEXTEND) == 0) { error = EFBIG; goto out; } } else #endif - if (offset + len > vap->va_size) { + if ((u_quad_t)offset + len > vap->va_size && + ap->a_flags & SPACECTL_F_CANEXTEND) { /* * Test offset + len against the filesystem's maxfilesize. 
*/ VATTR_NULL(vap); vap->va_size = offset + len; - error = VOP_SETATTR(vp, vap, td->td_ucred); + error = VOP_SETATTR(vp, vap, cred); if (error != 0) goto out; VATTR_NULL(vap); vap->va_size = fsize; - error = VOP_SETATTR(vp, vap, td->td_ucred); + error = VOP_SETATTR(vp, vap, cred); if (error != 0) goto out; } + if ((ap->a_flags & SPACECTL_F_CANEXTEND) == 0) + len = omin(len, vap->va_size - offset); + if (len == 0) + goto out; + for (;;) { /* * Read and write back anything below the nominal file @@ -1055,7 +1064,7 @@ auio.uio_segflg = UIO_SYSSPACE; auio.uio_rw = UIO_READ; auio.uio_td = td; - error = VOP_READ(vp, &auio, 0, td->td_ucred); + error = VOP_READ(vp, &auio, 0, cred); if (error != 0) break; if (auio.uio_resid > 0) { @@ -1076,7 +1085,7 @@ auio.uio_rw = UIO_WRITE; auio.uio_td = td; - error = VOP_WRITE(vp, &auio, 0, td->td_ucred); + error = VOP_WRITE(vp, &auio, 0, cred); if (error != 0) break; @@ -1168,7 +1177,6 @@ if (error) return (error); - len = omin(OFF_MAX - offset, *ap->a_len); /* Extend the file if requested */ if (len != 0 && (ap->a_flags & SPACECTL_F_CANEXTEND) && (u_quad_t)offset + len > va.va_size) { Index: sys/kern/vfs_vnops.c =================================================================== --- sys/kern/vfs_vnops.c +++ sys/kern/vfs_vnops.c @@ -105,7 +105,6 @@ static fo_stat_t vn_statfile; static fo_close_t vn_closefile; static fo_mmap_t vn_mmap; -static fo_fallocate_t vn_fallocate; static fo_fspacectl_t vn_fspacectl; struct fileops vnops = { @@ -123,7 +122,6 @@ .fo_seek = vn_seek, .fo_fill_kinfo = vn_fill_kinfo, .fo_mmap = vn_mmap, - .fo_fallocate = vn_fallocate, .fo_fspacectl = vn_fspacectl, .fo_flags = DFLAG_PASSABLE | DFLAG_SEEKABLE }; @@ -3315,7 +3313,8 @@ } static int -vn_fallocate(struct file *fp, off_t offset, off_t len, struct thread *td) +vn_fallocate(struct file *fp, off_t offset, off_t len, int flags, + struct ucred *active_cred, struct thread *td) { struct mount *mp; struct vnode *vp; @@ -3326,9 +3325,15 @@ #endif vp = fp->f_vnode; + 
error = 0; + + if (offset < 0 || len <= 0 || (flags & ~SPACECTL_F_SUPPORTED) != 0) + return (EINVAL); if (vp->v_type != VREG) return (ENODEV); + len = omin(len, OFF_MAX - offset); + /* Allocating blocks may take a long time, so iterate. */ for (;;) { olen = len; @@ -3351,17 +3356,14 @@ } #endif #ifdef MAC - error = mac_vnode_check_write(td->td_ucred, fp->f_cred, vp); + error = mac_vnode_check_write(active_cred, fp->f_cred, vp); if (error == 0) #endif - error = VOP_ALLOCATE(vp, &offset, &len); + error = VOP_ALLOCATE(vp, &offset, &len, flags, + active_cred); VOP_UNLOCK(vp); vn_finished_write(mp); - if (olen + ooffset != offset + len) { - panic("offset + len changed from %jx/%jx to %jx/%jx", - ooffset, olen, offset, len); - } if (error != 0 || len == 0) break; KASSERT(olen > len, ("Iteration did not make progress?")); @@ -3474,10 +3476,13 @@ vp = fp->f_vnode; - if (cmd != SPACECTL_DEALLOC) + if (cmd != SPACECTL_ALLOC && cmd != SPACECTL_DEALLOC) return (EINVAL); switch (cmd) { + case SPACECTL_ALLOC: + error = vn_fallocate(fp, offset, len, flags, active_cred, td); + break; case SPACECTL_DEALLOC: error = vn_deallocate_impl(vp, offset, len, flags, 0, true, active_cred, fp->f_cred, td); Index: sys/kern/vnode_if.src =================================================================== --- sys/kern/vnode_if.src +++ sys/kern/vnode_if.src @@ -703,6 +703,8 @@ IN struct vnode *vp; INOUT off_t *offset; INOUT off_t *len; + IN int flags; + IN struct ucred *cred; }; Index: sys/sys/fcntl.h =================================================================== --- sys/sys/fcntl.h +++ sys/sys/fcntl.h @@ -357,6 +357,7 @@ * Commands for fspacectl(2) */ #define SPACECTL_DEALLOC 0 /* deallocate space */ +#define SPACECTL_ALLOC 1 /* allocate space */ /* * fspacectl(2) flags Index: sys/sys/file.h =================================================================== --- sys/sys/file.h +++ sys/sys/file.h @@ -128,8 +128,6 @@ typedef int fo_aio_queue_t(struct file *fp, struct kaiocb *job); typedef int 
fo_add_seals_t(struct file *fp, int flags); typedef int fo_get_seals_t(struct file *fp, int *flags); -typedef int fo_fallocate_t(struct file *fp, off_t offset, off_t len, - struct thread *td); typedef int fo_fspacectl_t(struct file *fp, int cmd, off_t offset, off_t len, int flags, struct ucred *active_cred, struct thread *td); @@ -153,7 +151,6 @@ fo_aio_queue_t *fo_aio_queue; fo_add_seals_t *fo_add_seals; fo_get_seals_t *fo_get_seals; - fo_fallocate_t *fo_fallocate; fo_fspacectl_t *fo_fspacectl; fo_flags_t fo_flags; /* DFLAG_* below */ }; @@ -466,15 +463,6 @@ return ((*fp->f_ops->fo_get_seals)(fp, seals)); } -static __inline int -fo_fallocate(struct file *fp, off_t offset, off_t len, struct thread *td) -{ - - if (fp->f_ops->fo_fallocate == NULL) - return (ENODEV); - return ((*fp->f_ops->fo_fallocate)(fp, offset, len, td)); -} - static __inline int fo_fspacectl(struct file *fp, int cmd, off_t offset, off_t len, int flags, struct ucred *active_cred, struct thread *td) { Index: tests/sys/file/fspacectl_test.c =================================================================== --- tests/sys/file/fspacectl_test.c +++ tests/sys/file/fspacectl_test.c @@ -188,31 +188,31 @@ dataoff = lseek(fd, alloc_start, SEEK_DATA); if (dataoff == -1) - return (-1); + return (1); holeoff = lseek(fd, alloc_start, SEEK_HOLE); if (holeoff == -1) - return (-1); + return (1); /* * Check if the start offset of allocated region within file size is * legit */ if (dataoff != alloc_start && dataoff < file_sz) - return (-1); + return (1); /* * Check if the end offset of allocated region within file size is * legit */ if (holeoff < alloc_start + alloc_len && holeoff < file_sz) - return (-1); + return (1); /* * Check file size matches with expected file size. 
*/ if (fstat(fd, &statbuf) == -1) - return (-1); + return (1); if (statbuf.st_size != file_sz) - return (-1); + return (1); return (0); } @@ -406,6 +406,149 @@ ATF_REQUIRE(close(fd) == 0); } +/* + * Check aligned allocation + */ +ATF_TC_WITHOUT_HEAD(aligned_alloc); +ATF_TC_BODY(aligned_alloc, tc) +{ + struct spacectl_range range; + blksize_t blocksize; + int fd; + + ATF_REQUIRE((blocksize = fd_get_blksize()) != -1); + range.r_offset = blocksize; + range.r_len = (file_max_blocks - 1) * blocksize - range.r_offset; + + ATF_REQUIRE((fd = open("sys_fspacectl_testfile", + O_CREAT | O_RDWR | O_TRUNC, 0600)) != -1); + ATF_REQUIRE(ftruncate(fd, file_max_blocks * blocksize) == 0); + ATF_CHECK(fspacectl(fd, SPACECTL_ALLOC, &range, 0) == 0); + ATF_CHECK(check_hole_alloc(fd, range.r_offset, range.r_len, + file_max_blocks * blocksize) == 0); + ATF_REQUIRE(close(fd) == 0); +} + +/* + * Check unaligned allocation + */ +ATF_TC_WITHOUT_HEAD(unaligned_alloc); +ATF_TC_BODY(unaligned_alloc, tc) +{ + struct spacectl_range range; + blksize_t blocksize; + int fd; + + ATF_REQUIRE((blocksize = fd_get_blksize()) != -1); + range.r_offset = blocksize / 2; + range.r_len = (file_max_blocks - 1) * blocksize + blocksize / 2 - + range.r_offset; + + ATF_REQUIRE((fd = open("sys_fspacectl_testfile", + O_CREAT | O_RDWR | O_TRUNC, 0600)) != -1); + ATF_REQUIRE(ftruncate(fd, file_max_blocks * blocksize) == 0); + ATF_CHECK(fspacectl(fd, SPACECTL_ALLOC, &range, 0) == 0); + ATF_CHECK(check_hole_alloc(fd, range.r_offset, range.r_len, + file_max_blocks * blocksize) == 0); + ATF_REQUIRE(close(fd) == 0); +} + +/* + * Check aligned allocation and extending + */ +ATF_TC_WITHOUT_HEAD(aligned_alloc_canextend); +ATF_TC_BODY(aligned_alloc_canextend, tc) +{ + struct spacectl_range range; + blksize_t blocksize; + int fd; + + ATF_REQUIRE((blocksize = fd_get_blksize()) != -1); + range.r_offset = blocksize; + range.r_len = (file_max_blocks + 1) * blocksize - range.r_offset; + + ATF_REQUIRE((fd = 
open("sys_fspacectl_testfile", + O_CREAT | O_RDWR | O_TRUNC, 0600)) != -1); + ATF_REQUIRE(ftruncate(fd, file_max_blocks * blocksize) == 0); + ATF_CHECK( + fspacectl(fd, SPACECTL_ALLOC, &range, SPACECTL_F_CANEXTEND) == 0); + ATF_CHECK(check_hole_alloc(fd, range.r_offset, range.r_len, + (file_max_blocks + 1) * blocksize) == 0); + ATF_REQUIRE(close(fd) == 0); +} + +/* + * Check unaligned allocation and extending + */ +ATF_TC_WITHOUT_HEAD(unaligned_alloc_canextend); +ATF_TC_BODY(unaligned_alloc_canextend, tc) +{ + struct spacectl_range range; + blksize_t blocksize; + int fd; + + ATF_REQUIRE((blocksize = fd_get_blksize()) != -1); + range.r_offset = blocksize / 2; + range.r_len = file_max_blocks * blocksize + blocksize / 2 - + range.r_offset; + + ATF_REQUIRE((fd = open("sys_fspacectl_testfile", + O_CREAT | O_RDWR | O_TRUNC, 0600)) != -1); + ATF_REQUIRE(ftruncate(fd, file_max_blocks * blocksize) == 0); + ATF_CHECK( + fspacectl(fd, SPACECTL_ALLOC, &range, SPACECTL_F_CANEXTEND) == 0); + ATF_CHECK(check_hole_alloc(fd, range.r_offset, range.r_len, + file_max_blocks * blocksize + blocksize / 2) == 0); + ATF_REQUIRE(close(fd) == 0); +} + +/* + * Check aligned allocation around EOF + */ +ATF_TC_WITHOUT_HEAD(aligned_alloc_no_canextend); +ATF_TC_BODY(aligned_alloc_no_canextend, tc) +{ + struct spacectl_range range; + blksize_t blocksize; + int fd; + + ATF_REQUIRE((blocksize = fd_get_blksize()) != -1); + range.r_offset = blocksize; + range.r_len = (file_max_blocks + 1) * blocksize - range.r_offset; + + ATF_REQUIRE((fd = open("sys_fspacectl_testfile", + O_CREAT | O_RDWR | O_TRUNC, 0600)) != -1); + ATF_REQUIRE(ftruncate(fd, file_max_blocks * blocksize) == 0); + ATF_CHECK(fspacectl(fd, SPACECTL_ALLOC, &range, 0) == 0); + ATF_CHECK(check_hole_alloc(fd, range.r_offset, range.r_len, + file_max_blocks * blocksize) == 0); + ATF_REQUIRE(close(fd) == 0); +} + +/* + * Check unaligned allocation around EOF + */ +ATF_TC_WITHOUT_HEAD(unaligned_alloc_no_canextend); 
+ATF_TC_BODY(unaligned_alloc_no_canextend, tc) +{ + struct spacectl_range range; + blksize_t blocksize; + int fd; + + ATF_REQUIRE((blocksize = fd_get_blksize()) != -1); + range.r_offset = blocksize / 2; + range.r_len = file_max_blocks * blocksize + blocksize / 2 - + range.r_offset; + + ATF_REQUIRE((fd = open("sys_fspacectl_testfile", + O_CREAT | O_RDWR | O_TRUNC, 0600)) != -1); + ATF_REQUIRE(ftruncate(fd, file_max_blocks * blocksize) == 0); + ATF_CHECK(fspacectl(fd, SPACECTL_ALLOC, &range, 0) == 0); + ATF_CHECK(check_hole_alloc(fd, range.r_offset, range.r_len, + file_max_blocks * blocksize) == 0); + ATF_REQUIRE(close(fd) == 0); +} + ATF_TP_ADD_TCS(tp) { ATF_TP_ADD_TC(tp, aligned_dealloc); @@ -417,5 +560,12 @@ ATF_TP_ADD_TC(tp, aligned_dealloc_offmax); ATF_TP_ADD_TC(tp, unaligned_dealloc_offmax); + ATF_TP_ADD_TC(tp, aligned_alloc); + ATF_TP_ADD_TC(tp, unaligned_alloc); + ATF_TP_ADD_TC(tp, aligned_alloc_canextend); + ATF_TP_ADD_TC(tp, unaligned_alloc_canextend); + ATF_TP_ADD_TC(tp, aligned_alloc_no_canextend); + ATF_TP_ADD_TC(tp, unaligned_alloc_no_canextend); + return atf_no_error(); }