Index: lib/libc/sys/Makefile.inc =================================================================== --- lib/libc/sys/Makefile.inc +++ lib/libc/sys/Makefile.inc @@ -189,6 +189,7 @@ fhreadlink.2 \ flock.2 \ fork.2 \ + fspacectl.2 \ fsync.2 \ getdirentries.2 \ getdtablesize.2 \ Index: lib/libc/sys/Symbol.map =================================================================== --- lib/libc/sys/Symbol.map +++ lib/libc/sys/Symbol.map @@ -410,6 +410,7 @@ fhlink; fhlinkat; fhreadlink; + fspacectl; getfhat; funlinkat; memfd_create; Index: lib/libc/sys/fspacectl.2 =================================================================== --- /dev/null +++ lib/libc/sys/fspacectl.2 @@ -0,0 +1,166 @@ +.\" +.\" SPDX-License-Identifier: BSD-2-Clause-FreeBSD +.\" +.\" Copyright (c) 2021 The FreeBSD Foundation +.\" +.\" Redistribution and use in source and binary forms, with or without +.\" modification, are permitted provided that the following conditions +.\" are met: +.\" 1. Redistributions of source code must retain the above copyright +.\" notice, this list of conditions and the following disclaimer. +.\" 2. Redistributions in binary form must reproduce the above copyright +.\" notice, this list of conditions and the following disclaimer in the +.\" documentation and/or other materials provided with the distribution. +.\" +.\" THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND +.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +.\" ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+.\" SUCH DAMAGE.
+.\"
+.Dd February 8, 2021
+.Dt FSPACECTL 2
+.Os
+.Sh NAME
+.Nm fspacectl
+.Nd space management in a file
+.Sh LIBRARY
+.Lb libc
+.Sh SYNOPSIS
+.In fcntl.h
+.Ft int
+.Fn fspacectl "int fd" "int cmd" "struct spacectl_range *range" "int flags"
+.Sh DESCRIPTION
+.Nm
+is a system call performing space management in the file referenced by
+.Fa fd .
+.Fa cmd
+specifies the operation to take place in the file.
+.Fa range
+is the operation range.
+.Fa flags
+controls the behavior of the operation to take place.
+.Pp
+The
+.Fa range
+argument points to a structure defined as:
+.Bd -literal
+struct spacectl_range {
+	off_t r_offset;
+	off_t r_len;
+};
+.Ed
+.Pp
+The operation specified by the
+.Fa cmd
+argument may be one of:
+.Bl -tag -width SPACECTL_DEALLOC
+.It Dv SPACECTL_DEALLOC
+Zero a region in the file specified by the
+.Fa range
+argument.
+If the file system supports hole-punching,
+file system space deallocation may be performed in the given region.
+.El
+.Pp
+The
+.Fa flags
+argument may include one or more of the following:
+.Bl -tag -width SPACECTL_F_CANEXTEND
+.It Dv SPACECTL_F_CANEXTEND
+The operation can extend the file size if
+.Fa range->r_offset
++
+.Fa range->r_len
+is greater than the current file size.
+.El
+.Pp
+For the
+.Dv SPACECTL_DEALLOC
+operation,
+unless
+.Fa flags
+includes
+.Dv SPACECTL_F_CANEXTEND ,
+the part of
+.Fa range
+that lies beyond the end of the file does not extend the file size.
+.Pp
+If a signal is delivered to a thread calling this system call,
+the system call either completes as if nothing had happened,
+or the signal is delivered after the operation finishes.
+.Sh RETURN VALUES
+Upon successful completion, the value 0 is returned;
+otherwise the value -1 is returned and
+.Va errno
+is set to indicate the error.
+.Sh ERRORS
+Possible failure conditions:
+.Bl -tag -width Er
+.It Bq Er EBADF
+The
+.Fa fd
+argument is not a valid file descriptor.
+.It Bq Er EBADF
+The
+.Fa fd
+argument references a file that was opened without write permission.
+.It Bq Er EFBIG
+The value of
+.Fa range->r_offset No +
+.Fa range->r_len
+is greater than the maximum file size when
+.Dv SPACECTL_F_CANEXTEND
+is set.
+.It Bq Er EINTR
+A signal was caught during execution.
+.It Bq Er EINVAL
+Either the
+.Fa "range->r_offset"
+argument was less than zero, or the
+.Fa "range->r_len"
+argument was less than or equal to zero.
+.It Bq Er EINVAL
+The operation is not supported by the file descriptor.
+.It Bq Er EIO
+An I/O error occurred while reading from or writing to a file system.
+.It Bq Er EINTEGRITY
+Corrupted data was detected while reading from the file system.
+.It Bq Er ENODEV
+The
+.Fa fd
+argument does not refer to a file that supports
+.Nm .
+.It Bq Er ENOSPC
+There is insufficient free space remaining on the file system storage
+media.
+.It Bq Er ENOTCAPABLE
+The file descriptor
+.Fa fd
+has insufficient rights.
+.It Bq Er ESPIPE
+The
+.Fa fd
+argument is associated with a pipe or FIFO.
+.El
+.Sh SEE ALSO
+.Xr creat 2 ,
+.Xr ftruncate 2 ,
+.Xr open 2 ,
+.Xr unlink 2
+.Sh HISTORY
+The
+.Nm
+system call appeared in
+.Fx 14.0 .
+.Sh AUTHORS
+.Nm
+and this manual page were developed by
+.An Ka Ho Ng Aq Mt khng@FreeBSDFoundation.org
+under sponsorship from the FreeBSD Foundation.
Index: lib/libc/sys/pathconf.2
===================================================================
--- lib/libc/sys/pathconf.2
+++ lib/libc/sys/pathconf.2
@@ -166,6 +166,10 @@
+.It Li _PC_FDEALLOC_PRESENT
+Return 1 if a file system supports hole-punching (see
+.Xr fspacectl 2 ) ,
+otherwise 0.
 .It Li _PC_MIN_HOLE_SIZE
 If a file system supports the reporting of holes (see
 .Xr lseek 2 ) ,
 .Fn pathconf
 and
 .Fn fpathconf
Index: share/man/man9/Makefile
===================================================================
--- share/man/man9/Makefile
+++ share/man/man9/Makefile
@@ -397,6 +397,7 @@
 	vm_page_wire.9 \
 	vm_set_page_size.9 \
 	vmem.9 \
+	vn_deallocate.9 \
 	vn_fullpath.9 \
 	vn_isdisk.9 \
 	vnet.9 \
@@ -412,6 +413,7 @@
 	VOP_BWRITE.9 \
 	VOP_COPY_FILE_RANGE.9 \
 	VOP_CREATE.9 \
+	VOP_DEALLOCATE.9 \
 	VOP_FSYNC.9 \
 	VOP_GETACL.9 \
 	VOP_GETEXTATTR.9 \
Index: share/man/man9/VOP_DEALLOCATE.9
===================================================================
--- /dev/null
+++ share/man/man9/VOP_DEALLOCATE.9
@@ -0,0 +1,103 @@
+.\"
+.\" SPDX-License-Identifier: BSD-2-Clause-FreeBSD
+.\"
+.\" Copyright (c) 2021 The FreeBSD Foundation
+.\"
+.\" Redistribution and use in source and binary forms, with or without
+.\" modification, are permitted provided that the following conditions
+.\" are met:
+.\" 1. Redistributions of source code must retain the above copyright
+.\"    notice, this list of conditions and the following disclaimer.
+.\" 2. Redistributions in binary form must reproduce the above copyright
+.\"    notice, this list of conditions and the following disclaimer in the
+.\"    documentation and/or other materials provided with the distribution.
+.\"
+.\" THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+.\" ARE DISCLAIMED.
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+.\" SUCH DAMAGE.
+.\"
+.Dd February 10, 2021
+.Dt VOP_DEALLOCATE 9
+.Os
+.Sh NAME
+.Nm VOP_DEALLOCATE
+.Nd zero and/or deallocate storage from a file
+.Sh SYNOPSIS
+.In sys/param.h
+.In sys/vnode.h
+.Ft int
+.Fn VOP_DEALLOCATE "struct vnode *vp" "off_t *offset" "off_t *len" "int flags" "struct ucred *cred"
+.Sh DESCRIPTION
+This VOP call zeroes/deallocates storage for an offset range in a file.
+It is used to implement the
+.Xr fspacectl 2
+system call.
+.Pp
+Its arguments are:
+.Bl -tag -width offset
+.It Fa vp
+The vnode of the file.
+.It Fa offset
+The start of the range to deallocate storage for in the file.
+.It Fa len
+The length of the range to deallocate storage for in the file.
+.It Fa flags
+The parameter to control the behavior of this call.
+.It Fa cred
+The credentials of the caller of
+.Nm .
+.El
+.Pp
+The
+.Fa offset
+and
+.Fa len
+arguments are updated to reflect the portion of the range that
+still needs to be zeroed/deallocated on return.
+A partial result is considered a successful operation.
+The file's contents outside the requested range are not changed.
+.Sh LOCKS
+The file should be locked on entry and will still be locked on exit.
+In case
+.Dv SPACECTL_F_CANEXTEND
+is specified in
+.Fa flags ,
+the file should be locked exclusively.
+.Sh RETURN VALUES
+Zero is returned if the call is successful, otherwise an appropriate
+error code is returned.
+.Sh ERRORS
+.Bl -tag -width Er
+.It Bq Er EINVAL
+Invalid
+.Fa offset , len ,
+or
+.Fa flags
+parameters are passed into this VOP call.
+.It Bq Er EFBIG
+An attempt was made to write a file that exceeds the process's file size
+limit or the maximum file size when SPACECTL_F_CANEXTEND is set in
+.Fa flags .
+.It Bq Er ENODEV
+The vnode type is not supported by this VOP call.
+.It Bq Er ENOSPC
+The file system is full.
+.It Bq Er EPERM
+An append-only flag is set on the file, but the caller is attempting to
+write before the current end of file.
+.El
+.Sh SEE ALSO
+.Xr vnode 9
+.Sh AUTHORS
+.Nm
+and this manual page were developed by
+.An Ka Ho Ng Aq Mt khng@FreeBSDFoundation.org
+under sponsorship from the FreeBSD Foundation.
Index: share/man/man9/vn_deallocate.9
===================================================================
--- /dev/null
+++ share/man/man9/vn_deallocate.9
@@ -0,0 +1,95 @@
+.\"
+.\" SPDX-License-Identifier: BSD-2-Clause-FreeBSD
+.\"
+.\" Copyright (c) 2021 The FreeBSD Foundation
+.\"
+.\" Redistribution and use in source and binary forms, with or without
+.\" modification, are permitted provided that the following conditions
+.\" are met:
+.\" 1. Redistributions of source code must retain the above copyright
+.\"    notice, this list of conditions and the following disclaimer.
+.\" 2. Redistributions in binary form must reproduce the above copyright
+.\"    notice, this list of conditions and the following disclaimer in the
+.\"    documentation and/or other materials provided with the distribution.
+.\"
+.\" THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+.\" ARE DISCLAIMED.
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+.\" SUCH DAMAGE.
+.\"
+.Dd February 10, 2021
+.Dt VN_DEALLOCATE 9
+.Os
+.Sh NAME
+.Nm vn_deallocate
+.Nd zero and/or deallocate storage from a file
+.Sh SYNOPSIS
+.In sys/param.h
+.In sys/vnode.h
+.Ft int
+.Fn vn_deallocate "struct vnode *vp" "off_t offset" "off_t len" "int flags" "int ioflg" "struct ucred *active_cred" "struct ucred *file_cred" "struct thread *td"
+.Sh DESCRIPTION
+The
+.Fn vn_deallocate
+function zeroes and/or deallocates backing storage space from a file.
+This function only works on vnodes with
+.Dv VREG
+type.
+.Pp
+The arguments are:
+.Bl -tag -width active_cred
+.It Fa vp
+The vnode of the file.
+.It Fa offset
+The starting offset of the operation range.
+.It Fa len
+The length of the operation range. This argument must be greater than 0.
+.It Fa flags
+The control flags argument of the operation.
+.It Fa ioflg
+The control flags argument of vnode locking.
+.It Fa active_cred
+The user credentials of the calling thread.
+.It Fa file_cred
+The credentials installed on the file description pointing to the vnode or NOCRED.
+.El
+.Pp
+The
+.Fa flags
+argument may be one or more of the following flags:
+.Bl -tag -width SPACECTL_F_CANEXTEND
+.It Dv SPACECTL_F_CANEXTEND
+The operation can extend the file size if
+.Fa offset
++
+.Fa len
+is greater than the current file size.
+.El
+.Pp
+The
+.Fa ioflg
+argument may be one or more of the following flags:
+.Bl -tag -width IO_RANGELOCKED
+.It Dv IO_NODELOCKED
+The caller already holds the vnode lock.
+.It Dv IO_RANGELOCKED +The caller has granted vnode rangelock. +.El +.Sh RETURN VALUES +Upon successful completion, the value 0 is returned; otherwise the +appropriate error is returned. +.Sh SEE ALSO +.Xr vnode 9 , +.Xr VOP_DEALLOCATE 9 +.Sh AUTHORS +.Nm +and this manual page were developed by +.An Ka Ho Ng Aq Mt khng@FreeBSDFoundation.org +under sponsorship from the FreeBSD Foundation. Index: sys/bsm/audit_kevents.h =================================================================== --- sys/bsm/audit_kevents.h +++ sys/bsm/audit_kevents.h @@ -662,6 +662,7 @@ #define AUE_SPECIALFD 43266 /* FreeBSD-specific. */ #define AUE_AIO_WRITEV 43267 /* FreeBSD-specific. */ #define AUE_AIO_READV 43268 /* FreeBSD-specific. */ +#define AUE_FSPACECTL 43269 /* FreeBSD-specific. */ /* * Darwin BSM uses a number of AUE_O_* definitions, which are aliased to the Index: sys/compat/freebsd32/freebsd32_misc.c =================================================================== --- sys/compat/freebsd32/freebsd32_misc.c +++ sys/compat/freebsd32/freebsd32_misc.c @@ -3569,6 +3569,20 @@ return (kern_posix_error(td, error)); } +int +freebsd32_fspacectl(struct thread *td, struct freebsd32_fspacectl_args *uap) +{ + int error; + struct spacectl_range range; + + error = copyin(uap->range, &range, sizeof(range)); + if (error != 0) + return (error); + + error = kern_fspacectl(td, uap->fd, uap->cmd, uap->range, uap->flags); + return (error); +} + int freebsd32_posix_fadvise(struct thread *td, struct freebsd32_posix_fadvise_args *uap) Index: sys/compat/freebsd32/syscalls.master =================================================================== --- sys/compat/freebsd32/syscalls.master +++ sys/compat/freebsd32/syscalls.master @@ -1174,5 +1174,9 @@ struct aiocb32 *aiocbp); } 579 AUE_AIO_READV STD { int freebsd32_aio_readv( \ struct aiocb32 *aiocbp); } +580 AUE_FSPACECTL STD { int freebsd32_fspacectl(int fd, \ + int cmd, \ + struct spacectl_range *range,\ + int flags); } ; vim: syntax=off Index: 
sys/kern/sys_generic.c =================================================================== --- sys/kern/sys_generic.c +++ sys/kern/sys_generic.c @@ -861,6 +861,59 @@ return (error); } +int +sys_fspacectl(struct thread *td, struct fspacectl_args *uap) +{ + struct spacectl_range range; + int error; + + error = copyin(uap->range, &range, sizeof(range)); + if (error != 0) + return (error); + + error = kern_fspacectl(td, uap->fd, uap->cmd, &range, uap->flags); + return (error); +} + +int +kern_fspacectl(struct thread *td, int fd, int cmd, struct spacectl_range *range, + int flags) +{ + struct file *fp; + off_t offset; + int error; + + offset = range->r_len; + + AUDIT_ARG_FD(fd); + AUDIT_ARG_CMD(cmd); + AUDIT_ARG_FFLAGS(flags); + + if ((cmd != SPACECTL_DEALLOC) || + (range->r_offset < 0 || range->r_len < 0) || + (flags & ~SPACECTL_F_SUPPORTED)) + return (EINVAL); + + error = fget(td, fd, &cap_pwrite_rights, &fp); + if (error != 0) + return (error); + AUDIT_ARG_FILE(td->td_proc, fp); + if ((fp->f_ops->fo_flags & DFLAG_SEEKABLE) == 0) { + error = ESPIPE; + goto out; + } + if ((fp->f_flag & FWRITE) == 0) { + error = EBADF; + goto out; + } + + error = fo_fspacectl(fp, cmd, range->r_offset, range->r_len, flags, + td->td_ucred, td); +out: + fdrop(fp, td); + return (error); +} + int kern_specialfd(struct thread *td, int type, void *arg) { Index: sys/kern/syscalls.master =================================================================== --- sys/kern/syscalls.master +++ sys/kern/syscalls.master @@ -3238,6 +3238,14 @@ _Inout_ struct aiocb *aiocbp ); } +580 AUE_FSPACECTL STD { + int fspacectl( + int fd, + int cmd, + _In_ struct spacectl_range *range, + int flags + ); + } ; Please copy any additions and changes to the following compatability tables: ; sys/compat/freebsd32/syscalls.master Index: sys/kern/vfs_default.c =================================================================== --- sys/kern/vfs_default.c +++ sys/kern/vfs_default.c @@ -93,6 +93,7 @@ static int 
vop_stdread_pgcache(struct vop_read_pgcache_args *ap); static int vop_stdstat(struct vop_stat_args *ap); static int vop_stdvput_pair(struct vop_vput_pair_args *ap); +static int vop_stddeallocate(struct vop_deallocate_args *ap); /* * This vnode table stores what we want to do if the filesystem doesn't @@ -117,6 +118,7 @@ .vop_advlockasync = vop_stdadvlockasync, .vop_advlockpurge = vop_stdadvlockpurge, .vop_allocate = vop_stdallocate, + .vop_deallocate = vop_stddeallocate, .vop_bmap = vop_stdbmap, .vop_close = VOP_NULL, .vop_fsync = VOP_NULL, @@ -518,6 +520,7 @@ case _PC_ACL_EXTENDED: case _PC_ACL_NFS4: case _PC_CAP_PRESENT: + case _PC_FDEALLOC_PRESENT: case _PC_INF_PRESENT: case _PC_MAC_PRESENT: *ap->a_retval = 0; @@ -1092,6 +1095,143 @@ return (error); } +static int +vp_zerofill(struct vnode *vp, struct vattr *vap, off_t offset, off_t len, + off_t *residp, struct ucred *cred) +{ + int iosize; + int error = 0; + void *buf = NULL; + struct iovec aiov; + struct uio auio; + struct thread *td; + + iosize = vap->va_blocksize; + td = curthread; + + if (iosize == 0) + iosize = BLKDEV_IOSIZE; + if (iosize > MAXPHYS) + iosize = MAXPHYS; + buf = malloc(iosize, M_TEMP, M_ZERO | M_WAITOK); + + while (len > 0) { + int xfersize = iosize; + if (offset % iosize != 0) + xfersize -= offset % iosize; + if (xfersize > len) + xfersize = len; + + aiov.iov_base = buf; + aiov.iov_len = xfersize; + auio.uio_iov = &aiov; + auio.uio_iovcnt = 1; + auio.uio_offset = offset; + auio.uio_resid = xfersize; + auio.uio_segflg = UIO_SYSSPACE; + auio.uio_rw = UIO_WRITE; + auio.uio_td = td; + + error = VOP_WRITE(vp, &auio, 0, cred); + if (error != 0) { + len -= xfersize - auio.uio_resid; + break; + } + + len -= xfersize; + offset += xfersize; + + maybe_yield(); + } + + free(buf, M_TEMP); + *residp = len; + return (error); +} + +static int +vop_stddeallocate(struct vop_deallocate_args *ap) +{ + struct vnode *vp; + off_t offset, len; + struct ucred *cred; + int error; + struct vattr va; + off_t noff, 
xfersize, rem; + bool extended; + + vp = ap->a_vp; + offset = *ap->a_offset; + len = *ap->a_len; + cred = ap->a_cred; + extended = false; + + error = VOP_GETATTR(vp, &va, cred); + if (error) + return (error); + + len = omin(OFF_MAX - offset, *ap->a_len); + /* Extend the file if requested */ + if (len != 0 && (ap->a_flags & SPACECTL_F_CANEXTEND) && + (u_quad_t)offset + len > va.va_size) { + error = vn_truncate_locked(vp, offset + len, false, cred); + if (error != 0) + goto out; + + xfersize = offset + len - va.va_size; + len -= xfersize; + extended = true; + } + while (len > 0) { + noff = offset; + error = vn_bmap_seekhole_locked(vp, FIOSEEKDATA, &noff, cred); + if (error) { + if (error != ENXIO) + /* XXX: Is it okay to fallback further? */ + goto out; + + /* + * No more data region to be filled + */ + len = 0; + error = 0; + break; + } + KASSERT(noff >= offset, ("FIOSEEKDATA going backward")); + if (noff != offset) { + xfersize = omin(noff - offset, len); + len -= xfersize; + offset += xfersize; + if (len == 0) + break; + } + error = vn_bmap_seekhole_locked(vp, FIOSEEKHOLE, &noff, cred); + if (error) + goto out; + + /* Fill zeroes */ + xfersize = omin(noff - offset, len); + error = vp_zerofill(vp, &va, offset, xfersize, &rem, cred); + if (error) { + len -= xfersize - rem; + offset += xfersize - rem; + goto out; + } + + len -= xfersize; + offset += xfersize; + if (should_yield()) + break; + } +out: + if (error == 0) { + *ap->a_offset = offset; + *ap->a_len = len; + } else if (extended) + vn_truncate_locked(vp, va.va_size, false, cred); + return (error); +} + int vop_stdadvise(struct vop_advise_args *ap) { Index: sys/kern/vfs_vnops.c =================================================================== --- sys/kern/vfs_vnops.c +++ sys/kern/vfs_vnops.c @@ -106,6 +106,7 @@ static fo_close_t vn_closefile; static fo_mmap_t vn_mmap; static fo_fallocate_t vn_fallocate; +static fo_fspacectl_t vn_fspacectl; struct fileops vnops = { .fo_read = vn_io_fault, @@ -123,6 +124,7 @@ 
.fo_fill_kinfo = vn_fill_kinfo, .fo_mmap = vn_mmap, .fo_fallocate = vn_fallocate, + .fo_fspacectl = vn_fspacectl, .fo_flags = DFLAG_PASSABLE | DFLAG_SEEKABLE }; @@ -2365,7 +2367,8 @@ } int -vn_bmap_seekhole(struct vnode *vp, u_long cmd, off_t *off, struct ucred *cred) +vn_bmap_seekhole_locked(struct vnode *vp, u_long cmd, off_t *off, + struct ucred *cred) { struct vattr va; daddr_t bn, bnp; @@ -2373,22 +2376,17 @@ off_t noff; int error; - KASSERT(cmd == FIOSEEKHOLE || cmd == FIOSEEKDATA, - ("Wrong command %lu", cmd)); - - if (vn_lock(vp, LK_SHARED) != 0) - return (EBADF); if (vp->v_type != VREG) { error = ENOTTY; - goto unlock; + goto out; } error = VOP_GETATTR(vp, &va, cred); if (error != 0) - goto unlock; + goto out; noff = *off; if (noff >= va.va_size) { error = ENXIO; - goto unlock; + goto out; } bsize = vp->v_mount->mnt_stat.f_iosize; for (bn = noff / bsize; noff < va.va_size; bn++, noff += bsize - @@ -2396,14 +2394,14 @@ error = VOP_BMAP(vp, bn, NULL, &bnp, NULL, NULL); if (error == EOPNOTSUPP) { error = ENOTTY; - goto unlock; + goto out; } if ((bnp == -1 && cmd == FIOSEEKHOLE) || (bnp != -1 && cmd == FIOSEEKDATA)) { noff = bn * bsize; if (noff < *off) noff = *off; - goto unlock; + goto out; } } if (noff > va.va_size) @@ -2411,13 +2409,27 @@ /* noff == va.va_size. There is an implicit hole at the end of file. 
*/ if (cmd == FIOSEEKDATA) error = ENXIO; -unlock: - VOP_UNLOCK(vp); +out: if (error == 0) *off = noff; return (error); } +int +vn_bmap_seekhole(struct vnode *vp, u_long cmd, off_t *off, struct ucred *cred) +{ + int error; + + KASSERT(cmd == FIOSEEKHOLE || cmd == FIOSEEKDATA, + ("Wrong command %lu", cmd)); + + if (vn_lock(vp, LK_SHARED) != 0) + return (EBADF); + error = vn_bmap_seekhole_locked(vp, cmd, off, cred); + VOP_UNLOCK(vp); + return (error); +} + int vn_seek(struct file *fp, off_t offset, int whence, struct thread *td) { @@ -3359,6 +3371,124 @@ return (error); } +static int +vn_deallocate_impl(struct vnode *vp, off_t offset, off_t len, int flags, int ioflg, + bool may_audit, struct ucred *active_cred, struct ucred *file_cred, + struct thread *td) +{ + struct mount *mp; + void *rl_cookie; + int lock_flags; + int error; +#ifdef AUDIT + int audited_vnode1 = 0; +#endif + + rl_cookie = NULL; + error = 0; + + if (offset < 0 || len <= 0 || (flags & ~SPACECTL_F_SUPPORTED) != 0) + return (EINVAL); + if (vp->v_type != VREG) + return (ENODEV); + + /* Take the maximum range if end offset overflows */ + len = omin(len, OFF_MAX - offset); + + while (len > 0) { + /* + * Try to deallocate the longest range in one pass. + * In case a pass takes too long to be executed, it returns + * partial result. The residue will be proceeded in the next + * pass. 
+ */ + + mp = NULL; + + bwillwrite(); + + if ((ioflg & IO_NODELOCKED) == 0) { + if ((ioflg & IO_RANGELOCKED) == 0 && + rl_cookie == NULL) { + rl_cookie = vn_rangelock_wlock(vp, offset, + offset + len); + if ((error = vn_start_write(vp, &mp, + V_WAIT | PCATCH)) != 0) + goto out; + } + + if ((flags & SPACECTL_F_CANEXTEND) == 0 && + (MNT_SHARED_WRITES(mp) || + (mp == NULL && MNT_SHARED_WRITES(vp->v_mount)))) { + lock_flags = LK_SHARED; + } else { + lock_flags = LK_EXCLUSIVE; + } + vn_lock(vp, lock_flags | LK_RETRY); +#ifdef AUDIT + if (may_audit && !audited_vnode1) { + AUDIT_ARG_VNODE1(vp); + audited_vnode1 = 1; + } +#endif + } + +#ifdef MAC + if ((ioflg & IO_NOMACCHECK) == 0) + error = mac_vnode_check_write(active_cred, file_cred, + vp); +#endif + if (error == 0) + error = VOP_DEALLOCATE(vp, &offset, &len, flags, + active_cred); + + if ((ioflg & IO_NODELOCKED) == 0) { + VOP_UNLOCK(vp); + if (mp != NULL) + vn_finished_write(mp); + } + + if (error != 0) + break; + } +out: + if (rl_cookie != NULL) + vn_rangelock_unlock(vp, rl_cookie); + return (error); +} + +int +vn_deallocate(struct vnode *vp, off_t offset, off_t len, int flags, int ioflg, + struct ucred *active_cred, struct ucred *file_cred, struct thread *td) +{ + return (vn_deallocate_impl(vp, offset, len, flags, ioflg, false, + active_cred, file_cred, td)); +} + +static int +vn_fspacectl(struct file *fp, int cmd, off_t offset, off_t len, int flags, + struct ucred *active_cred, struct thread *td) +{ + int error; + struct vnode *vp; + + vp = fp->f_vnode; + + if (cmd != SPACECTL_DEALLOC) + return (EINVAL); + + switch (cmd) { + case SPACECTL_DEALLOC: + error = vn_deallocate_impl(vp, offset, len, flags, 0, true, + active_cred, fp->f_cred, td); + break; + default: + panic("vn_fspacectl: unknown cmd %d", cmd); + } + + return (error); +} + static u_long vn_lock_pair_pause_cnt; SYSCTL_ULONG(_debug, OID_AUTO, vn_lock_pair_pause, CTLFLAG_RD, &vn_lock_pair_pause_cnt, 0, Index: sys/kern/vnode_if.src 
=================================================================== --- sys/kern/vnode_if.src +++ sys/kern/vnode_if.src @@ -801,6 +801,17 @@ }; +%% deallocate vp L L L + +vop_deallocate { + IN struct vnode *vp; + INOUT off_t *offset; + INOUT off_t *len; + IN int flags; + IN struct ucred *cred; +}; + + # The VOPs below are spares at the end of the table to allow new VOPs to be # added in stable branches without breaking the KBI. New VOPs in HEAD should # be added above these spares. When merging a new VOP to a stable branch, Index: sys/security/audit/audit_bsm.c =================================================================== --- sys/security/audit/audit_bsm.c +++ sys/security/audit/audit_bsm.c @@ -1076,6 +1076,18 @@ FD_VNODE1_TOKENS; break; + case AUE_FSPACECTL: + if (ARG_IS_VALID(kar, ARG_CMD)) { + tok = au_to_arg32(2, "operation", ar->ar_arg_cmd); + kau_write(rec, tok); + } + if (ARG_IS_VALID(kar, ARG_FFLAGS)) { + tok = au_to_arg32(4, "flags", ar->ar_arg_fflags); + kau_write(rec, tok); + } + FD_VNODE1_TOKENS; + break; + case AUE_RFORK: if (ARG_IS_VALID(kar, ARG_FFLAGS)) { tok = au_to_arg32(1, "flags", ar->ar_arg_fflags); Index: sys/sys/fcntl.h =================================================================== --- sys/sys/fcntl.h +++ sys/sys/fcntl.h @@ -315,6 +315,14 @@ short l_type; /* lock type: read/write, etc. */ short l_whence; /* type of l_start */ }; + +/* + * Space control offset/length description + */ +struct spacectl_range { + off_t r_offset; /* starting offset */ + off_t r_len; /* length */ +}; #endif #if __BSD_VISIBLE @@ -344,6 +352,17 @@ * similar syscalls. 
*/ #define FD_NONE -200 + +/* + * Commands for fspacectl(2) + */ +#define SPACECTL_DEALLOC 0 /* deallocate space */ + +/* + * fspacectl(2) flags + */ +#define SPACECTL_F_CANEXTEND (1) +#define SPACECTL_F_SUPPORTED (SPACECTL_F_CANEXTEND) #endif #ifndef _KERNEL @@ -361,6 +380,9 @@ int posix_fadvise(int, off_t, off_t, int); int posix_fallocate(int, off_t, off_t); #endif +#if __BSD_VISIBLE +int fspacectl(int, int, struct spacectl_range *, int); +#endif __END_DECLS #endif Index: sys/sys/file.h =================================================================== --- sys/sys/file.h +++ sys/sys/file.h @@ -35,6 +35,7 @@ #ifndef _SYS_FILE_H_ #define _SYS_FILE_H_ +#include "sys/fcntl.h" #ifndef _KERNEL #include /* XXX */ #include @@ -129,6 +130,9 @@ typedef int fo_get_seals_t(struct file *fp, int *flags); typedef int fo_fallocate_t(struct file *fp, off_t offset, off_t len, struct thread *td); +typedef int fo_fspacectl_t(struct file *fp, int cmd, + off_t offset, off_t len, int flags, + struct ucred *active_cred, struct thread *td); typedef int fo_flags_t; struct fileops { @@ -150,6 +154,7 @@ fo_add_seals_t *fo_add_seals; fo_get_seals_t *fo_get_seals; fo_fallocate_t *fo_fallocate; + fo_fspacectl_t *fo_fspacectl; fo_flags_t fo_flags; /* DFLAG_* below */ }; @@ -470,6 +475,17 @@ return ((*fp->f_ops->fo_fallocate)(fp, offset, len, td)); } +static __inline int fo_fspacectl(struct file *fp, int cmd, off_t offset, + off_t len, int flags, struct ucred *active_cred, struct thread *td) +{ + + if (fp->f_ops->fo_fspacectl == NULL) + return (ENODEV); + return ((*fp->f_ops->fo_fspacectl)(fp, cmd, offset, len, flags, + active_cred, td)); +} + + #endif /* _KERNEL */ #endif /* !SYS_FILE_H */ Index: sys/sys/syscallsubr.h =================================================================== --- sys/sys/syscallsubr.h +++ sys/sys/syscallsubr.h @@ -59,6 +59,7 @@ struct sched_param; union semun; struct sockaddr; +struct spacectl_range; struct stat; struct thr_param; struct timex; @@ -230,6 +231,10 @@ 
int advice); int kern_posix_fallocate(struct thread *td, int fd, off_t offset, off_t len); +int kern_fdeallocate(struct thread *td, int fd, off_t offset, off_t len); +int kern_fzerorange(struct thread *td, int fd, off_t offset, off_t len); +int kern_fspacectl(struct thread *td, int fd, int cmd, + struct spacectl_range *, int flags); int kern_procctl(struct thread *td, enum idtype idtype, id_t id, int com, void *data); int kern_pread(struct thread *td, int fd, void *buf, size_t nbyte, Index: sys/sys/unistd.h =================================================================== --- sys/sys/unistd.h +++ sys/sys/unistd.h @@ -156,6 +156,7 @@ #define _PC_INF_PRESENT 62 #define _PC_MAC_PRESENT 63 #define _PC_ACL_NFS4 64 +#define _PC_FDEALLOC_PRESENT 65 #endif /* From OpenSolaris, used by SEEK_DATA/SEEK_HOLE. */ Index: sys/sys/vnode.h =================================================================== --- sys/sys/vnode.h +++ sys/sys/vnode.h @@ -722,6 +722,9 @@ void vn_printf(struct vnode *vp, const char *fmt, ...) 
__printflike(2,3); int vrecycle(struct vnode *vp); int vrecyclel(struct vnode *vp); +/* vn_bmap_seekhole_locked is not public KPI */ +int vn_bmap_seekhole_locked(struct vnode *vp, u_long cmd, off_t *off, + struct ucred *cred); int vn_bmap_seekhole(struct vnode *vp, u_long cmd, off_t *off, struct ucred *cred); int vn_close(struct vnode *vp, @@ -730,6 +733,9 @@ struct vnode *outvp, off_t *outoffp, size_t *lenp, unsigned int flags, struct ucred *incred, struct ucred *outcred, struct thread *fsize_td); +int vn_deallocate(struct vnode *vp, off_t offset, off_t len, int flags, + int ioflg, struct ucred *active_cred, struct ucred *file_cred, + struct thread *td); void vn_finished_write(struct mount *mp); void vn_finished_secondary_write(struct mount *mp); int vn_fsync_buf(struct vnode *vp, int waitfor); Index: tests/sys/file/Makefile =================================================================== --- tests/sys/file/Makefile +++ tests/sys/file/Makefile @@ -10,6 +10,7 @@ TAP_TESTS_SH+= flock_test PLAIN_TESTS_C+= ftruncate_test PLAIN_TESTS_C+= newfileops_on_fork_test +ATF_TESTS_C+= fspacectl_test PROGS+= flock_helper Index: tests/sys/file/fspacectl_test.c =================================================================== --- /dev/null +++ tests/sys/file/fspacectl_test.c @@ -0,0 +1,421 @@ +/*- + * SPDX-License-Identifier: BSD-2-Clause-FreeBSD + * + * Copyright (c) 2021 The FreeBSD Foundation + * All rights reserved. + * + * This software was developed by Andrew Turner under sponsorship from + * the FreeBSD Foundation. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include +#include +#include + +#include +#include +#include + +static off_t file_max_blocks = 32; /* length of the test file, in blocks */ +static const char byte_to_fill = 0x5f; /* pattern byte written by fill() */ + +static int +fill(int fd, off_t offset, off_t len) /* write byte_to_fill over len bytes of fd starting at offset; 0 on success, 1 on failure */ +{ + int error; + size_t blen; + char *buf; + struct stat statbuf; + blksize_t blocksize; + + if (fstat(fd, &statbuf) == -1) + return (1); + blocksize = statbuf.st_blksize; /* write in chunks of at most st_blksize bytes */ + error = 0; + buf = malloc(blocksize); + if (buf == NULL) + return (1); + + while (len > 0) { + blen = len < (off_t)blocksize ? 
len : blocksize; + memset(buf, byte_to_fill, blen); + if (pwrite(fd, buf, blen, offset) != (ssize_t)blen) { + error = 1; + break; + } + len -= blen; + offset += blen; + } + + free(buf); + return (error); +} + +static blksize_t +fd_get_blksize(void) +{ + struct statfs statfsbuf; + + if (statfs(".", &statfsbuf) == -1) + return (-1); + return (statfsbuf.f_iosize); /* f_iosize of the filesystem holding "." */ +} + +static int +check_content_dealloc(int fd, off_t hole_start, off_t hole_len, off_t file_sz) +{ + int error; + size_t blen; + off_t offset, resid; + struct stat statbuf; + char *buf, *sblk; + blksize_t blocksize; + + blocksize = fd_get_blksize(); + if (blocksize == -1) + return (1); + error = 0; + buf = malloc(blocksize * 2); /* first half: read buffer, second half: reference block */ + if (buf == NULL) + return (1); + sblk = buf + blocksize; + + memset(sblk, 0, blocksize); /* reference block: all zeroes */ + + if ((uint64_t)hole_start + hole_len > (uint64_t)file_sz) + hole_len = file_sz - hole_start; /* clamp the hole to the expected file size */ + + /* + * Check hole is zeroed. + */ + offset = hole_start; + resid = hole_len; + while (resid > 0) { + blen = resid < (off_t)blocksize ? resid : blocksize; + if (pread(fd, buf, blen, offset) != (ssize_t)blen) { + error = 1; + break; + } + if (memcmp(buf, sblk, blen) != 0) { + error = 1; + break; + } + resid -= blen; + offset += blen; + } + + memset(sblk, byte_to_fill, blocksize); /* reference block: fill pattern */ + + /* + * Check file region before hole still holds the fill pattern. + */ + offset = 0; + resid = hole_start; + while (resid > 0) { + blen = resid < (off_t)blocksize ? resid : blocksize; + if (pread(fd, buf, blen, offset) != (ssize_t)blen) { + error = 1; + break; + } + if (memcmp(buf, sblk, blen) != 0) { + error = 1; + break; + } + resid -= blen; + offset += blen; + } + + /* + * Check file region after hole still holds the fill pattern. + */ + offset = hole_start + hole_len; + resid = file_sz - offset; + while (resid > 0) { + blen = resid < (off_t)blocksize ? 
resid : blocksize; + if (pread(fd, buf, blen, offset) != (ssize_t)blen) { + error = 1; + break; + } + if (memcmp(buf, sblk, blen) != 0) { + error = 1; + break; + } + resid -= blen; + offset += blen; + } + + /* + * Check the file size; statbuf is only valid when fstat() succeeded. + */ + if (fstat(fd, &statbuf) == -1) + error = -1; + else if (statbuf.st_size != file_sz) + error = -1; + + free(buf); + return (error); +} + +static int +check_hole_alloc(int fd, off_t alloc_start, off_t alloc_len, off_t file_sz) +{ + off_t dataoff, holeoff; + struct stat statbuf; + + if (alloc_len > OFF_MAX - alloc_start) /* overflow-free range test; assumes alloc_start >= 0 */ + alloc_len = OFF_MAX - alloc_start; /* clamp the range end to OFF_MAX */ + + dataoff = lseek(fd, alloc_start, SEEK_DATA); + if (dataoff == -1) + return (-1); + holeoff = lseek(fd, alloc_start, SEEK_HOLE); + if (holeoff == -1) + return (-1); + + /* + * Check if the start offset of allocated region within file size is + * legit + */ + if (dataoff != alloc_start && dataoff < file_sz) + return (-1); + /* + * Check if the end offset of allocated region within file size is + * legit + */ + if (holeoff < alloc_start + alloc_len && holeoff < file_sz) + return (-1); + + /* + * Check file size matches with expected file size. 
+ */ + if (fstat(fd, &statbuf) == -1) + return (-1); + if (statbuf.st_size != file_sz) + return (-1); + + return (0); +} + +/* + * Check block-aligned deallocation of a range lying entirely inside the file + */ +ATF_TC_WITHOUT_HEAD(aligned_dealloc); +ATF_TC_BODY(aligned_dealloc, tc) +{ + struct spacectl_range range; + blksize_t blocksize; + int fd; + + ATF_REQUIRE((blocksize = fd_get_blksize()) != -1); + range.r_offset = blocksize; + range.r_len = (file_max_blocks - 1) * blocksize - range.r_offset; + + ATF_REQUIRE((fd = open("sys_fspacectl_testfile", + O_CREAT | O_RDWR | O_TRUNC, 0600)) != -1); + ATF_REQUIRE(fill(fd, 0, file_max_blocks * blocksize) == 0); + ATF_CHECK(fspacectl(fd, SPACECTL_DEALLOC, &range, 0) == 0); + ATF_CHECK(check_content_dealloc(fd, range.r_offset, range.r_len, + file_max_blocks * blocksize) == 0); + ATF_REQUIRE(close(fd) == 0); +} + +/* + * Check deallocation of an in-file range whose start and end are mid-block + */ +ATF_TC_WITHOUT_HEAD(unaligned_dealloc); +ATF_TC_BODY(unaligned_dealloc, tc) +{ + struct spacectl_range range; + blksize_t blocksize; + int fd; + + ATF_REQUIRE((blocksize = fd_get_blksize()) != -1); + range.r_offset = blocksize / 2; + range.r_len = (file_max_blocks - 1) * blocksize + blocksize / 2 - + range.r_offset; + + ATF_REQUIRE((fd = open("sys_fspacectl_testfile", + O_CREAT | O_RDWR | O_TRUNC, 0600)) != -1); + ATF_REQUIRE(fill(fd, 0, file_max_blocks * blocksize) == 0); + ATF_CHECK(fspacectl(fd, SPACECTL_DEALLOC, &range, 0) == 0); + ATF_CHECK(check_content_dealloc(fd, range.r_offset, range.r_len, + file_max_blocks * blocksize) == 0); + ATF_REQUIRE(close(fd) == 0); +} + +/* + * Check aligned deallocation with r_len = OFF_MAX; file size must not change + */ +ATF_TC_WITHOUT_HEAD(aligned_dealloc_offmax); +ATF_TC_BODY(aligned_dealloc_offmax, tc) +{ + struct spacectl_range range; + blksize_t blocksize; + int fd; + + ATF_REQUIRE((blocksize = fd_get_blksize()) != -1); + range.r_offset = blocksize; + range.r_len = OFF_MAX; + + ATF_REQUIRE((fd = open("sys_fspacectl_testfile", + O_CREAT | O_RDWR | O_TRUNC, 0600)) != -1); + ATF_REQUIRE(fill(fd, 
0, file_max_blocks * blocksize) == 0); + ATF_CHECK(fspacectl(fd, SPACECTL_DEALLOC, &range, 0) == 0); + ATF_CHECK(check_content_dealloc(fd, range.r_offset, range.r_len, + file_max_blocks * blocksize) == 0); + ATF_REQUIRE(close(fd) == 0); +} + +/* + * Check unaligned deallocation with r_len = OFF_MAX; file size must not change + */ +ATF_TC_WITHOUT_HEAD(unaligned_dealloc_offmax); +ATF_TC_BODY(unaligned_dealloc_offmax, tc) +{ + struct spacectl_range range; + blksize_t blocksize; + int fd; + + ATF_REQUIRE((blocksize = fd_get_blksize()) != -1); + range.r_offset = blocksize / 2; + range.r_len = OFF_MAX; + + ATF_REQUIRE((fd = open("sys_fspacectl_testfile", + O_CREAT | O_RDWR | O_TRUNC, 0600)) != -1); + ATF_REQUIRE(fill(fd, 0, file_max_blocks * blocksize) == 0); + ATF_CHECK(fspacectl(fd, SPACECTL_DEALLOC, &range, 0) == 0); + ATF_CHECK(check_content_dealloc(fd, range.r_offset, range.r_len, + file_max_blocks * blocksize) == 0); + ATF_REQUIRE(close(fd) == 0); +} + +/* + * Check aligned deallocation past EOF with SPACECTL_F_CANEXTEND; file must grow + */ +ATF_TC_WITHOUT_HEAD(aligned_dealloc_canextend); +ATF_TC_BODY(aligned_dealloc_canextend, tc) +{ + struct spacectl_range range; + blksize_t blocksize; + int fd; + + ATF_REQUIRE((blocksize = fd_get_blksize()) != -1); + range.r_offset = blocksize; + range.r_len = (file_max_blocks + 1) * blocksize - range.r_offset; + + ATF_REQUIRE((fd = open("sys_fspacectl_testfile", + O_CREAT | O_RDWR | O_TRUNC, 0600)) != -1); + ATF_REQUIRE(fill(fd, 0, file_max_blocks * blocksize) == 0); + ATF_CHECK( + fspacectl(fd, SPACECTL_DEALLOC, &range, SPACECTL_F_CANEXTEND) == 0); + ATF_CHECK(check_content_dealloc(fd, range.r_offset, range.r_len, + (file_max_blocks + 1) * blocksize) == 0); + ATF_REQUIRE(close(fd) == 0); +} + +/* + * Check unaligned deallocation past EOF with SPACECTL_F_CANEXTEND; file must grow + */ +ATF_TC_WITHOUT_HEAD(unaligned_dealloc_canextend); +ATF_TC_BODY(unaligned_dealloc_canextend, tc) +{ + struct spacectl_range range; + blksize_t blocksize; + int fd; + + ATF_REQUIRE((blocksize = fd_get_blksize()) != -1); + range.r_offset 
= blocksize / 2; + range.r_len = file_max_blocks * blocksize + blocksize / 2 - + range.r_offset; + + ATF_REQUIRE((fd = open("sys_fspacectl_testfile", + O_CREAT | O_RDWR | O_TRUNC, 0600)) != -1); + ATF_REQUIRE(fill(fd, 0, file_max_blocks * blocksize) == 0); + ATF_CHECK( + fspacectl(fd, SPACECTL_DEALLOC, &range, SPACECTL_F_CANEXTEND) == 0); + ATF_CHECK(check_content_dealloc(fd, range.r_offset, range.r_len, + file_max_blocks * blocksize + blocksize / 2) == 0); + ATF_REQUIRE(close(fd) == 0); +} + +/* + * Check aligned deallocation crossing EOF with no flags; file size must not change + */ +ATF_TC_WITHOUT_HEAD(aligned_dealloc_no_canextend); +ATF_TC_BODY(aligned_dealloc_no_canextend, tc) +{ + struct spacectl_range range; + blksize_t blocksize; + int fd; + + ATF_REQUIRE((blocksize = fd_get_blksize()) != -1); + range.r_offset = blocksize; + range.r_len = (file_max_blocks + 1) * blocksize - range.r_offset; + + ATF_REQUIRE((fd = open("sys_fspacectl_testfile", + O_CREAT | O_RDWR | O_TRUNC, 0600)) != -1); + ATF_REQUIRE(fill(fd, 0, file_max_blocks * blocksize) == 0); + ATF_CHECK(fspacectl(fd, SPACECTL_DEALLOC, &range, 0) == 0); + ATF_CHECK(check_content_dealloc(fd, range.r_offset, range.r_len, + file_max_blocks * blocksize) == 0); + ATF_REQUIRE(close(fd) == 0); +} + +/* + * Check unaligned deallocation crossing EOF with no flags; file size must not change + */ +ATF_TC_WITHOUT_HEAD(unaligned_dealloc_no_canextend); +ATF_TC_BODY(unaligned_dealloc_no_canextend, tc) +{ + struct spacectl_range range; + blksize_t blocksize; + int fd; + + ATF_REQUIRE((blocksize = fd_get_blksize()) != -1); + range.r_offset = blocksize / 2; + range.r_len = file_max_blocks * blocksize + blocksize / 2 - + range.r_offset; + + ATF_REQUIRE((fd = open("sys_fspacectl_testfile", + O_CREAT | O_RDWR | O_TRUNC, 0600)) != -1); + ATF_REQUIRE(fill(fd, 0, file_max_blocks * blocksize) == 0); + ATF_CHECK(fspacectl(fd, SPACECTL_DEALLOC, &range, 0) == 0); + ATF_CHECK(check_content_dealloc(fd, range.r_offset, range.r_len, + file_max_blocks * blocksize) == 0); + ATF_REQUIRE(close(fd) == 0); +} + 
+ATF_TP_ADD_TCS(tp) /* registers every fspacectl test case defined in this file */ +{ + ATF_TP_ADD_TC(tp, aligned_dealloc); + ATF_TP_ADD_TC(tp, unaligned_dealloc); + ATF_TP_ADD_TC(tp, aligned_dealloc_canextend); + ATF_TP_ADD_TC(tp, unaligned_dealloc_canextend); + ATF_TP_ADD_TC(tp, aligned_dealloc_no_canextend); + ATF_TP_ADD_TC(tp, unaligned_dealloc_no_canextend); + ATF_TP_ADD_TC(tp, aligned_dealloc_offmax); + ATF_TP_ADD_TC(tp, unaligned_dealloc_offmax); + + return (atf_no_error()); +}