Index: lib/libc/sys/Makefile.inc =================================================================== --- lib/libc/sys/Makefile.inc +++ lib/libc/sys/Makefile.inc @@ -189,6 +189,7 @@ fhreadlink.2 \ flock.2 \ fork.2 \ + fspacectl.2 \ fsync.2 \ getdirentries.2 \ getdtablesize.2 \ Index: lib/libc/sys/Symbol.map =================================================================== --- lib/libc/sys/Symbol.map +++ lib/libc/sys/Symbol.map @@ -410,6 +410,7 @@ fhlink; fhlinkat; fhreadlink; + fspacectl; getfhat; funlinkat; memfd_create; Index: lib/libc/sys/fspacectl.2 =================================================================== --- /dev/null +++ lib/libc/sys/fspacectl.2 @@ -0,0 +1,173 @@ +.\" +.\" SPDX-License-Identifier: BSD-2-Clause-FreeBSD +.\" +.\" Copyright (c) 2021 The FreeBSD Foundation +.\" +.\" Redistribution and use in source and binary forms, with or without +.\" modification, are permitted provided that the following conditions +.\" are met: +.\" 1. Redistributions of source code must retain the above copyright +.\" notice, this list of conditions and the following disclaimer. +.\" 2. Redistributions in binary form must reproduce the above copyright +.\" notice, this list of conditions and the following disclaimer in the +.\" documentation and/or other materials provided with the distribution. +.\" +.\" THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND +.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +.\" ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE +.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF +.\" SUCH DAMAGE. +.\" +.Dd Feb 08, 2021 +.Dt FSPACECTL 2 +.Os +.Sh NAME +.Nm fspacectl +.Nd space management in a file +.Sh LIBRARY +.Lb libc +.Sh SYNOPSIS +.In fcntl.h +.Ft int +.Fn fspacectl "int fd" "int cmd" "struct spacectl_range *range" "int flags" +.Sh DESCRIPTION +.Nm +is a system call performing space management in the file referenced by +.Fa fd . +.Fa cmd +specifies the operation to take place in the file. +.Fa range +is the operation range. +.Fa flags +controls the behavior of the operation to take place. +.Pp +The +.Fa range +argument points to a structure defined as: +.Bd -literal +struct spacectl_range { + off_t r_offset; + off_t r_len; +}; +.Ed +.Pp +The operation specified by the +.Fa cmd +argument may be one of: +.Bl -tag -width SPACECTL_DEALLOC +.It Dv SPACECTL_ALLOC +Back a region in the file specified by the +.Fa range +argument with file system space. +This operation is used to implement +.Xr posix_fallocate 2 . +Content will remain the same for non-hole region. +Holes will be converted to non-hole region with zeroes. +.It Dv SPACECTL_DEALLOC +Zero a region in the file specified by the +.Fa range +argument. +If the file system supports hole-punching, +file system space deallocation may be performed in the given region. 
+.El +.Pp +The +.Fa flags +argument may include one or more of the following: +.Bl -tag -width SPACECTL_F_CANEXTEND +.It Dv SPACECTL_F_CANEXTEND +The operation can extend the file size if +.Fa range->r_offset ++ +.Fa range->r_len +is greater than the current file size. +.El +.Pp +For +.Dv SPACECTL_ALLOC +and +.Dv SPACECTL_DEALLOC +operations, unless +.Fa flags +includes +.Dv SPACECTL_F_CANEXTEND , a region specified in +.Fa range +that extends beyond the end of the file will not extend the file size. +.Pp +If a signal is delivered to a thread calling this system call, +the system call would either finish as if nothing has happened, +or the signal would be delivered after the operation finishes. +.Sh RETURN VALUES +Upon successful completion, the value 0 is returned; +otherwise the value -1 is returned and +.Va errno +is set to indicate the error. +.Sh ERRORS +Possible failure conditions: +.Bl -tag -width Er +.It Bq Er EBADF +The +.Fa fd +argument is not a valid file descriptor. +.It Bq Er EBADF +The +.Fa fd +argument references a file that was opened without write permission. +.It Bq Er EFBIG +The value of +.Fa range->r_offset + +.Fa range->r_len +is greater than the maximum file size when +.Dv SPACECTL_F_CANEXTEND +is set. +.It Bq Er EINTR +A signal was caught during execution. +.It Bq Er EINVAL +Either the +.Fa "range->r_offset" +argument was less than zero, or the +.Fa "range->r_len" +argument was less than zero, +or the operation is not supported by the file system. +.It Bq Er EIO +An I/O error occurred while reading from or writing to a file system. +.It Bq Er EINTEGRITY +Corrupted data was detected while reading from the file system. +.It Bq Er ENODEV +The +.Fa fd +argument does not refer to a file that supports +.Nm . +.It Bq Er ENOSPC +There is insufficient free space remaining on the file system storage +media. +.It Bq Er ENOTCAPABLE +The file descriptor +.Fa fd +has insufficient rights. +.It Bq Er ESPIPE +The +.Fa fd +argument is associated with a pipe or FIFO. 
+.El +.Sh SEE ALSO +.Xr creat 2 , +.Xr ftruncate 2 , +.Xr open 2 , +.Xr unlink 2 +.Sh HISTORY +The +.Nm +system call appeared in +.Fx 14.0 . +.Sh AUTHORS +.Nm +and this manual page were developed by +.An Ka Ho Ng Aq Mt khng@FreeBSDFoundation.org +under sponsorship from the FreeBSD Foundation. Index: lib/libc/sys/pathconf.2 =================================================================== --- lib/libc/sys/pathconf.2 +++ lib/libc/sys/pathconf.2 @@ -166,6 +166,9 @@ .It Li _PC_MIN_HOLE_SIZE If a file system supports the reporting of holes (see .Xr lseek 2 ) , +.It Li _PC_FDEALLOC_PRESENT +If a file system supports hole-punching (see +.Xr fspacectl 2 ) , .Fn pathconf and .Fn fpathconf Index: share/man/man9/Makefile =================================================================== --- share/man/man9/Makefile +++ share/man/man9/Makefile @@ -397,6 +397,7 @@ vm_page_wire.9 \ vm_set_page_size.9 \ vmem.9 \ + vn_deallocate.9 \ vn_fullpath.9 \ vn_isdisk.9 \ vnet.9 \ @@ -412,6 +413,7 @@ VOP_BWRITE.9 \ VOP_COPY_FILE_RANGE.9 \ VOP_CREATE.9 \ + VOP_DEALLOCATE.9 \ VOP_FSYNC.9 \ VOP_GETACL.9 \ VOP_GETEXTATTR.9 \ Index: share/man/man9/VOP_DEALLOCATE.9 =================================================================== --- /dev/null +++ share/man/man9/VOP_DEALLOCATE.9 @@ -0,0 +1,101 @@ +.\" +.\" SPDX-License-Identifier: BSD-2-Clause-FreeBSD +.\" +.\" Copyright (c) 2021 The FreeBSD Foundation +.\" +.\" Redistribution and use in source and binary forms, with or without +.\" modification, are permitted provided that the following conditions +.\" are met: +.\" 1. Redistributions of source code must retain the above copyright +.\" notice, this list of conditions and the following disclaimer. +.\" 2. Redistributions in binary form must reproduce the above copyright +.\" notice, this list of conditions and the following disclaimer in the +.\" documentation and/or other materials provided with the distribution. 
+.\" +.\" THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND +.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +.\" ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE +.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF +.\" SUCH DAMAGE. +.\" +.Dd February 10, 2021 +.Dt VOP_DEALLOCATE 9 +.Os +.Sh NAME +.Nm VOP_DEALLOCATE +.Nd zero and/or deallocate storage from a file +.Sh SYNOPSIS +.In sys/param.h +.In sys/vnode.h +.Ft int +.Fn VOP_DEALLOCATE "struct vnode *vp" "off_t *offset" "off_t *len" "int flags" "struct ucred *cred" +.Sh DESCRIPTION +This VOP call zeroes/deallocates storage for an offset range in a file. +It is used to implement the +.Xr fspacectl 2 +system call. +.Pp +Its arguments are: +.Bl -tag -width offset +.It Fa vp +The vnode of the file. +.It Fa offset +The start of the range to zero/deallocate in the file. +.It Fa len +The length of the range to zero/deallocate in the file. +.It Fa flags +The parameter to control the behavior of +.Nm . +.El +.Pp +The +.Fa offset +and +.Fa len +arguments are updated to reflect the portion of the range that +still needs to be zeroed/deallocated on return. +A partial result is considered a successful operation. +The file's contents outside the given range are not changed. +.Sh LOCKS +The file should be locked on entry and will still be locked on exit. +In case +.Dv SPACECTL_F_CANEXTEND +is specified in +.Fa flags , +the file should be locked exclusively. 
+.Sh RETURN VALUES +Zero is returned if the call is successful; otherwise an appropriate +error code is returned. +.Sh ERRORS +.Bl -tag -width Er +.It Bq Er EINVAL +Invalid +.Fa offset , len +or +.Fa flags +parameters are passed into this VOP call. +.It Bq Er EFBIG +An attempt was made to write a file that exceeds the process's file size +limit or the maximum file size when SPACECTL_F_CANEXTEND is set in +.Fa flags . +.It Bq Er ENODEV +The vnode type is not supported by this VOP call. +.It Bq Er ENOSPC +The file system is full. +.It Bq Er EPERM +An append-only flag is set on the file, but the caller is attempting to +write before the current end of file. +.El +.Sh SEE ALSO +.Xr vnode 9 +.Sh AUTHORS +.Nm +and this manual page were developed by +.An Ka Ho Ng Aq Mt khng@FreeBSDFoundation.org +under sponsorship from the FreeBSD Foundation. Index: share/man/man9/vn_deallocate.9 =================================================================== --- /dev/null +++ share/man/man9/vn_deallocate.9 @@ -0,0 +1,95 @@ +.\" +.\" SPDX-License-Identifier: BSD-2-Clause-FreeBSD +.\" +.\" Copyright (c) 2021 The FreeBSD Foundation +.\" +.\" Redistribution and use in source and binary forms, with or without +.\" modification, are permitted provided that the following conditions +.\" are met: +.\" 1. Redistributions of source code must retain the above copyright +.\" notice, this list of conditions and the following disclaimer. +.\" 2. Redistributions in binary form must reproduce the above copyright +.\" notice, this list of conditions and the following disclaimer in the +.\" documentation and/or other materials provided with the distribution. +.\" +.\" THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND +.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +.\" ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE +.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF +.\" SUCH DAMAGE. +.\" +.Dd July 9, 2001 +.Dt VN_DEALLOCATE 9 +.Os +.Sh NAME +.Nm vn_deallocate +.Nd zero and/or deallocate storage from a file +.Sh SYNOPSIS +.In sys/param.h +.In sys/vnode.h +.Ft int +.Fn vn_deallocate "struct vnode *vp" "off_t offset" "off_t len" "int flags" "int ioflg" "struct ucred *active_cred" "struct ucred *file_cred" +.Sh DESCRIPTION +The +.Fn vn_deallocate +function zeroes and/or deallocates backing storage space from a file. +This function only works on vnodes with +.Dv VREG +type. +.Pp +The arguments are: +.Bl -tag -width active_cred +.It Fa vp +The vnode of the file. +.It Fa offset +The starting offset of the operation range. +.It Fa len +The length of the operation range. +.It Fa flags +The control flags argument of the operation. +.It Fa ioflg +The control flags argument of vnode locking. +.It Fa active_cred +The user credentials of the calling thread. +.It Fa file_cred +The credentials installed on the file description pointing to the vnode or NOCRED. +.El +.Pp +The +.Fa flags +argument may be one or more of the following flags: +.Bl -tag -width SPACECTL_F_CANEXTEND +.It Dv SPACECTL_F_CANEXTEND +The operation can extend the file size if +.Fa offset ++ +.Fa len +is greater than the current file size. +.El +.Pp +The +.Fa ioflg +argument may be one or more of the following flags: +.Bl -tag -width IO_RANGELOCKED +.It Dv IO_NODELOCKED +The caller has granted vnode lock. 
+.It Dv IO_RANGELOCKED +The caller has granted vnode rangelock. +.El +.Sh RETURN VALUES +Upon successful completion, the value 0 is returned; otherwise the +appropriate error is returned. +.Sh SEE ALSO +.Xr vnode 9 , +.Xr VOP_DEALLOCATE 9 +.Sh AUTHORS +.Nm +and this manual page were developed by +.An Ka Ho Ng Aq Mt khng@FreeBSDFoundation.org +under sponsorship from the FreeBSD Foundation. Index: sys/bsm/audit_kevents.h =================================================================== --- sys/bsm/audit_kevents.h +++ sys/bsm/audit_kevents.h @@ -662,6 +662,7 @@ #define AUE_SPECIALFD 43266 /* FreeBSD-specific. */ #define AUE_AIO_WRITEV 43267 /* FreeBSD-specific. */ #define AUE_AIO_READV 43268 /* FreeBSD-specific. */ +#define AUE_FSPACECTL 43269 /* FreeBSD-specific. */ /* * Darwin BSM uses a number of AUE_O_* definitions, which are aliased to the Index: sys/compat/freebsd32/freebsd32_misc.c =================================================================== --- sys/compat/freebsd32/freebsd32_misc.c +++ sys/compat/freebsd32/freebsd32_misc.c @@ -3569,6 +3569,20 @@ return (kern_posix_error(td, error)); } +int +freebsd32_fspacectl(struct thread *td, struct freebsd32_fspacectl_args *uap) +{ + int error; + struct spacectl_range range; + + error = copyin(uap->range, &range, sizeof(range)); + if (error != 0) + return (error); + + error = kern_fspacectl(td, uap->fd, uap->cmd, &range, uap->flags); + return (error); +} + int freebsd32_posix_fadvise(struct thread *td, struct freebsd32_posix_fadvise_args *uap) Index: sys/compat/freebsd32/freebsd32_proto.h =================================================================== --- sys/compat/freebsd32/freebsd32_proto.h +++ sys/compat/freebsd32/freebsd32_proto.h @@ -751,6 +751,12 @@ struct freebsd32_aio_readv_args { char aiocbp_l_[PADL_(struct aiocb32 *)]; struct aiocb32 * aiocbp; char aiocbp_r_[PADR_(struct aiocb32 *)]; }; +struct freebsd32_fspacectl_args { + char fd_l_[PADL_(int)]; int fd; char fd_r_[PADR_(int)]; + char 
cmd_l_[PADL_(int)]; int cmd; char cmd_r_[PADR_(int)]; + char range_l_[PADL_(struct spacectl_range *)]; struct spacectl_range * range; char range_r_[PADR_(struct spacectl_range *)]; + char flags_l_[PADL_(int)]; int flags; char flags_r_[PADR_(int)]; +}; #if !defined(PAD64_REQUIRED) && !defined(__amd64__) #define PAD64_REQUIRED #endif @@ -893,6 +899,7 @@ int freebsd32___sysctlbyname(struct thread *, struct freebsd32___sysctlbyname_args *); int freebsd32_aio_writev(struct thread *, struct freebsd32_aio_writev_args *); int freebsd32_aio_readv(struct thread *, struct freebsd32_aio_readv_args *); +int freebsd32_fspacectl(struct thread *, struct freebsd32_fspacectl_args *); #ifdef COMPAT_43 @@ -1476,6 +1483,7 @@ #define FREEBSD32_SYS_AUE_freebsd32___sysctlbyname AUE_SYSCTL #define FREEBSD32_SYS_AUE_freebsd32_aio_writev AUE_AIO_WRITEV #define FREEBSD32_SYS_AUE_freebsd32_aio_readv AUE_AIO_READV +#define FREEBSD32_SYS_AUE_freebsd32_fspacectl AUE_FSPACECTL #undef PAD_ #undef PADL_ Index: sys/compat/freebsd32/freebsd32_syscall.h =================================================================== --- sys/compat/freebsd32/freebsd32_syscall.h +++ sys/compat/freebsd32/freebsd32_syscall.h @@ -506,4 +506,5 @@ #define FREEBSD32_SYS___specialfd 577 #define FREEBSD32_SYS_freebsd32_aio_writev 578 #define FREEBSD32_SYS_freebsd32_aio_readv 579 -#define FREEBSD32_SYS_MAXSYSCALL 580 +#define FREEBSD32_SYS_freebsd32_fspacectl 580 +#define FREEBSD32_SYS_MAXSYSCALL 581 Index: sys/compat/freebsd32/freebsd32_syscalls.c =================================================================== --- sys/compat/freebsd32/freebsd32_syscalls.c +++ sys/compat/freebsd32/freebsd32_syscalls.c @@ -616,4 +616,5 @@ "__specialfd", /* 577 = __specialfd */ "freebsd32_aio_writev", /* 578 = freebsd32_aio_writev */ "freebsd32_aio_readv", /* 579 = freebsd32_aio_readv */ + "freebsd32_fspacectl", /* 580 = freebsd32_fspacectl */ }; Index: sys/compat/freebsd32/freebsd32_sysent.c 
=================================================================== --- sys/compat/freebsd32/freebsd32_sysent.c +++ sys/compat/freebsd32/freebsd32_sysent.c @@ -669,4 +669,5 @@ { .sy_narg = AS(__specialfd_args), .sy_call = (sy_call_t *)sys___specialfd, .sy_auevent = AUE_SPECIALFD, .sy_flags = SYF_CAPENABLED, .sy_thrcnt = SY_THR_STATIC }, /* 577 = __specialfd */ { .sy_narg = AS(freebsd32_aio_writev_args), .sy_call = (sy_call_t *)freebsd32_aio_writev, .sy_auevent = AUE_AIO_WRITEV, .sy_flags = SYF_CAPENABLED, .sy_thrcnt = SY_THR_STATIC }, /* 578 = freebsd32_aio_writev */ { .sy_narg = AS(freebsd32_aio_readv_args), .sy_call = (sy_call_t *)freebsd32_aio_readv, .sy_auevent = AUE_AIO_READV, .sy_flags = SYF_CAPENABLED, .sy_thrcnt = SY_THR_STATIC }, /* 579 = freebsd32_aio_readv */ + { .sy_narg = AS(freebsd32_fspacectl_args), .sy_call = (sy_call_t *)freebsd32_fspacectl, .sy_auevent = AUE_FSPACECTL, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 580 = freebsd32_fspacectl */ }; Index: sys/compat/freebsd32/freebsd32_systrace_args.c =================================================================== --- sys/compat/freebsd32/freebsd32_systrace_args.c +++ sys/compat/freebsd32/freebsd32_systrace_args.c @@ -3407,6 +3407,16 @@ *n_args = 1; break; } + /* freebsd32_fspacectl */ + case 580: { + struct freebsd32_fspacectl_args *p = params; + iarg[0] = p->fd; /* int */ + iarg[1] = p->cmd; /* int */ + uarg[2] = (intptr_t) p->range; /* struct spacectl_range * */ + iarg[3] = p->flags; /* int */ + *n_args = 4; + break; + } default: *n_args = 0; break; @@ -9183,6 +9193,25 @@ break; }; break; + /* freebsd32_fspacectl */ + case 580: + switch(ndx) { + case 0: + p = "int"; + break; + case 1: + p = "int"; + break; + case 2: + p = "userland struct spacectl_range *"; + break; + case 3: + p = "int"; + break; + default: + break; + }; + break; default: break; }; @@ -11102,6 +11131,11 @@ if (ndx == 0 || ndx == 1) p = "int"; break; + /* freebsd32_fspacectl */ + case 580: + if (ndx == 0 || ndx == 1) + p = 
"int"; + break; default: break; }; Index: sys/compat/freebsd32/syscalls.master =================================================================== --- sys/compat/freebsd32/syscalls.master +++ sys/compat/freebsd32/syscalls.master @@ -1174,5 +1174,9 @@ struct aiocb32 *aiocbp); } 579 AUE_AIO_READV STD { int freebsd32_aio_readv( \ struct aiocb32 *aiocbp); } +580 AUE_FSPACECTL STD { int freebsd32_fspacectl(int fd, \ + int cmd, \ + struct spacectl_range *range,\ + int flags); } ; vim: syntax=off Index: sys/contrib/openzfs/module/os/freebsd/zfs/zfs_vnops_os.c =================================================================== --- sys/contrib/openzfs/module/os/freebsd/zfs/zfs_vnops_os.c +++ sys/contrib/openzfs/module/os/freebsd/zfs/zfs_vnops_os.c @@ -3797,6 +3797,49 @@ return (error); } +/* + * common code for zfs_space-related operations + * + * This function is called from zfs_space and zfs_deallocate. + */ +static int +zfs_space_common(znode_t *zp, int cmd, off_t off, off_t len, int flag, + cred_t *cr) +{ + zfsvfs_t *zfsvfs = ZTOZSB(zp); + uint64_t uoff, ulen; + int error; + + if (cmd != F_FREESP) + return (EINVAL); + + /* + * Callers might not be able to detect properly that we are read-only, + * so check it explicitly here. + */ + if (zfs_is_readonly(zfsvfs)) + return (EROFS); + + if (len < 0) + return (EINVAL); + + /* + * Permissions aren't checked on Solaris because on this OS + * zfs_space() can only be called with an opened file handle. + * On Linux we can get here through truncate_range() which + * operates directly on inodes, so we need to check access rights. + */ + if ((error = zfs_zaccess(zp, ACE_WRITE_DATA, 0, B_FALSE, cr))) + return (error); + + uoff = off; + ulen = len; + + error = zfs_freesp(zp, uoff, ulen, flag, TRUE); + + return (error); +} + /* * Free or allocate space in a file. Currently, this function only * supports the `F_FREESP' command. 
However, this command is somewhat @@ -3821,47 +3864,16 @@ offset_t offset, cred_t *cr) { zfsvfs_t *zfsvfs = ZTOZSB(zp); - uint64_t off, len; + off_t off, len; int error; ZFS_ENTER(zfsvfs); ZFS_VERIFY_ZP(zp); - if (cmd != F_FREESP) { - ZFS_EXIT(zfsvfs); - return (SET_ERROR(EINVAL)); - } - - /* - * Callers might not be able to detect properly that we are read-only, - * so check it explicitly here. - */ - if (zfs_is_readonly(zfsvfs)) { - ZFS_EXIT(zfsvfs); - return (SET_ERROR(EROFS)); - } - - if (bfp->l_len < 0) { - ZFS_EXIT(zfsvfs); - return (SET_ERROR(EINVAL)); - } - - /* - * Permissions aren't checked on Solaris because on this OS - * zfs_space() can only be called with an opened file handle. - * On Linux we can get here through truncate_range() which - * operates directly on inodes, so we need to check access rights. - */ - if ((error = zfs_zaccess(zp, ACE_WRITE_DATA, 0, B_FALSE, cr))) { - ZFS_EXIT(zfsvfs); - return (error); - } - off = bfp->l_start; len = bfp->l_len; /* 0 means from off to end of file */ - error = zfs_freesp(zp, off, len, flag, TRUE); - + error = zfs_space_common(zp, cmd, off, len, flag, cr); ZFS_EXIT(zfsvfs); return (error); } @@ -5189,6 +5201,9 @@ case _PC_NAME_MAX: *ap->a_retval = NAME_MAX; return (0); + case _PC_FDEALLOC_PRESENT: + *ap->a_retval = 1; + return (0); case _PC_PIPE_BUF: if (ap->a_vp->v_type == VDIR || ap->a_vp->v_type == VFIFO) { *ap->a_retval = PIPE_BUF; @@ -5781,6 +5796,44 @@ return (error); } +/* + * This is mostly the same as zfs_space except it also limits the range of + * operation to zp->z_size if SPACECTL_F_CANEXTEND is not specified. The log + * record of zfs_space during replay would be the same as ordinary zfs_space. 
+ */ +static int +zfs_deallocate(struct vop_deallocate_args *ap) +{ + vnode_t *vp; + znode_t *zp; + zfsvfs_t *zfsvfs; + off_t offset, len; + int error; + + vp = ap->a_vp; + zp = VTOZ(vp); + zfsvfs = ZTOZSB(zp); + + ZFS_ENTER(zfsvfs); + ZFS_VERIFY_ZP(zp); + + offset = *ap->a_offset; + len = *ap->a_len; + + if ((ap->a_flags & SPACECTL_F_CANEXTEND) == 0) + len = omin(len, zp->z_size - offset); + if (len <= 0) { /* range is empty or entirely past EOF */ + error = 0; + goto out; + } + error = zfs_space_common(zp, F_FREESP, offset, len, O_RDWR, ap->a_cred); +out: + if (__predict_true(error == 0)) + *ap->a_len = 0; + ZFS_EXIT(zfsvfs); + return (error); +} + struct vop_vector zfs_vnodeops; struct vop_vector zfs_fifoops; struct vop_vector zfs_shareops; @@ -5798,6 +5851,7 @@ .vop_fplookup_symlink = zfs_freebsd_fplookup_symlink, .vop_access = zfs_freebsd_access, .vop_allocate = VOP_EINVAL, + .vop_deallocate = zfs_deallocate, .vop_lookup = zfs_cache_lookup, .vop_cachedlookup = zfs_freebsd_cachedlookup, .vop_getattr = zfs_freebsd_getattr, Index: sys/contrib/openzfs/module/os/freebsd/zfs/zfs_znode.c =================================================================== --- sys/contrib/openzfs/module/os/freebsd/zfs/zfs_znode.c +++ sys/contrib/openzfs/module/os/freebsd/zfs/zfs_znode.c @@ -1449,14 +1449,8 @@ error = dmu_free_long_range(zfsvfs->z_os, zp->z_id, off, len); - if (error == 0) { - /* - * In FreeBSD we cannot free block in the middle of a file, - * but only at the end of a file, so this code path should - * never happen. 
- */ - vnode_pager_setsize(ZTOV(zp), off); - } + if (error == 0) + vnode_pager_purge_range(ZTOV(zp), off, off + len); zfs_rangelock_exit(lr); Index: sys/fs/nfsclient/nfs_clvnops.c =================================================================== --- sys/fs/nfsclient/nfs_clvnops.c +++ sys/fs/nfsclient/nfs_clvnops.c @@ -3583,6 +3583,12 @@ struct nfsmount *nmp; int attrflag, error, ret; + if ((ap->a_flags & SPACECTL_F_CANEXTEND) == 0) + *ap->a_len = omin( + *ap->a_len, VTONFS(vp)->n_size - *ap->a_offset); + if (*ap->a_len == 0) + return (0); + attrflag = 0; nmp = VFSTONFS(vp->v_mount); mtx_lock(&nmp->nm_mtx); @@ -3596,7 +3602,7 @@ error = ncl_flush(vp, MNT_WAIT, td, 1, 0); if (error == 0) error = nfsrpc_allocate(vp, *ap->a_offset, *ap->a_len, - &nfsva, &attrflag, td->td_ucred, td, NULL); + &nfsva, &attrflag, ap->a_cred, td, NULL); if (error == 0) { *ap->a_offset += *ap->a_len; *ap->a_len = 0; Index: sys/fs/nfsserver/nfs_nfsdport.c =================================================================== --- sys/fs/nfsserver/nfs_nfsdport.c +++ sys/fs/nfsserver/nfs_nfsdport.c @@ -6329,7 +6329,8 @@ */ trycnt = 0; while (error == 0 && len > 0 && trycnt++ < 20) - error = VOP_ALLOCATE(vp, &off, &len); + error = VOP_ALLOCATE(vp, &off, &len, + SPACECTL_F_CANEXTEND, curthread->td_ucred); if (error == 0 && len > 0) error = NFSERR_IO; NFSEXITCODE(error); Index: sys/kern/init_sysent.c =================================================================== --- sys/kern/init_sysent.c +++ sys/kern/init_sysent.c @@ -635,4 +635,5 @@ { .sy_narg = AS(__specialfd_args), .sy_call = (sy_call_t *)sys___specialfd, .sy_auevent = AUE_SPECIALFD, .sy_flags = SYF_CAPENABLED, .sy_thrcnt = SY_THR_STATIC }, /* 577 = __specialfd */ { .sy_narg = AS(aio_writev_args), .sy_call = (sy_call_t *)sys_aio_writev, .sy_auevent = AUE_AIO_WRITEV, .sy_flags = SYF_CAPENABLED, .sy_thrcnt = SY_THR_STATIC }, /* 578 = aio_writev */ { .sy_narg = AS(aio_readv_args), .sy_call = (sy_call_t *)sys_aio_readv, .sy_auevent = 
AUE_AIO_READV, .sy_flags = SYF_CAPENABLED, .sy_thrcnt = SY_THR_STATIC }, /* 579 = aio_readv */ + { .sy_narg = AS(fspacectl_args), .sy_call = (sy_call_t *)sys_fspacectl, .sy_auevent = AUE_FSPACECTL, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 580 = fspacectl */ }; Index: sys/kern/sys_generic.c =================================================================== --- sys/kern/sys_generic.c +++ sys/kern/sys_generic.c @@ -855,8 +855,62 @@ goto out; } - error = fo_fallocate(fp, offset, len, td); - out: + error = fo_fspacectl(fp, SPACECTL_ALLOC, offset, len, + SPACECTL_F_CANEXTEND, td->td_ucred, td); +out: + fdrop(fp, td); + return (error); +} + +int +sys_fspacectl(struct thread *td, struct fspacectl_args *uap) +{ + struct spacectl_range range; + int error; + + error = copyin(uap->range, &range, sizeof(range)); + if (error != 0) + return (error); + + error = kern_fspacectl(td, uap->fd, uap->cmd, &range, uap->flags); + return (error); +} + +int +kern_fspacectl(struct thread *td, int fd, int cmd, struct spacectl_range *range, + int flags) +{ + struct file *fp; + off_t offset; + int error; + + offset = range->r_len; + + AUDIT_ARG_FD(fd); + AUDIT_ARG_CMD(cmd); + AUDIT_ARG_FFLAGS(flags); + + if ((cmd != SPACECTL_ALLOC && cmd != SPACECTL_DEALLOC) || + (range->r_offset < 0 || range->r_len < 0) || + (flags & ~SPACECTL_F_SUPPORTED)) + return (EINVAL); + + error = fget(td, fd, &cap_pwrite_rights, &fp); + if (error != 0) + return (error); + AUDIT_ARG_FILE(td->td_proc, fp); + if ((fp->f_ops->fo_flags & DFLAG_SEEKABLE) == 0) { + error = ESPIPE; + goto out; + } + if ((fp->f_flag & FWRITE) == 0) { + error = EBADF; + goto out; + } + + error = fo_fspacectl(fp, cmd, range->r_offset, range->r_len, flags, + td->td_ucred, td); +out: fdrop(fp, td); return (error); } Index: sys/kern/syscalls.c =================================================================== --- sys/kern/syscalls.c +++ sys/kern/syscalls.c @@ -586,4 +586,5 @@ "__specialfd", /* 577 = __specialfd */ "aio_writev", /* 578 = 
aio_writev */ "aio_readv", /* 579 = aio_readv */ + "fspacectl", /* 580 = fspacectl */ }; Index: sys/kern/syscalls.master =================================================================== --- sys/kern/syscalls.master +++ sys/kern/syscalls.master @@ -3238,6 +3238,14 @@ _Inout_ struct aiocb *aiocbp ); } +580 AUE_FSPACECTL STD { + int fspacectl( + int fd, + int cmd, + _In_ struct spacectl_range *range, + int flags + ); + } ; Please copy any additions and changes to the following compatability tables: ; sys/compat/freebsd32/syscalls.master Index: sys/kern/systrace_args.c =================================================================== --- sys/kern/systrace_args.c +++ sys/kern/systrace_args.c @@ -3399,6 +3399,16 @@ *n_args = 1; break; } + /* fspacectl */ + case 580: { + struct fspacectl_args *p = params; + iarg[0] = p->fd; /* int */ + iarg[1] = p->cmd; /* int */ + uarg[2] = (intptr_t) p->range; /* struct spacectl_range * */ + iarg[3] = p->flags; /* int */ + *n_args = 4; + break; + } default: *n_args = 0; break; @@ -9088,6 +9098,25 @@ break; }; break; + /* fspacectl */ + case 580: + switch(ndx) { + case 0: + p = "int"; + break; + case 1: + p = "int"; + break; + case 2: + p = "userland struct spacectl_range *"; + break; + case 3: + p = "int"; + break; + default: + break; + }; + break; default: break; }; @@ -11034,6 +11063,11 @@ if (ndx == 0 || ndx == 1) p = "int"; break; + /* fspacectl */ + case 580: + if (ndx == 0 || ndx == 1) + p = "int"; + break; default: break; }; Index: sys/kern/uipc_shm.c =================================================================== --- sys/kern/uipc_shm.c +++ sys/kern/uipc_shm.c @@ -145,7 +145,7 @@ static fo_mmap_t shm_mmap; static fo_get_seals_t shm_get_seals; static fo_add_seals_t shm_add_seals; -static fo_fallocate_t shm_fallocate; +static fo_fspacectl_t shm_fspacectl; /* File descriptor operations. 
*/ struct fileops shm_ops = { @@ -165,7 +165,7 @@ .fo_mmap = shm_mmap, .fo_get_seals = shm_get_seals, .fo_add_seals = shm_add_seals, - .fo_fallocate = shm_fallocate, + .fo_fspacectl = shm_fspacectl, .fo_flags = DFLAG_PASSABLE | DFLAG_SEEKABLE, }; @@ -1875,7 +1875,56 @@ } static int -shm_fallocate(struct file *fp, off_t offset, off_t len, struct thread *td) +shm_deallocate(struct shmfd *shmfd, off_t offset, off_t len, int flags, + void *rl_cookie, struct thread *td) +{ + vm_pindex_t start, start2, end; + vm_ooffset_t size; + vm_page_t m; + int error; + + if (len > OFF_MAX - offset) + len = OFF_MAX - offset; + start = OFF_TO_IDX(offset); + start2 = OFF_TO_IDX(offset + PAGE_MASK); + end = OFF_TO_IDX(offset + len); + size = offset + len; + error = 0; + + VM_OBJECT_WLOCK(shmfd->shm_object); + + if (start2 < end) + vm_object_page_remove(shmfd->shm_object, start2, end, 0); + + if ((offset & PAGE_MASK) != 0) { + m = vm_page_grab(shmfd->shm_object, start, VM_ALLOC_NOCREAT); + if (m != NULL) { + pmap_zero_page_area(m, offset & PAGE_MASK, + MIN(len, PAGE_SIZE - (offset & PAGE_MASK))); + vm_page_set_dirty(m); + vm_page_xunbusy(m); + } + } + if (start2 <= end && (size & PAGE_MASK) != 0) { + m = vm_page_grab(shmfd->shm_object, end, VM_ALLOC_NOCREAT); + if (m != NULL) { + pmap_zero_page_area(m, 0, size & PAGE_MASK); + vm_page_set_dirty(m); + vm_page_xunbusy(m); + } + } + if (size > shmfd->shm_size && (flags & SPACECTL_F_CANEXTEND) != 0) + error = shm_largepage(shmfd) ? 
shm_dotruncate_largepage(shmfd, + size, rl_cookie) : shm_dotruncate_locked(shmfd, size, + rl_cookie); + + VM_OBJECT_WUNLOCK(shmfd->shm_object); + return (error); +} + +static int +shm_fspacectl(struct file *fp, int cmd, off_t offset, off_t len, int flags, + struct ucred *active_cred, struct thread *td) { void *rl_cookie; struct shmfd *shmfd; @@ -1887,6 +1936,14 @@ shmfd = fp->f_data; size = offset + len; + if (cmd != SPACECTL_ALLOC && cmd != SPACECTL_DEALLOC) + return (EINVAL); + if (offset < 0 || len < 0 || (flags & ~SPACECTL_F_SUPPORTED) != 0) + return (EINVAL); + if (len == 0) + /* Degenerated case */ + return (0); + /* * Just grab the rangelock for the range that we may be attempting to * grow, rather than blocking read/write for regions we won't be @@ -1898,8 +1955,17 @@ */ rl_cookie = rangelock_wlock(&shmfd->shm_rl, offset, size, &shmfd->shm_mtx); - if (size > shmfd->shm_size) - error = shm_dotruncate_cookie(shmfd, size, rl_cookie); + switch (cmd) { + case SPACECTL_ALLOC: + if (size > shmfd->shm_size && flags & SPACECTL_F_CANEXTEND) + error = shm_dotruncate_cookie(shmfd, size, rl_cookie); + break; + case SPACECTL_DEALLOC: + error = shm_deallocate(shmfd, offset, len, flags, rl_cookie, td); + break; + default: + panic("%s: unknown cmd %d", __func__, cmd); + } rangelock_unlock(&shmfd->shm_rl, rl_cookie, &shmfd->shm_mtx); /* Translate to posix_fallocate(2) return value as needed. 
*/ if (error == ENOMEM) Index: sys/kern/vfs_default.c =================================================================== --- sys/kern/vfs_default.c +++ sys/kern/vfs_default.c @@ -92,6 +92,7 @@ static int vop_stdgetpages_async(struct vop_getpages_async_args *ap); static int vop_stdread_pgcache(struct vop_read_pgcache_args *ap); static int vop_stdstat(struct vop_stat_args *ap); +static int vop_stddeallocate(struct vop_deallocate_args *ap); /* * This vnode table stores what we want to do if the filesystem doesn't @@ -116,6 +117,7 @@ .vop_advlockasync = vop_stdadvlockasync, .vop_advlockpurge = vop_stdadvlockpurge, .vop_allocate = vop_stdallocate, + .vop_deallocate = vop_stddeallocate, .vop_bmap = vop_stdbmap, .vop_close = VOP_NULL, .vop_fsync = VOP_NULL, @@ -501,6 +503,7 @@ case _PC_ACL_EXTENDED: case _PC_ACL_NFS4: case _PC_CAP_PRESENT: + case _PC_FDEALLOC_PRESENT: case _PC_INF_PRESENT: case _PC_MAC_PRESENT: *ap->a_retval = 0; @@ -956,6 +959,7 @@ uint8_t *buf; struct thread *td; struct vnode *vp; + struct ucred *cred; size_t iosize; int error; @@ -966,8 +970,9 @@ vp = ap->a_vp; len = *ap->a_len; offset = *ap->a_offset; + cred = ap->a_cred; - error = VOP_GETATTR(vp, vap, td->td_ucred); + error = VOP_GETATTR(vp, vap, cred); if (error != 0) goto out; fsize = vap->va_size; @@ -991,29 +996,36 @@ if (error != 0) goto out; if (maxfilesize) { - if (offset > maxfilesize || len > maxfilesize || - offset + len > maxfilesize) { + if ((offset > maxfilesize || len > maxfilesize || + offset + len > maxfilesize) && + (ap->a_flags & SPACECTL_F_CANEXTEND) == 0) { error = EFBIG; goto out; } } else #endif - if (offset + len > vap->va_size) { + if ((u_quad_t)offset + len > vap->va_size && + ap->a_flags & SPACECTL_F_CANEXTEND) { /* * Test offset + len against the filesystem's maxfilesize. 
*/ VATTR_NULL(vap); vap->va_size = offset + len; - error = VOP_SETATTR(vp, vap, td->td_ucred); + error = VOP_SETATTR(vp, vap, cred); if (error != 0) goto out; VATTR_NULL(vap); vap->va_size = fsize; - error = VOP_SETATTR(vp, vap, td->td_ucred); + error = VOP_SETATTR(vp, vap, cred); if (error != 0) goto out; } + if ((ap->a_flags & SPACECTL_F_CANEXTEND) == 0) + len = omin(len, vap->va_size - offset); + if (len == 0) + goto out; + for (;;) { /* * Read and write back anything below the nominal file @@ -1035,7 +1047,7 @@ auio.uio_segflg = UIO_SYSSPACE; auio.uio_rw = UIO_READ; auio.uio_td = td; - error = VOP_READ(vp, &auio, 0, td->td_ucred); + error = VOP_READ(vp, &auio, 0, cred); if (error != 0) break; if (auio.uio_resid > 0) { @@ -1056,7 +1068,7 @@ auio.uio_rw = UIO_WRITE; auio.uio_td = td; - error = VOP_WRITE(vp, &auio, 0, td->td_ucred); + error = VOP_WRITE(vp, &auio, 0, cred); if (error != 0) break; @@ -1075,6 +1087,142 @@ return (error); } +static int +vp_zerofill(struct vnode *vp, struct vattr *vap, off_t offset, off_t len, + off_t *residp, struct ucred *cred) +{ + int iosize; + int error = 0; + void *buf = NULL; + struct iovec aiov; + struct uio auio; + struct thread *td; + + iosize = vap->va_blocksize; + td = curthread; + + if (iosize == 0) + iosize = BLKDEV_IOSIZE; + if (iosize > MAXPHYS) + iosize = MAXPHYS; + buf = malloc(iosize, M_TEMP, M_ZERO | M_WAITOK); + + while (len > 0) { + int xfersize = iosize; + if (offset % iosize != 0) + xfersize -= offset % iosize; + if (xfersize > len) + xfersize = len; + + aiov.iov_base = buf; + aiov.iov_len = xfersize; + auio.uio_iov = &aiov; + auio.uio_iovcnt = 1; + auio.uio_offset = offset; + auio.uio_resid = xfersize; + auio.uio_segflg = UIO_SYSSPACE; + auio.uio_rw = UIO_WRITE; + auio.uio_td = td; + + error = VOP_WRITE(vp, &auio, 0, cred); + if (error != 0) { + len -= xfersize - auio.uio_resid; + break; + } + + len -= xfersize; + offset += xfersize; + + maybe_yield(); + } + + free(buf, M_TEMP); + *residp = len; + return 
(error); +} + +static int +vop_stddeallocate(struct vop_deallocate_args *ap) +{ + struct vnode *vp; + off_t offset, len; + struct ucred *cred; + int error; + struct vattr va; + off_t noff, xfersize, rem; + bool extended; + + vp = ap->a_vp; + offset = *ap->a_offset; + len = *ap->a_len; + cred = ap->a_cred; + extended = false; + + error = VOP_GETATTR(vp, &va, cred); + if (error) + return (error); + + /* Extend the file if requested */ + if (len != 0 && (ap->a_flags & SPACECTL_F_CANEXTEND) && + (u_quad_t)offset + len > va.va_size) { + error = vn_truncate_locked(vp, offset + len, false, cred); + if (error != 0) + goto out; + + /* Only [offset, old EOF) is left to zero; never let len go negative. */ + len = (u_quad_t)offset < va.va_size ? va.va_size - offset : 0; + extended = true; + } + while (len > 0) { + noff = offset; + error = vn_bmap_seekhole_locked(vp, FIOSEEKDATA, &noff, cred); + if (error) { + if (error != ENXIO) + /* XXX: Is it okay to fallback further? */ + goto out; + + /* + * No more data region to be filled + */ + len = 0; + error = 0; + break; + } + KASSERT(noff >= offset, ("FIOSEEKDATA going backward")); + if (noff != offset) { + xfersize = omin(noff - offset, len); + len -= xfersize; + offset += xfersize; + if (len == 0) + break; + } + error = vn_bmap_seekhole_locked(vp, FIOSEEKHOLE, &noff, cred); + if (error) + goto out; + + /* Fill zeroes */ + xfersize = omin(noff - offset, len); + error = vp_zerofill(vp, &va, offset, xfersize, &rem, cred); + if (error) { + len -= xfersize - rem; + offset += xfersize - rem; + goto out; + } + + len -= xfersize; + offset += xfersize; + if (should_yield()) + break; + } +out: + if (error == 0) { + *ap->a_offset = offset; + *ap->a_len = len; + } else if (extended) + vn_truncate_locked(vp, va.va_size, false, cred); + return (error); +} + int vop_stdadvise(struct vop_advise_args *ap) { Index: sys/kern/vfs_vnops.c =================================================================== --- sys/kern/vfs_vnops.c +++ sys/kern/vfs_vnops.c @@ -105,7 +105,7 @@ static fo_stat_t vn_statfile; static fo_close_t
vn_closefile; static fo_mmap_t vn_mmap; -static fo_fallocate_t vn_fallocate; +static fo_fspacectl_t vn_fspacectl; struct fileops vnops = { .fo_read = vn_io_fault, @@ -122,7 +122,7 @@ .fo_seek = vn_seek, .fo_fill_kinfo = vn_fill_kinfo, .fo_mmap = vn_mmap, - .fo_fallocate = vn_fallocate, + .fo_fspacectl = vn_fspacectl, .fo_flags = DFLAG_PASSABLE | DFLAG_SEEKABLE }; @@ -2345,7 +2345,8 @@ } int -vn_bmap_seekhole(struct vnode *vp, u_long cmd, off_t *off, struct ucred *cred) +vn_bmap_seekhole_locked(struct vnode *vp, u_long cmd, off_t *off, + struct ucred *cred) { struct vattr va; daddr_t bn, bnp; @@ -2353,22 +2354,17 @@ off_t noff; int error; - KASSERT(cmd == FIOSEEKHOLE || cmd == FIOSEEKDATA, - ("Wrong command %lu", cmd)); - - if (vn_lock(vp, LK_SHARED) != 0) - return (EBADF); if (vp->v_type != VREG) { error = ENOTTY; - goto unlock; + goto out; } error = VOP_GETATTR(vp, &va, cred); if (error != 0) - goto unlock; + goto out; noff = *off; if (noff >= va.va_size) { error = ENXIO; - goto unlock; + goto out; } bsize = vp->v_mount->mnt_stat.f_iosize; for (bn = noff / bsize; noff < va.va_size; bn++, noff += bsize - @@ -2376,14 +2372,14 @@ error = VOP_BMAP(vp, bn, NULL, &bnp, NULL, NULL); if (error == EOPNOTSUPP) { error = ENOTTY; - goto unlock; + goto out; } if ((bnp == -1 && cmd == FIOSEEKHOLE) || (bnp != -1 && cmd == FIOSEEKDATA)) { noff = bn * bsize; if (noff < *off) noff = *off; - goto unlock; + goto out; } } if (noff > va.va_size) @@ -2391,13 +2387,27 @@ /* noff == va.va_size. There is an implicit hole at the end of file. 
*/ if (cmd == FIOSEEKDATA) error = ENXIO; -unlock: - VOP_UNLOCK(vp); +out: if (error == 0) *off = noff; return (error); } +int +vn_bmap_seekhole(struct vnode *vp, u_long cmd, off_t *off, struct ucred *cred) +{ + int error; + + KASSERT(cmd == FIOSEEKHOLE || cmd == FIOSEEKDATA, + ("Wrong command %lu", cmd)); + + if (vn_lock(vp, LK_SHARED) != 0) + return (EBADF); + error = vn_bmap_seekhole_locked(vp, cmd, off, cred); + VOP_UNLOCK(vp); + return (error); +} + int vn_seek(struct file *fp, off_t offset, int whence, struct thread *td) { @@ -3283,7 +3293,8 @@ } static int -vn_fallocate(struct file *fp, off_t offset, off_t len, struct thread *td) +vn_fallocate(struct file *fp, off_t offset, off_t len, int flags, + struct ucred *active_cred, struct thread *td) { struct mount *mp; struct vnode *vp; @@ -3294,8 +3305,17 @@ #endif vp = fp->f_vnode; + error = 0; + + if (offset < 0 || len < 0 || (flags & ~SPACECTL_F_SUPPORTED) != 0) + return (EINVAL); if (vp->v_type != VREG) return (ENODEV); + if (len == 0) + /* Degenerated case */ + return (0); + + len = omin(len, OFF_MAX - offset); /* Allocating blocks may take a long time, so iterate. 
*/ for (;;) { @@ -3319,17 +3339,14 @@ } #endif #ifdef MAC - error = mac_vnode_check_write(td->td_ucred, fp->f_cred, vp); + error = mac_vnode_check_write(active_cred, fp->f_cred, vp); if (error == 0) #endif - error = VOP_ALLOCATE(vp, &offset, &len); + error = VOP_ALLOCATE(vp, &offset, &len, flags, + active_cred); VOP_UNLOCK(vp); vn_finished_write(mp); - if (olen + ooffset != offset + len) { - panic("offset + len changed from %jx/%jx to %jx/%jx", - ooffset, olen, offset, len); - } if (error != 0 || len == 0) break; KASSERT(olen > len, ("Iteration did not make progress?")); @@ -3339,6 +3356,130 @@ return (error); } +static int +vn_deallocate_impl(struct vnode *vp, off_t offset, off_t len, int flags, int ioflg, + bool may_audit, struct ucred *active_cred, struct ucred *file_cred, + struct thread *td) +{ + struct mount *mp; + void *rl_cookie; + int lock_flags; + int error; +#ifdef AUDIT + int audited_vnode1 = 0; +#endif + + rl_cookie = NULL; + error = 0; + + if (offset < 0 || len < 0 || (flags & ~SPACECTL_F_SUPPORTED) != 0) + return (EINVAL); + if (vp->v_type != VREG) + return (ENODEV); + if (len == 0) + /* Degenerate case: nothing to deallocate */ + return (0); + + /* Take the maximum range if end offset overflows */ + len = omin(len, OFF_MAX - offset); + + while (len > 0) { + /* + * Try to deallocate the longest range in one pass. + * If a pass takes too long to execute, it returns a + * partial result. The residue will be processed in the next + * pass.
+ */ + + mp = NULL; + + bwillwrite(); + + if ((ioflg & IO_NODELOCKED) == 0) { + if ((ioflg & IO_RANGELOCKED) == 0 && + rl_cookie == NULL) + rl_cookie = vn_rangelock_wlock(vp, offset, + offset + len); + /* Every pass writes, so every pass must pair vn_start_write() with vn_finished_write(). */ + if ((error = vn_start_write(vp, &mp, + V_WAIT | PCATCH)) != 0) + goto out; + + if ((flags & SPACECTL_F_CANEXTEND) == 0 && + (MNT_SHARED_WRITES(mp) || + (mp == NULL && MNT_SHARED_WRITES(vp->v_mount)))) { + lock_flags = LK_SHARED; + } else { + lock_flags = LK_EXCLUSIVE; + } + vn_lock(vp, lock_flags | LK_RETRY); +#ifdef AUDIT + if (may_audit && !audited_vnode1) { + AUDIT_ARG_VNODE1(vp); + audited_vnode1 = 1; + } +#endif + } + +#ifdef MAC + if ((ioflg & IO_NOMACCHECK) == 0) + error = mac_vnode_check_write(active_cred, file_cred, + vp); +#endif + if (error == 0) + error = VOP_DEALLOCATE(vp, &offset, &len, flags, + active_cred); + + if ((ioflg & IO_NODELOCKED) == 0) { + VOP_UNLOCK(vp); + if (mp != NULL) + vn_finished_write(mp); + } + + if (error != 0) + break; + } +out: + if (rl_cookie != NULL) + vn_rangelock_unlock(vp, rl_cookie); + return (error); +} + +int +vn_deallocate(struct vnode *vp, off_t offset, off_t len, int flags, int ioflg, + struct ucred *active_cred, struct ucred *file_cred, struct thread *td) +{ + return (vn_deallocate_impl(vp, offset, len, flags, ioflg, false, + active_cred, file_cred, td)); +} + +static int +vn_fspacectl(struct file *fp, int cmd, off_t offset, off_t len, int flags, + struct ucred *active_cred, struct thread *td) +{ + int error; + struct vnode *vp; + + vp = fp->f_vnode; + + if (cmd != SPACECTL_ALLOC && cmd != SPACECTL_DEALLOC) + return (EINVAL); + + switch (cmd) { + case SPACECTL_ALLOC: + error = vn_fallocate(fp, offset, len, flags, active_cred, td); + break; + case SPACECTL_DEALLOC: + error = vn_deallocate_impl(vp, offset, len, flags, 0, true, + active_cred, fp->f_cred, td); + break; + default: + panic("vn_fspacectl: unknown cmd %d", cmd); + } + + return (error); +} + static u_long vn_lock_pair_pause_cnt; SYSCTL_ULONG(_debug,
OID_AUTO, vn_lock_pair_pause, CTLFLAG_RD, &vn_lock_pair_pause_cnt, 0, Index: sys/kern/vnode_if.src =================================================================== --- sys/kern/vnode_if.src +++ sys/kern/vnode_if.src @@ -703,6 +703,8 @@ IN struct vnode *vp; INOUT off_t *offset; INOUT off_t *len; + IN int flags; + IN struct ucred *cred; }; @@ -792,6 +794,17 @@ }; +%% deallocate vp L L L + +vop_deallocate { + IN struct vnode *vp; + INOUT off_t *offset; + INOUT off_t *len; + IN int flags; + IN struct ucred *cred; +}; + + # The VOPs below are spares at the end of the table to allow new VOPs to be # added in stable branches without breaking the KBI. New VOPs in HEAD should # be added above these spares. When merging a new VOP to a stable branch, Index: sys/security/audit/audit_bsm.c =================================================================== --- sys/security/audit/audit_bsm.c +++ sys/security/audit/audit_bsm.c @@ -1076,6 +1076,18 @@ FD_VNODE1_TOKENS; break; + case AUE_FSPACECTL: + if (ARG_IS_VALID(kar, ARG_CMD)) { + tok = au_to_arg32(2, "operation", ar->ar_arg_cmd); + kau_write(rec, tok); + } + if (ARG_IS_VALID(kar, ARG_FFLAGS)) { + tok = au_to_arg32(4, "flags", ar->ar_arg_fflags); + kau_write(rec, tok); + } + FD_VNODE1_TOKENS; + break; + case AUE_RFORK: if (ARG_IS_VALID(kar, ARG_FFLAGS)) { tok = au_to_arg32(1, "flags", ar->ar_arg_fflags); Index: sys/sys/fcntl.h =================================================================== --- sys/sys/fcntl.h +++ sys/sys/fcntl.h @@ -314,6 +314,14 @@ short l_type; /* lock type: read/write, etc. */ short l_whence; /* type of l_start */ }; + +/* + * Space control offset/length description + */ +struct spacectl_range { + off_t r_offset; /* starting offset */ + off_t r_len; /* length */ +}; #endif #if __BSD_VISIBLE @@ -343,6 +351,18 @@ * similar syscalls. 
*/ #define FD_NONE -200 + +/* + * Commands for fspacectl(2) + */ +#define SPACECTL_DEALLOC 0 /* deallocate space */ +#define SPACECTL_ALLOC 1 /* allocate space */ + +/* + * fspacectl(2) flags + */ +#define SPACECTL_F_CANEXTEND (1) +#define SPACECTL_F_SUPPORTED (SPACECTL_F_CANEXTEND) #endif #ifndef _KERNEL @@ -360,6 +380,9 @@ int posix_fadvise(int, off_t, off_t, int); int posix_fallocate(int, off_t, off_t); #endif +#if __BSD_VISIBLE +int fspacectl(int, int, struct spacectl_range *, int); +#endif __END_DECLS #endif Index: sys/sys/file.h =================================================================== --- sys/sys/file.h +++ sys/sys/file.h @@ -35,6 +35,7 @@ #ifndef _SYS_FILE_H_ #define _SYS_FILE_H_ +#include "sys/fcntl.h" #ifndef _KERNEL #include /* XXX */ #include @@ -127,8 +128,9 @@ typedef int fo_aio_queue_t(struct file *fp, struct kaiocb *job); typedef int fo_add_seals_t(struct file *fp, int flags); typedef int fo_get_seals_t(struct file *fp, int *flags); -typedef int fo_fallocate_t(struct file *fp, off_t offset, off_t len, - struct thread *td); +typedef int fo_fspacectl_t(struct file *fp, int cmd, + off_t offset, off_t len, int flags, + struct ucred *active_cred, struct thread *td); typedef int fo_flags_t; struct fileops { @@ -149,7 +151,7 @@ fo_aio_queue_t *fo_aio_queue; fo_add_seals_t *fo_add_seals; fo_get_seals_t *fo_get_seals; - fo_fallocate_t *fo_fallocate; + fo_fspacectl_t *fo_fspacectl; fo_flags_t fo_flags; /* DFLAG_* below */ }; @@ -461,15 +463,17 @@ return ((*fp->f_ops->fo_get_seals)(fp, seals)); } -static __inline int -fo_fallocate(struct file *fp, off_t offset, off_t len, struct thread *td) +static __inline int fo_fspacectl(struct file *fp, int cmd, off_t offset, + off_t len, int flags, struct ucred *active_cred, struct thread *td) { - if (fp->f_ops->fo_fallocate == NULL) + if (fp->f_ops->fo_fspacectl == NULL) return (ENODEV); - return ((*fp->f_ops->fo_fallocate)(fp, offset, len, td)); + return ((*fp->f_ops->fo_fspacectl)(fp, cmd, offset, len, flags, 
+ active_cred, td)); } + #endif /* _KERNEL */ #endif /* !SYS_FILE_H */ Index: sys/sys/syscall.h =================================================================== --- sys/sys/syscall.h +++ sys/sys/syscall.h @@ -515,4 +515,5 @@ #define SYS___specialfd 577 #define SYS_aio_writev 578 #define SYS_aio_readv 579 -#define SYS_MAXSYSCALL 580 +#define SYS_fspacectl 580 +#define SYS_MAXSYSCALL 581 Index: sys/sys/syscall.mk =================================================================== --- sys/sys/syscall.mk +++ sys/sys/syscall.mk @@ -420,4 +420,5 @@ rpctls_syscall.o \ __specialfd.o \ aio_writev.o \ - aio_readv.o + aio_readv.o \ + fspacectl.o Index: sys/sys/syscallsubr.h =================================================================== --- sys/sys/syscallsubr.h +++ sys/sys/syscallsubr.h @@ -59,6 +59,7 @@ struct sched_param; union semun; struct sockaddr; +struct spacectl_range; struct stat; struct thr_param; struct timex; @@ -230,6 +231,10 @@ int advice); int kern_posix_fallocate(struct thread *td, int fd, off_t offset, off_t len); +int kern_fdeallocate(struct thread *td, int fd, off_t offset, off_t len); +int kern_fzerorange(struct thread *td, int fd, off_t offset, off_t len); +int kern_fspacectl(struct thread *td, int fd, int cmd, + struct spacectl_range *, int flags); int kern_procctl(struct thread *td, enum idtype idtype, id_t id, int com, void *data); int kern_pread(struct thread *td, int fd, void *buf, size_t nbyte, Index: sys/sys/sysproto.h =================================================================== --- sys/sys/sysproto.h +++ sys/sys/sysproto.h @@ -1847,6 +1847,12 @@ struct aio_readv_args { char aiocbp_l_[PADL_(struct aiocb *)]; struct aiocb * aiocbp; char aiocbp_r_[PADR_(struct aiocb *)]; }; +struct fspacectl_args { + char fd_l_[PADL_(int)]; int fd; char fd_r_[PADR_(int)]; + char cmd_l_[PADL_(int)]; int cmd; char cmd_r_[PADR_(int)]; + char range_l_[PADL_(struct spacectl_range *)]; struct spacectl_range * range; char range_r_[PADR_(struct spacectl_range 
*)]; + char flags_l_[PADL_(int)]; int flags; char flags_r_[PADR_(int)]; +}; int nosys(struct thread *, struct nosys_args *); void sys_sys_exit(struct thread *, struct sys_exit_args *); int sys_fork(struct thread *, struct fork_args *); @@ -2241,6 +2247,7 @@ int sys___specialfd(struct thread *, struct __specialfd_args *); int sys_aio_writev(struct thread *, struct aio_writev_args *); int sys_aio_readv(struct thread *, struct aio_readv_args *); +int sys_fspacectl(struct thread *, struct fspacectl_args *); #ifdef COMPAT_43 @@ -3175,6 +3182,7 @@ #define SYS_AUE___specialfd AUE_SPECIALFD #define SYS_AUE_aio_writev AUE_AIO_WRITEV #define SYS_AUE_aio_readv AUE_AIO_READV +#define SYS_AUE_fspacectl AUE_FSPACECTL #undef PAD_ #undef PADL_ Index: sys/sys/unistd.h =================================================================== --- sys/sys/unistd.h +++ sys/sys/unistd.h @@ -156,6 +156,7 @@ #define _PC_INF_PRESENT 62 #define _PC_MAC_PRESENT 63 #define _PC_ACL_NFS4 64 +#define _PC_FDEALLOC_PRESENT 65 #endif /* From OpenSolaris, used by SEEK_DATA/SEEK_HOLE. */ Index: sys/sys/vnode.h =================================================================== --- sys/sys/vnode.h +++ sys/sys/vnode.h @@ -719,6 +719,9 @@ void vn_printf(struct vnode *vp, const char *fmt, ...) 
__printflike(2,3); int vrecycle(struct vnode *vp); int vrecyclel(struct vnode *vp); +/* vn_bmap_seekhole_locked is not public KPI */ +int vn_bmap_seekhole_locked(struct vnode *vp, u_long cmd, off_t *off, + struct ucred *cred); int vn_bmap_seekhole(struct vnode *vp, u_long cmd, off_t *off, struct ucred *cred); int vn_close(struct vnode *vp, @@ -727,6 +730,9 @@ struct vnode *outvp, off_t *outoffp, size_t *lenp, unsigned int flags, struct ucred *incred, struct ucred *outcred, struct thread *fsize_td); +int vn_deallocate(struct vnode *vp, off_t offset, off_t len, int flags, + int ioflg, struct ucred *active_cred, struct ucred *file_cred, + struct thread *td); void vn_finished_write(struct mount *mp); void vn_finished_secondary_write(struct mount *mp); int vn_fsync_buf(struct vnode *vp, int waitfor); Index: tests/sys/file/Makefile =================================================================== --- tests/sys/file/Makefile +++ tests/sys/file/Makefile @@ -10,6 +10,7 @@ TAP_TESTS_SH+= flock_test PLAIN_TESTS_C+= ftruncate_test PLAIN_TESTS_C+= newfileops_on_fork_test +ATF_TESTS_C+= fspacectl_test PROGS+= flock_helper Index: tests/sys/file/fspacectl_test.c =================================================================== --- /dev/null +++ tests/sys/file/fspacectl_test.c @@ -0,0 +1,522 @@ +/*- + * SPDX-License-Identifier: BSD-2-Clause-FreeBSD + * + * Copyright (c) 2021 The FreeBSD Foundation + * All rights reserved. + * + * This software was developed by Andrew Turner under sponsorship from + * the FreeBSD Foundation. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include +#include +#include + +#include +#include +#include + +static off_t file_max_blocks = 32; +static const char byte_to_fill = 0x5f; + +static int +fill(int fd, off_t offset, off_t len) +{ + int error; + size_t blen; + char *buf; + struct stat statbuf; + blksize_t blocksize; + + if (fstat(fd, &statbuf) == -1) + return (1); + blocksize = statbuf.st_blksize; + error = 0; + buf = malloc(blocksize); + if (buf == NULL) + return (1); + + while (len > 0) { + blen = len < (off_t)blocksize ? 
len : blocksize; + memset(buf, byte_to_fill, blen); + if (pwrite(fd, buf, blen, offset) != (ssize_t)blen) { + error = 1; + break; + } + len -= blen; + offset += blen; + } + + free(buf); + return (error); +} + +static blksize_t +fd_get_blksize(void) +{ + struct statfs statfsbuf; + + if (statfs(".", &statfsbuf) == -1) + return (-1); + return statfsbuf.f_iosize; +} + +static int +check_content_dealloc(int fd, off_t hole_start, off_t hole_len, off_t file_sz) +{ + int error; + size_t blen; + off_t offset, resid; + struct stat statbuf; + char *buf, *sblk; + blksize_t blocksize; + + blocksize = fd_get_blksize(); + if (blocksize == -1) + return (1); + error = 0; + buf = malloc(blocksize * 2); + if (buf == NULL) + return (1); + sblk = buf + blocksize; + + memset(sblk, 0, blocksize); + + /* + * Check that the hole, clamped to the file size, reads back as zeroes. + */ + offset = hole_start; + resid = hole_len; + if ((uint64_t)offset + resid > (uint64_t)file_sz) + resid = file_sz - offset; + while (resid > 0) { + blen = resid < (off_t)blocksize ? resid : blocksize; + if (pread(fd, buf, blen, offset) != (ssize_t)blen) { + error = 1; + break; + } + if (memcmp(buf, sblk, blen) != 0) { + error = 1; + break; + } + resid -= blen; + offset += blen; + } + + memset(sblk, byte_to_fill, blocksize); + + /* + * Check that the file region before the hole still holds the fill byte. + */ + offset = 0; + resid = hole_start; + while (resid > 0) { + blen = resid < (off_t)blocksize ? resid : blocksize; + if (pread(fd, buf, blen, offset) != (ssize_t)blen) { + error = 1; + break; + } + if (memcmp(buf, sblk, blen) != 0) { + error = 1; + break; + } + resid -= blen; + offset += blen; + } + + /* + * Check that the file region after the hole still holds the fill byte. + */ + offset = hole_start + hole_len; + resid = file_sz - offset; + while (resid > 0) { + blen = resid < (off_t)blocksize ?
resid : blocksize; + if (pread(fd, buf, blen, offset) != (ssize_t)blen) { + error = 1; + break; + } + if (memcmp(buf, sblk, blen) != 0) { + error = 1; + break; + } + resid -= blen; + offset += blen; + } + + /* + * Check file size matches with expected file size. + */ + if (fstat(fd, &statbuf) == -1) + error = -1; + if (statbuf.st_size != file_sz) + error = -1; + + free(buf); + return (error); +} + +static int +check_hole_alloc(int fd, off_t alloc_start, off_t alloc_len, off_t file_sz) +{ + off_t dataoff, holeoff; + struct stat statbuf; + + if (alloc_start + alloc_len < alloc_start) + alloc_len = OFF_MAX - alloc_start; + + dataoff = lseek(fd, alloc_start, SEEK_DATA); + if (dataoff == -1) + return (1); + holeoff = lseek(fd, alloc_start, SEEK_HOLE); + if (holeoff == -1) + return (1); + + /* + * Check if the start offset of allocated region within file size is + * legit + */ + if (dataoff != alloc_start && dataoff < file_sz) + return (1); + /* + * Check if the end offset of allocated region within file size is + * legit + */ + if (holeoff < alloc_start + alloc_len && holeoff < file_sz) + return (1); + + /* + * Check file size matches with expected file size. 
+ */ + if (fstat(fd, &statbuf) == -1) + return (1); + if (statbuf.st_size != file_sz) + return (1); + + return (0); +} + +/* + * Check aligned deallocation + */ +ATF_TC_WITHOUT_HEAD(aligned_dealloc); +ATF_TC_BODY(aligned_dealloc, tc) +{ + struct spacectl_range range; + blksize_t blocksize; + int fd; + + ATF_REQUIRE((blocksize = fd_get_blksize()) != -1); + range.r_offset = blocksize; + range.r_len = (file_max_blocks - 1) * blocksize - range.r_offset; + + ATF_REQUIRE((fd = open("sys_fspacectl_testfile", + O_CREAT | O_RDWR | O_TRUNC, 0600)) != -1); + ATF_REQUIRE(fill(fd, 0, file_max_blocks * blocksize) == 0); + ATF_CHECK(fspacectl(fd, SPACECTL_DEALLOC, &range, 0) == 0); + ATF_CHECK(check_content_dealloc(fd, range.r_offset, range.r_len, + file_max_blocks * blocksize) == 0); + ATF_REQUIRE(close(fd) == 0); +} + +/* + * Check unaligned deallocation + */ +ATF_TC_WITHOUT_HEAD(unaligned_dealloc); +ATF_TC_BODY(unaligned_dealloc, tc) +{ + struct spacectl_range range; + blksize_t blocksize; + int fd; + + ATF_REQUIRE((blocksize = fd_get_blksize()) != -1); + range.r_offset = blocksize / 2; + range.r_len = (file_max_blocks - 1) * blocksize + blocksize / 2 - + range.r_offset; + + ATF_REQUIRE((fd = open("sys_fspacectl_testfile", + O_CREAT | O_RDWR | O_TRUNC, 0600)) != -1); + ATF_REQUIRE(fill(fd, 0, file_max_blocks * blocksize) == 0); + ATF_CHECK(fspacectl(fd, SPACECTL_DEALLOC, &range, 0) == 0); + ATF_CHECK(check_content_dealloc(fd, range.r_offset, range.r_len, + file_max_blocks * blocksize) == 0); + ATF_REQUIRE(close(fd) == 0); +} + +/* + * Check aligned deallocation and extending + */ +ATF_TC_WITHOUT_HEAD(aligned_dealloc_canextend); +ATF_TC_BODY(aligned_dealloc_canextend, tc) +{ + struct spacectl_range range; + blksize_t blocksize; + int fd; + + ATF_REQUIRE((blocksize = fd_get_blksize()) != -1); + range.r_offset = blocksize; + range.r_len = (file_max_blocks + 1) * blocksize - range.r_offset; + + ATF_REQUIRE((fd = open("sys_fspacectl_testfile", + O_CREAT | O_RDWR | O_TRUNC, 0600)) 
!= -1); + ATF_REQUIRE(fill(fd, 0, file_max_blocks * blocksize) == 0); + ATF_CHECK( + fspacectl(fd, SPACECTL_DEALLOC, &range, SPACECTL_F_CANEXTEND) == 0); + ATF_CHECK(check_content_dealloc(fd, range.r_offset, range.r_len, + (file_max_blocks + 1) * blocksize) == 0); + ATF_REQUIRE(close(fd) == 0); +} + +/* + * Check unaligned deallocation and extending + */ +ATF_TC_WITHOUT_HEAD(unaligned_dealloc_canextend); +ATF_TC_BODY(unaligned_dealloc_canextend, tc) +{ + struct spacectl_range range; + blksize_t blocksize; + int fd; + + ATF_REQUIRE((blocksize = fd_get_blksize()) != -1); + range.r_offset = blocksize / 2; + range.r_len = file_max_blocks * blocksize + blocksize / 2 - + range.r_offset; + + ATF_REQUIRE((fd = open("sys_fspacectl_testfile", + O_CREAT | O_RDWR | O_TRUNC, 0600)) != -1); + ATF_REQUIRE(fill(fd, 0, file_max_blocks * blocksize) == 0); + ATF_CHECK( + fspacectl(fd, SPACECTL_DEALLOC, &range, SPACECTL_F_CANEXTEND) == 0); + ATF_CHECK(check_content_dealloc(fd, range.r_offset, range.r_len, + file_max_blocks * blocksize + blocksize / 2) == 0); + ATF_REQUIRE(close(fd) == 0); +} + +/* + * Check aligned deallocation around EOF + */ +ATF_TC_WITHOUT_HEAD(aligned_dealloc_no_canextend); +ATF_TC_BODY(aligned_dealloc_no_canextend, tc) +{ + struct spacectl_range range; + blksize_t blocksize; + int fd; + + ATF_REQUIRE((blocksize = fd_get_blksize()) != -1); + range.r_offset = blocksize; + range.r_len = (file_max_blocks + 1) * blocksize - range.r_offset; + + ATF_REQUIRE((fd = open("sys_fspacectl_testfile", + O_CREAT | O_RDWR | O_TRUNC, 0600)) != -1); + ATF_REQUIRE(fill(fd, 0, file_max_blocks * blocksize) == 0); + ATF_CHECK(fspacectl(fd, SPACECTL_DEALLOC, &range, 0) == 0); + ATF_CHECK(check_content_dealloc(fd, range.r_offset, range.r_len, + file_max_blocks * blocksize) == 0); + ATF_REQUIRE(close(fd) == 0); +} + +/* + * Check unaligned deallocation around EOF + */ +ATF_TC_WITHOUT_HEAD(unaligned_dealloc_no_canextend); +ATF_TC_BODY(unaligned_dealloc_no_canextend, tc) +{ + struct 
spacectl_range range; + blksize_t blocksize; + int fd; + + ATF_REQUIRE((blocksize = fd_get_blksize()) != -1); + range.r_offset = blocksize / 2; + range.r_len = file_max_blocks * blocksize + blocksize / 2 - + range.r_offset; + + ATF_REQUIRE((fd = open("sys_fspacectl_testfile", + O_CREAT | O_RDWR | O_TRUNC, 0600)) != -1); + ATF_REQUIRE(fill(fd, 0, file_max_blocks * blocksize) == 0); + ATF_CHECK(fspacectl(fd, SPACECTL_DEALLOC, &range, 0) == 0); + ATF_CHECK(check_content_dealloc(fd, range.r_offset, range.r_len, + file_max_blocks * blocksize) == 0); + ATF_REQUIRE(close(fd) == 0); +} + +/* + * Check aligned allocation + */ +ATF_TC_WITHOUT_HEAD(aligned_alloc); +ATF_TC_BODY(aligned_alloc, tc) +{ + struct spacectl_range range; + blksize_t blocksize; + int fd; + + ATF_REQUIRE((blocksize = fd_get_blksize()) != -1); + range.r_offset = blocksize; + range.r_len = (file_max_blocks - 1) * blocksize - range.r_offset; + + ATF_REQUIRE((fd = open("sys_fspacectl_testfile", + O_CREAT | O_RDWR | O_TRUNC, 0600)) != -1); + ATF_REQUIRE(ftruncate(fd, file_max_blocks * blocksize) == 0); + ATF_CHECK(fspacectl(fd, SPACECTL_ALLOC, &range, 0) == 0); + ATF_CHECK(check_hole_alloc(fd, range.r_offset, range.r_len, + file_max_blocks * blocksize) == 0); + ATF_REQUIRE(close(fd) == 0); +} + +/* + * Check unaligned allocation + */ +ATF_TC_WITHOUT_HEAD(unaligned_alloc); +ATF_TC_BODY(unaligned_alloc, tc) +{ + struct spacectl_range range; + blksize_t blocksize; + int fd; + + ATF_REQUIRE((blocksize = fd_get_blksize()) != -1); + range.r_offset = blocksize / 2; + range.r_len = (file_max_blocks - 1) * blocksize + blocksize / 2 - + range.r_offset; + + ATF_REQUIRE((fd = open("sys_fspacectl_testfile", + O_CREAT | O_RDWR | O_TRUNC, 0600)) != -1); + ATF_REQUIRE(ftruncate(fd, file_max_blocks * blocksize) == 0); + ATF_CHECK(fspacectl(fd, SPACECTL_ALLOC, &range, 0) == 0); + ATF_CHECK(check_hole_alloc(fd, range.r_offset, range.r_len, + file_max_blocks * blocksize) == 0); + ATF_REQUIRE(close(fd) == 0); +} + +/* + * Check 
aligned allocation and extending + */ +ATF_TC_WITHOUT_HEAD(aligned_alloc_canextend); +ATF_TC_BODY(aligned_alloc_canextend, tc) +{ + struct spacectl_range range; + blksize_t blocksize; + int fd; + + ATF_REQUIRE((blocksize = fd_get_blksize()) != -1); + range.r_offset = blocksize; + range.r_len = (file_max_blocks + 1) * blocksize - range.r_offset; + + ATF_REQUIRE((fd = open("sys_fspacectl_testfile", + O_CREAT | O_RDWR | O_TRUNC, 0600)) != -1); + ATF_REQUIRE(ftruncate(fd, file_max_blocks * blocksize) == 0); + ATF_CHECK( + fspacectl(fd, SPACECTL_ALLOC, &range, SPACECTL_F_CANEXTEND) == 0); + ATF_CHECK(check_hole_alloc(fd, range.r_offset, range.r_len, + (file_max_blocks + 1) * blocksize) == 0); + ATF_REQUIRE(close(fd) == 0); +} + +/* + * Check unaligned allocation and extending + */ +ATF_TC_WITHOUT_HEAD(unaligned_alloc_canextend); +ATF_TC_BODY(unaligned_alloc_canextend, tc) +{ + struct spacectl_range range; + blksize_t blocksize; + int fd; + + ATF_REQUIRE((blocksize = fd_get_blksize()) != -1); + range.r_offset = blocksize / 2; + range.r_len = file_max_blocks * blocksize + blocksize / 2 - + range.r_offset; + + ATF_REQUIRE((fd = open("sys_fspacectl_testfile", + O_CREAT | O_RDWR | O_TRUNC, 0600)) != -1); + ATF_REQUIRE(ftruncate(fd, file_max_blocks * blocksize) == 0); + ATF_CHECK( + fspacectl(fd, SPACECTL_ALLOC, &range, SPACECTL_F_CANEXTEND) == 0); + ATF_CHECK(check_hole_alloc(fd, range.r_offset, range.r_len, + file_max_blocks * blocksize + blocksize / 2) == 0); + ATF_REQUIRE(close(fd) == 0); +} + +/* + * Check aligned allocation around EOF + */ +ATF_TC_WITHOUT_HEAD(aligned_alloc_no_canextend); +ATF_TC_BODY(aligned_alloc_no_canextend, tc) +{ + struct spacectl_range range; + blksize_t blocksize; + int fd; + + ATF_REQUIRE((blocksize = fd_get_blksize()) != -1); + range.r_offset = blocksize; + range.r_len = (file_max_blocks + 1) * blocksize - range.r_offset; + + ATF_REQUIRE((fd = open("sys_fspacectl_testfile", + O_CREAT | O_RDWR | O_TRUNC, 0600)) != -1); + 
ATF_REQUIRE(ftruncate(fd, file_max_blocks * blocksize) == 0); + ATF_CHECK(fspacectl(fd, SPACECTL_ALLOC, &range, 0) == 0); + ATF_CHECK(check_hole_alloc(fd, range.r_offset, range.r_len, + file_max_blocks * blocksize) == 0); + ATF_REQUIRE(close(fd) == 0); +} + +/* + * Check unaligned allocation around EOF + */ +ATF_TC_WITHOUT_HEAD(unaligned_alloc_no_canextend); +ATF_TC_BODY(unaligned_alloc_no_canextend, tc) +{ + struct spacectl_range range; + blksize_t blocksize; + int fd; + + ATF_REQUIRE((blocksize = fd_get_blksize()) != -1); + range.r_offset = blocksize / 2; + range.r_len = file_max_blocks * blocksize + blocksize / 2 - + range.r_offset; + + ATF_REQUIRE((fd = open("sys_fspacectl_testfile", + O_CREAT | O_RDWR | O_TRUNC, 0600)) != -1); + ATF_REQUIRE(ftruncate(fd, file_max_blocks * blocksize) == 0); + ATF_CHECK(fspacectl(fd, SPACECTL_ALLOC, &range, 0) == 0); + ATF_CHECK(check_hole_alloc(fd, range.r_offset, range.r_len, + file_max_blocks * blocksize) == 0); + ATF_REQUIRE(close(fd) == 0); +} + +ATF_TP_ADD_TCS(tp) +{ + ATF_TP_ADD_TC(tp, aligned_dealloc); + ATF_TP_ADD_TC(tp, unaligned_dealloc); + ATF_TP_ADD_TC(tp, aligned_dealloc_canextend); + ATF_TP_ADD_TC(tp, unaligned_dealloc_canextend); + ATF_TP_ADD_TC(tp, aligned_dealloc_no_canextend); + ATF_TP_ADD_TC(tp, unaligned_dealloc_no_canextend); + + ATF_TP_ADD_TC(tp, aligned_alloc); + ATF_TP_ADD_TC(tp, unaligned_alloc); + ATF_TP_ADD_TC(tp, aligned_alloc_canextend); + ATF_TP_ADD_TC(tp, unaligned_alloc_canextend); + ATF_TP_ADD_TC(tp, aligned_alloc_no_canextend); + ATF_TP_ADD_TC(tp, unaligned_alloc_no_canextend); + + return atf_no_error(); +}