Index: lib/libc/sys/Symbol.map
===================================================================
--- lib/libc/sys/Symbol.map
+++ lib/libc/sys/Symbol.map
@@ -407,9 +407,11 @@
 	aio_writev;
 	close_range;
 	copy_file_range;
+	fdeallocate;
 	fhlink;
 	fhlinkat;
 	fhreadlink;
+	fzero;
 	getfhat;
 	funlinkat;
 	memfd_create;
Index: sys/bsm/audit_kevents.h
===================================================================
--- sys/bsm/audit_kevents.h
+++ sys/bsm/audit_kevents.h
@@ -662,6 +662,8 @@
 #define	AUE_SPECIALFD		43266	/* FreeBSD-specific. */
 #define	AUE_AIO_WRITEV		43267	/* FreeBSD-specific. */
 #define	AUE_AIO_READV		43268	/* FreeBSD-specific. */
+#define	AUE_FDEALLOCATE		43269	/* FreeBSD-specific. */
+#define	AUE_FZERO		43270	/* FreeBSD-specific. */
 
 /*
  * Darwin BSM uses a number of AUE_O_* definitions, which are aliased to the
Index: sys/compat/freebsd32/freebsd32_misc.c
===================================================================
--- sys/compat/freebsd32/freebsd32_misc.c
+++ sys/compat/freebsd32/freebsd32_misc.c
@@ -3569,6 +3569,34 @@
 	return (kern_posix_error(td, error));
 }
 
+/*
+ * 32-bit compat entry point for fdeallocate(2): reassemble the split 64-bit
+ * offset/len pair and hand off to kern_fdeallocate().  The error is returned
+ * unchanged, exactly as the native sys_fdeallocate() does, so that both ABIs
+ * report failures the same way.  It must not be routed through
+ * kern_posix_error(), which is meant for posix_*() style calls.
+ */
+int
+freebsd32_fdeallocate(struct thread *td, struct freebsd32_fdeallocate_args *uap)
+{
+
+	return (kern_fdeallocate(td, uap->fd,
+	    PAIR32TO64(off_t, uap->offset), PAIR32TO64(off_t, uap->len)));
+}
+
+/*
+ * 32-bit compat entry point for fzero(2).  kern_fzero() stores its result in
+ * td_retval[0]; the error is passed through unchanged, as sys_fzero() does,
+ * so that the result is not replaced by an error number.
+ */
+int
+freebsd32_fzero(struct thread *td, struct freebsd32_fzero_args *uap)
+{
+
+	return (kern_fzero(td, uap->fd,
+	    PAIR32TO64(off_t, uap->offset), PAIR32TO64(off_t, uap->len)));
+}
+
 int
 freebsd32_posix_fadvise(struct thread *td,
     struct freebsd32_posix_fadvise_args *uap)
Index: sys/compat/freebsd32/freebsd32_proto.h
===================================================================
--- sys/compat/freebsd32/freebsd32_proto.h
+++ sys/compat/freebsd32/freebsd32_proto.h
@@ -751,6 +751,39 @@
 struct freebsd32_aio_readv_args {
 	char aiocbp_l_[PADL_(struct aiocb32 *)]; struct aiocb32 * aiocbp; char 
aiocbp_r_[PADR_(struct aiocb32 *)];
 };
+#ifdef PAD64_REQUIRED
+struct freebsd32_fdeallocate_args {
+	char fd_l_[PADL_(int)]; int fd; char fd_r_[PADR_(int)];
+	char pad_l_[PADL_(int)]; int pad; char pad_r_[PADR_(int)];
+	char offset1_l_[PADL_(uint32_t)]; uint32_t offset1; char offset1_r_[PADR_(uint32_t)];
+	char offset2_l_[PADL_(uint32_t)]; uint32_t offset2; char offset2_r_[PADR_(uint32_t)];
+	char len1_l_[PADL_(uint32_t)]; uint32_t len1; char len1_r_[PADR_(uint32_t)];
+	char len2_l_[PADL_(uint32_t)]; uint32_t len2; char len2_r_[PADR_(uint32_t)];
+};
+struct freebsd32_fzero_args {
+	char fd_l_[PADL_(int)]; int fd; char fd_r_[PADR_(int)];
+	char pad_l_[PADL_(int)]; int pad; char pad_r_[PADR_(int)];
+	char offset1_l_[PADL_(uint32_t)]; uint32_t offset1; char offset1_r_[PADR_(uint32_t)];
+	char offset2_l_[PADL_(uint32_t)]; uint32_t offset2; char offset2_r_[PADR_(uint32_t)];
+	char len1_l_[PADL_(uint32_t)]; uint32_t len1; char len1_r_[PADR_(uint32_t)];
+	char len2_l_[PADL_(uint32_t)]; uint32_t len2; char len2_r_[PADR_(uint32_t)];
+};
+#else
+struct freebsd32_fdeallocate_args {
+	char fd_l_[PADL_(int)]; int fd; char fd_r_[PADR_(int)];
+	char offset1_l_[PADL_(uint32_t)]; uint32_t offset1; char offset1_r_[PADR_(uint32_t)];
+	char offset2_l_[PADL_(uint32_t)]; uint32_t offset2; char offset2_r_[PADR_(uint32_t)];
+	char len1_l_[PADL_(uint32_t)]; uint32_t len1; char len1_r_[PADR_(uint32_t)];
+	char len2_l_[PADL_(uint32_t)]; uint32_t len2; char len2_r_[PADR_(uint32_t)];
+};
+struct freebsd32_fzero_args {
+	char fd_l_[PADL_(int)]; int fd; char fd_r_[PADR_(int)];
+	char offset1_l_[PADL_(uint32_t)]; uint32_t offset1; char offset1_r_[PADR_(uint32_t)];
+	char offset2_l_[PADL_(uint32_t)]; uint32_t offset2; char offset2_r_[PADR_(uint32_t)];
+	char len1_l_[PADL_(uint32_t)]; uint32_t len1; char len1_r_[PADR_(uint32_t)];
+	char len2_l_[PADL_(uint32_t)]; uint32_t len2; char len2_r_[PADR_(uint32_t)];
+};
+#endif
 #if !defined(PAD64_REQUIRED) && !defined(__amd64__)
 #define PAD64_REQUIRED
 #endif
@@ -893,6 +926,13 @@
 int	freebsd32___sysctlbyname(struct thread *, struct freebsd32___sysctlbyname_args *);
 int	freebsd32_aio_writev(struct thread *, struct freebsd32_aio_writev_args *);
 int	freebsd32_aio_readv(struct thread *, struct freebsd32_aio_readv_args *);
+#ifdef PAD64_REQUIRED
+int	freebsd32_fdeallocate(struct thread *, struct freebsd32_fdeallocate_args *);
+int	freebsd32_fzero(struct thread *, struct freebsd32_fzero_args *);
+#else
+int	freebsd32_fdeallocate(struct thread *, struct freebsd32_fdeallocate_args *);
+int	freebsd32_fzero(struct thread *, struct freebsd32_fzero_args *);
+#endif
 
 #ifdef COMPAT_43
 
@@ -970,6 +1010,9 @@
 #ifdef PAD64_REQUIRED
 #else
 #endif
+#ifdef PAD64_REQUIRED
+#else
+#endif
 int	ofreebsd32_lseek(struct thread *, struct ofreebsd32_lseek_args *);
 int	ofreebsd32_stat(struct thread *, struct ofreebsd32_stat_args *);
 int	ofreebsd32_lstat(struct thread *, struct ofreebsd32_lstat_args *);
@@ -1044,6 +1087,9 @@
 #ifdef PAD64_REQUIRED
 #else
 #endif
+#ifdef PAD64_REQUIRED
+#else
+#endif
 int	freebsd4_freebsd32_getfsstat(struct thread *, struct freebsd4_freebsd32_getfsstat_args *);
 int	freebsd4_freebsd32_statfs(struct thread *, struct freebsd4_freebsd32_statfs_args *);
 int	freebsd4_freebsd32_fstatfs(struct thread *, struct freebsd4_freebsd32_fstatfs_args *);
@@ -1132,6 +1178,9 @@
 #ifdef PAD64_REQUIRED
 #else
 #endif
+#ifdef PAD64_REQUIRED
+#else
+#endif
 int	freebsd6_freebsd32_pread(struct thread *, struct freebsd6_freebsd32_pread_args *);
 int	freebsd6_freebsd32_pwrite(struct thread *, struct freebsd6_freebsd32_pwrite_args *);
 int	freebsd6_freebsd32_mmap(struct thread *, struct freebsd6_freebsd32_mmap_args *);
@@ -1181,6 +1230,9 @@
 #ifdef PAD64_REQUIRED
 #else
 #endif
+#ifdef PAD64_REQUIRED
+#else
+#endif
 int	freebsd7_freebsd32_semctl(struct thread *, struct freebsd7_freebsd32_semctl_args *);
 int	freebsd7_freebsd32_msgctl(struct thread *, struct freebsd7_freebsd32_msgctl_args *);
 int	freebsd7_freebsd32_shmctl(struct thread *, struct freebsd7_freebsd32_shmctl_args *);
@@ -1208,6 +1260,9 @@
 #ifdef PAD64_REQUIRED
 #else
 #endif
+#ifdef PAD64_REQUIRED
+#else
+#endif
 int	freebsd10_freebsd32_pipe(struct thread *, struct freebsd10_freebsd32_pipe_args *);
 
 #endif /* COMPAT_FREEBSD10 */
@@ -1274,6 +1329,9 @@
 #ifdef PAD64_REQUIRED
 #else
 #endif
+#ifdef PAD64_REQUIRED
+#else
+#endif
 int	freebsd11_freebsd32_stat(struct thread *, struct freebsd11_freebsd32_stat_args *);
 int	freebsd11_freebsd32_fstat(struct thread *, struct freebsd11_freebsd32_fstat_args *);
 int	freebsd11_freebsd32_lstat(struct thread *, struct freebsd11_freebsd32_lstat_args *);
@@ -1306,6 +1364,9 @@
 #ifdef PAD64_REQUIRED
 #else
 #endif
+#ifdef PAD64_REQUIRED
+#else
+#endif
 
 #endif /* COMPAT_FREEBSD12 */
 
@@ -1476,6 +1537,10 @@
 #define	FREEBSD32_SYS_AUE_freebsd32___sysctlbyname	AUE_SYSCTL
 #define	FREEBSD32_SYS_AUE_freebsd32_aio_writev	AUE_AIO_WRITEV
 #define	FREEBSD32_SYS_AUE_freebsd32_aio_readv	AUE_AIO_READV
+#define	FREEBSD32_SYS_AUE_freebsd32_fdeallocate	AUE_FDEALLOCATE
+#define	FREEBSD32_SYS_AUE_freebsd32_fzero	AUE_FZERO
+#define	FREEBSD32_SYS_AUE_freebsd32_fdeallocate	AUE_FDEALLOCATE
+#define	FREEBSD32_SYS_AUE_freebsd32_fzero	AUE_FZERO
 
 #undef PAD_
 #undef PADL_
Index: sys/compat/freebsd32/freebsd32_syscall.h
===================================================================
--- sys/compat/freebsd32/freebsd32_syscall.h
+++ sys/compat/freebsd32/freebsd32_syscall.h
@@ -506,4 +506,8 @@
 #define	FREEBSD32_SYS___specialfd	577
 #define	FREEBSD32_SYS_freebsd32_aio_writev	578
 #define	FREEBSD32_SYS_freebsd32_aio_readv	579
-#define	FREEBSD32_SYS_MAXSYSCALL	580
+#define	FREEBSD32_SYS_freebsd32_fdeallocate	580
+#define	FREEBSD32_SYS_freebsd32_fzero	581
+#define	FREEBSD32_SYS_freebsd32_fdeallocate	580
+#define	FREEBSD32_SYS_freebsd32_fzero	581
+#define	FREEBSD32_SYS_MAXSYSCALL	582
Index: sys/compat/freebsd32/freebsd32_syscalls.c
===================================================================
--- sys/compat/freebsd32/freebsd32_syscalls.c
+++ sys/compat/freebsd32/freebsd32_syscalls.c
@@ -616,4 +616,11 @@
 	"__specialfd",			/* 577 = __specialfd */
 	"freebsd32_aio_writev",			/* 578 = freebsd32_aio_writev */
 	"freebsd32_aio_readv",			/* 579 = freebsd32_aio_readv */
+#ifdef PAD64_REQUIRED
+	"freebsd32_fdeallocate",			/* 580 = freebsd32_fdeallocate */
+	"freebsd32_fzero",			/* 581 = freebsd32_fzero */
+#else
+	"freebsd32_fdeallocate",			/* 580 = freebsd32_fdeallocate */
+	"freebsd32_fzero",			/* 581 = freebsd32_fzero */
+#endif
 };
Index: sys/compat/freebsd32/freebsd32_sysent.c
===================================================================
--- sys/compat/freebsd32/freebsd32_sysent.c
+++ sys/compat/freebsd32/freebsd32_sysent.c
@@ -669,4 +669,11 @@
 	{ .sy_narg = AS(__specialfd_args), .sy_call = (sy_call_t *)sys___specialfd, .sy_auevent = AUE_SPECIALFD, .sy_flags = SYF_CAPENABLED, .sy_thrcnt = SY_THR_STATIC },	/* 577 = __specialfd */
 	{ .sy_narg = AS(freebsd32_aio_writev_args), .sy_call = (sy_call_t *)freebsd32_aio_writev, .sy_auevent = AUE_AIO_WRITEV, .sy_flags = SYF_CAPENABLED, .sy_thrcnt = SY_THR_STATIC },	/* 578 = freebsd32_aio_writev */
 	{ .sy_narg = AS(freebsd32_aio_readv_args), .sy_call = (sy_call_t *)freebsd32_aio_readv, .sy_auevent = AUE_AIO_READV, .sy_flags = SYF_CAPENABLED, .sy_thrcnt = SY_THR_STATIC },	/* 579 = freebsd32_aio_readv */
+#ifdef PAD64_REQUIRED
+	{ .sy_narg = AS(freebsd32_fdeallocate_args), .sy_call = (sy_call_t *)freebsd32_fdeallocate, .sy_auevent = AUE_FDEALLOCATE, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC },	/* 580 = freebsd32_fdeallocate */
+	{ .sy_narg = AS(freebsd32_fzero_args), .sy_call = (sy_call_t *)freebsd32_fzero, .sy_auevent = AUE_FZERO, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC },	/* 581 = freebsd32_fzero */
+#else
+	{ .sy_narg = AS(freebsd32_fdeallocate_args), .sy_call = (sy_call_t *)freebsd32_fdeallocate, .sy_auevent = AUE_FDEALLOCATE, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC },	/* 580 = freebsd32_fdeallocate */
+	{ .sy_narg = AS(freebsd32_fzero_args), .sy_call = (sy_call_t *)freebsd32_fzero, .sy_auevent = AUE_FZERO, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC },	/* 581 = freebsd32_fzero */
+#endif
 };
Index: sys/compat/freebsd32/freebsd32_systrace_args.c
===================================================================
--- sys/compat/freebsd32/freebsd32_systrace_args.c
+++ sys/compat/freebsd32/freebsd32_systrace_args.c
@@ -3407,6 +3407,55 @@
 		*n_args = 1;
 		break;
 	}
+#ifdef PAD64_REQUIRED
+	/* freebsd32_fdeallocate */
+	case 580: {
+		struct freebsd32_fdeallocate_args *p = params;
+		iarg[0] = p->fd; /* int */
+		iarg[1] = p->pad; /* int */
+		uarg[2] = p->offset1; /* uint32_t */
+		uarg[3] = p->offset2; /* uint32_t */
+		uarg[4] = p->len1; /* uint32_t */
+		uarg[5] = p->len2; /* uint32_t */
+		*n_args = 6;
+		break;
+	}
+	/* freebsd32_fzero */
+	case 581: {
+		struct freebsd32_fzero_args *p = params;
+		iarg[0] = p->fd; /* int */
+		iarg[1] = p->pad; /* int */
+		uarg[2] = p->offset1; /* uint32_t */
+		uarg[3] = p->offset2; /* uint32_t */
+		uarg[4] = p->len1; /* uint32_t */
+		uarg[5] = p->len2; /* uint32_t */
+		*n_args = 6;
+		break;
+	}
+#else
+	/* freebsd32_fdeallocate */
+	case 580: {
+		struct freebsd32_fdeallocate_args *p = params;
+		iarg[0] = p->fd; /* int */
+		uarg[1] = p->offset1; /* uint32_t */
+		uarg[2] = p->offset2; /* uint32_t */
+		uarg[3] = p->len1; /* uint32_t */
+		uarg[4] = p->len2; /* uint32_t */
+		*n_args = 5;
+		break;
+	}
+	/* freebsd32_fzero */
+	case 581: {
+		struct freebsd32_fzero_args *p = params;
+		iarg[0] = p->fd; /* int */
+		uarg[1] = p->offset1; /* uint32_t */
+		uarg[2] = p->offset2; /* uint32_t */
+		uarg[3] = p->len1; /* uint32_t */
+		uarg[4] = p->len2; /* uint32_t */
+		*n_args = 5;
+		break;
+	}
+#endif
 	default:
 		*n_args = 0;
 		break;
@@ -9183,6 +9232,103 @@
 			break;
 		};
 		break;
+#ifdef PAD64_REQUIRED
+	/* freebsd32_fdeallocate */
+	case 580:
+		switch(ndx) {
+		case 0:
+			p = "int";
+			break;
+		case 1:
+			p = "int";
+			break;
+		case 2:
+			p = "uint32_t";
+			break;
+		case 3:
+			p = "uint32_t";
+			break;
+		case 4:
+			p = "uint32_t";
+			break;
+		case 5:
+			p = "uint32_t";
+			break;
+		default:
+			break;
+		};
+		break;
+	/* freebsd32_fzero */
+	case 581:
+		switch(ndx) {
+		case 0:
+			p = "int";
+			break;
+		case 1:
+			p = "int";
+			break;
+		case 2:
+			p = "uint32_t";
+			break;
+		case 3:
+			p = "uint32_t";
+			break;
+		case 4:
+			p = "uint32_t";
+			break;
+		case 5:
+			p = "uint32_t";
+			break;
+		default:
+			break;
+		};
+		break;
+#else
+	/* freebsd32_fdeallocate */
+	case 580:
+		switch(ndx) {
+		case 0:
+			p = "int";
+			break;
+		case 1:
+			p = "uint32_t";
+			break;
+		case 2:
+			p = "uint32_t";
+			break;
+		case 3:
+			p = "uint32_t";
+			break;
+		case 4:
+			p = "uint32_t";
+			break;
+		default:
+			break;
+		};
+		break;
+	/* freebsd32_fzero */
+	case 581:
+		switch(ndx) {
+		case 0:
+			p = "int";
+			break;
+		case 1:
+			p = "uint32_t";
+			break;
+		case 2:
+			p = "uint32_t";
+			break;
+		case 3:
+			p = "uint32_t";
+			break;
+		case 4:
+			p = "uint32_t";
+			break;
+		default:
+			break;
+		};
+		break;
+#endif
 	default:
 		break;
 	};
@@ -11102,6 +11248,29 @@
 		if (ndx == 0 || ndx == 1)
 			p = "int";
 		break;
+#ifdef PAD64_REQUIRED
+	/* freebsd32_fdeallocate */
+	case 580:
+		if (ndx == 0 || ndx == 1)
+			p = "int";
+		break;
+	/* freebsd32_fzero */
+	case 581:
+		if (ndx == 0 || ndx == 1)
+			p = "off_t";
+		break;
+#else
+	/* freebsd32_fdeallocate */
+	case 580:
+		if (ndx == 0 || ndx == 1)
+			p = "int";
+		break;
+	/* freebsd32_fzero */
+	case 581:
+		if (ndx == 0 || ndx == 1)
+			p = "off_t";
+		break;
+#endif
 	default:
 		break;
 	};
Index: sys/compat/freebsd32/syscalls.master
===================================================================
--- sys/compat/freebsd32/syscalls.master
+++ sys/compat/freebsd32/syscalls.master
@@ -1174,5 +1174,22 @@
 				    struct aiocb32 *aiocbp); }
 579	AUE_AIO_READV	STD	{ int freebsd32_aio_readv( \
 				    struct aiocb32 *aiocbp); }
+#ifdef PAD64_REQUIRED
+580	AUE_FDEALLOCATE	STD	{ int freebsd32_fdeallocate(int fd, \
+				    int pad, \
+				    uint32_t offset1, uint32_t offset2,\
+				    uint32_t len1, uint32_t len2); }
+581	AUE_FZERO	STD	{ off_t freebsd32_fzero(int fd, \
+				    int pad, \
+				    uint32_t offset1, uint32_t offset2,\
+				    
uint32_t len1, uint32_t len2); }
+#else
+580	AUE_FDEALLOCATE	STD	{ int freebsd32_fdeallocate(int fd, \
+				    uint32_t offset1, uint32_t offset2,\
+				    uint32_t len1, uint32_t len2); }
+581	AUE_FZERO	STD	{ off_t freebsd32_fzero(int fd, \
+				    uint32_t offset1, uint32_t offset2,\
+				    uint32_t len1, uint32_t len2); }
+#endif
 
 ; vim: syntax=off
Index: sys/contrib/openzfs/module/os/freebsd/zfs/zfs_vnops_os.c
===================================================================
--- sys/contrib/openzfs/module/os/freebsd/zfs/zfs_vnops_os.c
+++ sys/contrib/openzfs/module/os/freebsd/zfs/zfs_vnops_os.c
@@ -3797,6 +3797,50 @@
 	return (error);
 }
 
+/*
+ * Common code for zfs_space-related operations.
+ *
+ * This function is called from zfs_space and zfs_deallocate, both of which
+ * bracket the call with ZFS_ENTER()/ZFS_EXIT(); it does not do so itself.
+ */
+static int
+zfs_space_common(znode_t *zp, int cmd, off_t off, off_t len, int flag,
+    cred_t *cr)
+{
+	zfsvfs_t *zfsvfs = ZTOZSB(zp);
+	uint64_t uoff, ulen;
+	int error;
+
+	if (cmd != F_FREESP)
+		return (SET_ERROR(EINVAL));
+
+	/*
+	 * Callers might not be able to detect properly that we are read-only,
+	 * so check it explicitly here.
+	 */
+	if (zfs_is_readonly(zfsvfs))
+		return (SET_ERROR(EROFS));
+
+	if (len < 0)
+		return (SET_ERROR(EINVAL));
+
+	/*
+	 * Permissions aren't checked on Solaris because on this OS
+	 * zfs_space() can only be called with an opened file handle.
+	 * On Linux we can get here through truncate_range() which
+	 * operates directly on inodes, so we need to check access rights.
+	 */
+	if ((error = zfs_zaccess(zp, ACE_WRITE_DATA, 0, B_FALSE, cr)))
+		return (error);
+
+	uoff = off;
+	ulen = len;
+
+	error = zfs_freesp(zp, uoff, ulen, flag, TRUE);
+
+	return (error);
+}
+
 /*
  * Free or allocate space in a file.  Currently, this function only
  * supports the `F_FREESP' command.  However, this command is somewhat
@@ -3821,47 +3865,16 @@
     offset_t offset, cred_t *cr)
 {
 	zfsvfs_t *zfsvfs = ZTOZSB(zp);
-	uint64_t off, len;
+	off_t off, len;
 	int error;
 
 	ZFS_ENTER(zfsvfs);
 	ZFS_VERIFY_ZP(zp);
 
-	if (cmd != F_FREESP) {
-		ZFS_EXIT(zfsvfs);
-		return (SET_ERROR(EINVAL));
-	}
-
-	/*
-	 * Callers might not be able to detect properly that we are read-only,
-	 * so check it explicitly here.
-	 */
-	if (zfs_is_readonly(zfsvfs)) {
-		ZFS_EXIT(zfsvfs);
-		return (SET_ERROR(EROFS));
-	}
-
-	if (bfp->l_len < 0) {
-		ZFS_EXIT(zfsvfs);
-		return (SET_ERROR(EINVAL));
-	}
-
-	/*
-	 * Permissions aren't checked on Solaris because on this OS
-	 * zfs_space() can only be called with an opened file handle.
-	 * On Linux we can get here through truncate_range() which
-	 * operates directly on inodes, so we need to check access rights.
-	 */
-	if ((error = zfs_zaccess(zp, ACE_WRITE_DATA, 0, B_FALSE, cr))) {
-		ZFS_EXIT(zfsvfs);
-		return (error);
-	}
-
 	off = bfp->l_start;
 	len = bfp->l_len; /* 0 means from off to end of file */
 
-	error = zfs_freesp(zp, off, len, flag, TRUE);
-
+	error = zfs_space_common(zp, cmd, off, len, flag, cr);
 	ZFS_EXIT(zfsvfs);
 	return (error);
 }
@@ -5783,6 +5796,88 @@
 	return (error);
 }
 
+/*
+ * This is mostly the same as zfs_space except it limits the range of operation
+ * to zp->z_size, so the log record of zfs_space during replay would be reused.
+ * A request that starts at or beyond the end of the file is a successful
+ * no-op.
+ */
+static int
+zfs_deallocate(struct vop_deallocate_args *ap)
+{
+	vnode_t *vp;
+	znode_t *zp;
+	zfsvfs_t *zfsvfs;
+	off_t offset, len;
+	int error;
+
+	vp = ap->a_vp;
+	zp = VTOZ(vp);
+	zfsvfs = ZTOZSB(zp);
+
+	if (ap->a_offset < 0 || ap->a_len < 0)
+		return (SET_ERROR(EINVAL));
+
+	ZFS_ENTER(zfsvfs);
+	ZFS_VERIFY_ZP(zp);
+
+	offset = ap->a_offset;
+	len = ap->a_len;
+
+	/*
+	 * Check for EOF before clamping: with offset past z_size the
+	 * subtraction below would wrap (or go negative) and hand a bogus
+	 * length to zfs_space_common().
+	 */
+	if ((uint64_t)offset >= zp->z_size) {
+		error = 0;
+		goto out;
+	}
+	if ((uint64_t)len > zp->z_size - (uint64_t)offset)
+		len = zp->z_size - offset;
+	if (len == 0) {
+		error = 0;
+		goto out;
+	}
+	error = zfs_space_common(zp, F_FREESP, offset, len, O_RDWR, ap->a_cred);
+out:
+	ZFS_EXIT(zfsvfs);
+	return (error);
+}
+
+/*
+ * VOP_ZERO for ZFS: free the requested range through zfs_space(F_FREESP)
+ * and, on success, report that nothing is left to do by storing 0 in
+ * *ap->a_len.
+ */
+static int
+zfs_zero(struct vop_zero_args *ap)
+{
+	vnode_t *vp;
+	znode_t *zp;
+	flock64_t fl;
+	int error;
+
+	vp = ap->a_vp;
+	zp = VTOZ(vp);
+
+	if (ap->a_offset < 0 || *ap->a_len < 0)
+		return (SET_ERROR(EINVAL));
+
+	fl.l_type = F_WRLCK;
+	fl.l_whence = SEEK_SET;
+	fl.l_start = ap->a_offset;
+	fl.l_len = *ap->a_len;
+	if (fl.l_len > OFF_MAX - fl.l_start)
+		fl.l_len = OFF_MAX - fl.l_start;
+	if (fl.l_len == 0)
+		return (0);
+
+	error = zfs_space(zp, F_FREESP, &fl, O_RDWR, ap->a_offset, ap->a_cred);
+	if (error == 0)
+		*ap->a_len = 0;
+	return (error);
+}
+
 struct vop_vector zfs_vnodeops;
 struct vop_vector zfs_fifoops;
 struct vop_vector zfs_shareops;
@@ -5802,6 +5897,8 @@
 #endif
 	.vop_access =		zfs_freebsd_access,
 	.vop_allocate =		VOP_EINVAL,
+	.vop_deallocate =	zfs_deallocate,
+	.vop_zero =		zfs_zero,
 	.vop_lookup =		zfs_cache_lookup,
 	.vop_cachedlookup =	zfs_freebsd_cachedlookup,
 	.vop_getattr =		zfs_freebsd_getattr,
Index: sys/contrib/openzfs/module/os/freebsd/zfs/zfs_znode.c
===================================================================
--- sys/contrib/openzfs/module/os/freebsd/zfs/zfs_znode.c
+++ sys/contrib/openzfs/module/os/freebsd/zfs/zfs_znode.c
@@ -1449,14 +1449,8 @@
 
 	error = dmu_free_long_range(zfsvfs->z_os, zp->z_id, off, len);
 
-	if (error == 0) {
-		/*
-		 * In FreeBSD we cannot free block in the middle of a file,
-		 * but only at the end of a file, so this code path should
-		 * 
never happen.
-		 */
-		vnode_pager_setsize(ZTOV(zp), off);
-	}
+	if (error == 0)
+		vnode_pager_purge_range(ZTOV(zp), off, off + len);
 
 	zfs_rangelock_exit(lr);
 
Index: sys/kern/init_sysent.c
===================================================================
--- sys/kern/init_sysent.c
+++ sys/kern/init_sysent.c
@@ -635,4 +635,6 @@
 	{ .sy_narg = AS(__specialfd_args), .sy_call = (sy_call_t *)sys___specialfd, .sy_auevent = AUE_SPECIALFD, .sy_flags = SYF_CAPENABLED, .sy_thrcnt = SY_THR_STATIC },	/* 577 = __specialfd */
 	{ .sy_narg = AS(aio_writev_args), .sy_call = (sy_call_t *)sys_aio_writev, .sy_auevent = AUE_AIO_WRITEV, .sy_flags = SYF_CAPENABLED, .sy_thrcnt = SY_THR_STATIC },	/* 578 = aio_writev */
 	{ .sy_narg = AS(aio_readv_args), .sy_call = (sy_call_t *)sys_aio_readv, .sy_auevent = AUE_AIO_READV, .sy_flags = SYF_CAPENABLED, .sy_thrcnt = SY_THR_STATIC },	/* 579 = aio_readv */
+	{ .sy_narg = AS(fdeallocate_args), .sy_call = (sy_call_t *)sys_fdeallocate, .sy_auevent = AUE_FDEALLOCATE, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC },	/* 580 = fdeallocate */
+	{ .sy_narg = AS(fzero_args), .sy_call = (sy_call_t *)sys_fzero, .sy_auevent = AUE_FZERO, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC },	/* 581 = fzero */
 };
Index: sys/kern/sys_generic.c
===================================================================
--- sys/kern/sys_generic.c
+++ sys/kern/sys_generic.c
@@ -861,6 +861,100 @@
 	return (error);
 }
 
+int
+sys_fdeallocate(struct thread *td, struct fdeallocate_args *uap)
+{
+	int error;
+
+	error = kern_fdeallocate(td, uap->fd, uap->offset, uap->len);
+	return (error);
+}
+
+/*
+ * Deallocate the byte range [offset, offset + len) of the file behind fd.
+ * The descriptor must be seekable and open for writing.
+ */
+int
+kern_fdeallocate(struct thread *td, int fd, off_t offset, off_t len)
+{
+	struct file *fp;
+	int error;
+
+	AUDIT_ARG_FD(fd);
+	if (offset < 0 || len <= 0)
+		return (EINVAL);
+
+	error = fget(td, fd, &cap_pwrite_rights, &fp);
+	if (error != 0)
+		return (error);
+	AUDIT_ARG_FILE(td->td_proc, fp);
+	if ((fp->f_ops->fo_flags & DFLAG_SEEKABLE) == 0) {
+		error = ESPIPE;
+		goto out;
+	}
+	if ((fp->f_flag & FWRITE) == 0) {
+		error = EBADF;
+		goto out;
+	}
+
+	error = fo_fdeallocate(fp, offset, len, td->td_ucred, td);
+out:
+	fdrop(fp, td);
+	return (error);
+}
+
+int
+sys_fzero(struct thread *td, struct fzero_args *uap)
+{
+	int error;
+
+	error = kern_fzero(td, uap->fd, uap->offset, uap->len);
+	return (error);
+}
+
+/*
+ * Zero the byte range [offset, offset + len) of the file behind fd.  On
+ * success td_retval[0] holds the number of bytes that were processed, which
+ * is short when the operation was interrupted after making progress.
+ */
+int
+kern_fzero(struct thread *td, int fd, off_t offset, off_t len)
+{
+	struct file *fp;
+	off_t resid;
+	int error;
+
+	AUDIT_ARG_FD(fd);
+	if (offset < 0 || len <= 0)
+		return (EINVAL);
+
+	error = fget(td, fd, &cap_pwrite_rights, &fp);
+	if (error != 0)
+		return (error);
+	AUDIT_ARG_FILE(td->td_proc, fp);
+	if ((fp->f_ops->fo_flags & DFLAG_SEEKABLE) == 0) {
+		error = ESPIPE;
+		goto out;
+	}
+	if ((fp->f_flag & FWRITE) == 0) {
+		error = EBADF;
+		goto out;
+	}
+
+	/* fo_fzero() updates resid to the number of bytes left undone. */
+	resid = len;
+	error = fo_fzero(fp, offset, &resid, td->td_ucred, td);
+	/* Report a short count instead of failing once progress was made. */
+	if (resid != len &&
+	    (error == ERESTART || error == EINTR || error == EWOULDBLOCK))
+		error = 0;
+	if (error == 0)
+		td->td_retval[0] = len - resid;
+out:
+	fdrop(fp, td);
+	return (error);
+}
+
 int
 kern_specialfd(struct thread *td, int type, void *arg)
 {
Index: sys/kern/syscalls.c
===================================================================
--- sys/kern/syscalls.c
+++ sys/kern/syscalls.c
@@ -586,4 +586,6 @@
 	"__specialfd",			/* 577 = __specialfd */
 	"aio_writev",			/* 578 = aio_writev */
 	"aio_readv",			/* 579 = aio_readv */
+	"fdeallocate",			/* 580 = fdeallocate */
+	"fzero",			/* 581 = fzero */
 };
Index: sys/kern/syscalls.master
===================================================================
--- sys/kern/syscalls.master
+++ sys/kern/syscalls.master
@@ -3258,6 +3258,20 @@
 		    _Inout_ struct aiocb *aiocbp
 		);
 	}
+580	AUE_FDEALLOCATE	STD {
+		int fdeallocate(
+		    int fd,
+		    off_t offset,
+		    off_t len
+		);
+	}
+581	AUE_FZERO	STD {
+		off_t fzero(
+		    int fd,
+		    off_t offset,
+		    off_t len
+		);
+	}
 
 ; Please copy any additions and changes to the following compatability tables:
 ; sys/compat/freebsd32/syscalls.master
Index: sys/kern/systrace_args.c
===================================================================
--- sys/kern/systrace_args.c
+++ sys/kern/systrace_args.c
@@ -3399,6 +3399,24 @@
 		*n_args = 1;
 		break;
 	}
+	/* fdeallocate */
+	case 580: {
+		struct fdeallocate_args *p = params;
+		iarg[0] = p->fd; /* int */
+		iarg[1] = p->offset; /* off_t */
+		iarg[2] = p->len; /* off_t */
+		*n_args = 3;
+		break;
+	}
+	/* fzero */
+	case 581: {
+		struct fzero_args *p = params;
+		iarg[0] = p->fd; /* int */
+		iarg[1] = p->offset; /* off_t */
+		iarg[2] = p->len; /* off_t */
+		*n_args = 3;
+		break;
+	}
 	default:
 		*n_args = 0;
 		break;
@@ -9088,6 +9106,38 @@
 			break;
 		};
 		break;
+	/* fdeallocate */
+	case 580:
+		switch(ndx) {
+		case 0:
+			p = "int";
+			break;
+		case 1:
+			p = "off_t";
+			break;
+		case 2:
+			p = "off_t";
+			break;
+		default:
+			break;
+		};
+		break;
+	/* fzero */
+	case 581:
+		switch(ndx) {
+		case 0:
+			p = "int";
+			break;
+		case 1:
+			p = "off_t";
+			break;
+		case 2:
+			p = "off_t";
+			break;
+		default:
+			break;
+		};
+		break;
 	default:
 		break;
 	};
@@ -11034,6 +11084,16 @@
 		if (ndx == 0 || ndx == 1)
 			p = "int";
 		break;
+	/* fdeallocate */
+	case 580:
+		if (ndx == 0 || ndx == 1)
+			p = "int";
+		break;
+	/* fzero */
+	case 581:
+		if (ndx == 0 || ndx == 1)
+			p = "off_t";
+		break;
 	default:
 		break;
 	};
Index: sys/kern/vfs_default.c
===================================================================
--- sys/kern/vfs_default.c
+++ sys/kern/vfs_default.c
@@ -92,6 +92,7 @@
 static int vop_stdgetpages_async(struct vop_getpages_async_args *ap);
 static int vop_stdread_pgcache(struct vop_read_pgcache_args *ap);
 static int vop_stdstat(struct vop_stat_args *ap);
+static int vop_stdzero(struct vop_zero_args *ap);
 
 /*
  * This vnode table stores what we want to do if the filesystem doesn't
@@ -116,6 +117,8 @@
 	.vop_advlockasync =	vop_stdadvlockasync,
 	.vop_advlockpurge =	vop_stdadvlockpurge,
 	.vop_allocate =		vop_stdallocate,
+	.vop_deallocate =	VOP_EOPNOTSUPP,
+	.vop_zero =		vop_stdzero,
 	.vop_bmap =		vop_stdbmap,
 	.vop_close =		VOP_NULL,
.vop_fsync = VOP_NULL, @@ -1075,6 +1078,143 @@ return (error); } +static int +vp_zerofill(struct vnode *vp, struct vattr *vap, off_t offset, off_t len, + off_t *residp, struct ucred *cred) +{ + int iosize; + int error = EOVERFLOW; + void *buf = NULL; + struct iovec aiov; + struct uio auio; + struct thread *td; + + iosize = vap->va_blocksize; + td = curthread; + + if (iosize == 0) + iosize = BLKDEV_IOSIZE; + if (iosize > MAXPHYS) + iosize = MAXPHYS; + buf = malloc(iosize, M_TEMP, M_ZERO | M_WAITOK); + + while (len > 0) { + int xfersize = iosize; + if (offset % iosize != 0) + xfersize -= offset % iosize; + if (xfersize > len) + xfersize = len; + + aiov.iov_base = buf; + aiov.iov_len = xfersize; + auio.uio_iov = &aiov; + auio.uio_iovcnt = 1; + auio.uio_offset = offset; + auio.uio_resid = xfersize; + auio.uio_segflg = UIO_SYSSPACE; + auio.uio_rw = UIO_WRITE; + auio.uio_td = td; + + error = VOP_WRITE(vp, &auio, 0, cred); + if (error != 0) { + len -= xfersize - auio.uio_resid; + break; + } + + len -= xfersize; + offset += xfersize; + } + + free(buf, M_TEMP); + *residp = len; + return (error); +} + +static int +vop_stdzero(struct vop_zero_args *ap) +{ + struct vnode *vp; + off_t offset, resid; + struct ucred *cred; + int error; + struct vattr va; + + vp = ap->a_vp; + offset = ap->a_offset; + resid = *ap->a_len; + cred = ap->a_cred; + + if (offset + resid < offset) + return (EOVERFLOW); + if (resid == 0) + return (0); + + error = VOP_GETATTR(vp, &va, cred); + if (error) + return (error); + if (va.va_size > OFF_MAX) + return (EOVERFLOW); + + while (resid > 0) { + off_t noff; + off_t xfersize; + off_t rem; + + noff = offset; + + error = vn_bmap_seekhole_locked(vp, FIOSEEKDATA, &noff, cred); + if (error) { + if (error == ENXIO) { + /* No more data region to be filled */ + error = vn_truncate_locked( + vp, offset + resid, false, cred); + if (error) + goto out; + offset += resid; + resid = 0; + break; + } + /* XXX: Is it okay to fallback further? 
*/ + goto out; + } + KASSERT(noff >= offset, ("FIOSEEKDATA going backward")); + if (noff != offset) { + xfersize = omin(noff - offset, resid); + resid -= xfersize; + offset += xfersize; + continue; + } + error = vn_bmap_seekhole_locked(vp, FIOSEEKHOLE, &noff, cred); + if (error) + goto out; + + xfersize = noff - offset; + if (xfersize > resid) + xfersize = resid; + + /* Fill zeroes */ + error = vp_zerofill(vp, &va, offset, xfersize, &rem, cred); + if (error) { + resid -= xfersize - rem; + offset += xfersize - rem; + goto out; + } + + resid -= xfersize; + offset += xfersize; + } +out: + if (*ap->a_len != resid && + (error == ERESTART || error == EINTR || error == EWOULDBLOCK)) + error = 0; + + if (!error) + *ap->a_len = resid; + else + vn_truncate_locked(vp, va.va_size, false, cred); + return (error); +} + int vop_stdadvise(struct vop_advise_args *ap) { Index: sys/kern/vfs_vnops.c =================================================================== --- sys/kern/vfs_vnops.c +++ sys/kern/vfs_vnops.c @@ -106,6 +106,8 @@ static fo_close_t vn_closefile; static fo_mmap_t vn_mmap; static fo_fallocate_t vn_fallocate; +static fo_fdeallocate_t vn_fdeallocate; +static fo_fzero_t vn_fzero; struct fileops vnops = { .fo_read = vn_io_fault, @@ -123,6 +125,8 @@ .fo_fill_kinfo = vn_fill_kinfo, .fo_mmap = vn_mmap, .fo_fallocate = vn_fallocate, + .fo_fdeallocate = vn_fdeallocate, + .fo_fzero = vn_fzero, .fo_flags = DFLAG_PASSABLE | DFLAG_SEEKABLE }; @@ -2345,7 +2349,8 @@ } int -vn_bmap_seekhole(struct vnode *vp, u_long cmd, off_t *off, struct ucred *cred) +vn_bmap_seekhole_locked(struct vnode *vp, u_long cmd, off_t *off, + struct ucred *cred) { struct vattr va; daddr_t bn, bnp; @@ -2353,22 +2358,17 @@ off_t noff; int error; - KASSERT(cmd == FIOSEEKHOLE || cmd == FIOSEEKDATA, - ("Wrong command %lu", cmd)); - - if (vn_lock(vp, LK_SHARED) != 0) - return (EBADF); if (vp->v_type != VREG) { error = ENOTTY; - goto unlock; + goto out; } error = VOP_GETATTR(vp, &va, cred); if (error != 0) - 
goto unlock; + goto out; noff = *off; if (noff >= va.va_size) { error = ENXIO; - goto unlock; + goto out; } bsize = vp->v_mount->mnt_stat.f_iosize; for (bn = noff / bsize; noff < va.va_size; bn++, noff += bsize - @@ -2376,14 +2376,14 @@ error = VOP_BMAP(vp, bn, NULL, &bnp, NULL, NULL); if (error == EOPNOTSUPP) { error = ENOTTY; - goto unlock; + goto out; } if ((bnp == -1 && cmd == FIOSEEKHOLE) || (bnp != -1 && cmd == FIOSEEKDATA)) { noff = bn * bsize; if (noff < *off) noff = *off; - goto unlock; + goto out; } } if (noff > va.va_size) @@ -2391,13 +2391,28 @@ /* noff == va.va_size. There is an implicit hole at the end of file. */ if (cmd == FIOSEEKDATA) error = ENXIO; -unlock: +out: VOP_UNLOCK(vp); if (error == 0) *off = noff; return (error); } +int +vn_bmap_seekhole(struct vnode *vp, u_long cmd, off_t *off, struct ucred *cred) +{ + int error; + + KASSERT(cmd == FIOSEEKHOLE || cmd == FIOSEEKDATA, + ("Wrong command %lu", cmd)); + + if (vn_lock(vp, LK_SHARED) != 0) + return (EBADF); + error = vn_bmap_seekhole_locked(vp, cmd, off, cred); + VOP_UNLOCK(vp); + return (error); +} + int vn_seek(struct file *fp, off_t offset, int whence, struct thread *td) { @@ -3339,6 +3354,84 @@ return (error); } +static int +vn_fdeallocate(struct file *fp, off_t offset, off_t len, + struct ucred *active_cred, struct thread *td) +{ + int error; + struct mount *mp; + struct vnode *vp; + int lock_flags; + void *rl_cookie = NULL; + + vp = fp->f_vnode; + + if (offset < 0 || len < 0) + return (EINVAL); + if (vp->v_type != VREG) + return (ENODEV); + + /* Take the maximum range if len equals 0 or end offset overflows */ + if (len == 0 || len > OFF_MAX - offset) + len = OFF_MAX - offset; + if (len == 0) + return (0); + + rl_cookie = vn_rangelock_wlock(vp, offset, offset + len); + if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) + goto out; + + if (MNT_SHARED_WRITES(mp) || + (mp == NULL && MNT_SHARED_WRITES(vp->v_mount))) { + lock_flags = LK_SHARED; + } else { + lock_flags = 
LK_EXCLUSIVE; + } + vn_lock(vp, lock_flags | LK_RETRY); + + error = VOP_DEALLOCATE(vp, offset, len, active_cred); + VOP_UNLOCK(vp); + vn_finished_write(mp); + +out: + vn_rangelock_unlock(vp, rl_cookie); + return (error); +} + +static int +vn_fzero(struct file *fp, off_t offset, off_t *residp, + struct ucred *active_cred, struct thread *td) +{ + int error; + struct mount *mp; + struct vnode *vp; + void *rl_cookie = NULL; + off_t resid; + + vp = fp->f_vnode; + resid = *residp; + + if (offset + resid < offset) + return (EOVERFLOW); + + if (vp->v_type != VREG) + return (ENODEV); + + rl_cookie = vn_rangelock_wlock(vp, offset, offset + resid); + if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) + goto out; + + vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); + error = VOP_ZERO(vp, offset, &resid, active_cred); + VOP_UNLOCK(vp); + vn_finished_write(mp); + +out: + *residp = resid; + vn_rangelock_unlock(vp, rl_cookie); + return (error); +} + static u_long vn_lock_pair_pause_cnt; SYSCTL_ULONG(_debug, OID_AUTO, vn_lock_pair_pause, CTLFLAG_RD, &vn_lock_pair_pause_cnt, 0, Index: sys/kern/vnode_if.src =================================================================== --- sys/kern/vnode_if.src +++ sys/kern/vnode_if.src @@ -706,6 +706,26 @@ }; +%% deallocate vp L L L + +vop_deallocate { + IN struct vnode *vp; + IN off_t offset; + IN off_t len; + IN struct ucred *cred; +}; + + +%% zero vp L L L + +vop_zero { + IN struct vnode *vp; + IN off_t offset; + INOUT off_t *len; + IN struct ucred *cred; +}; + + %% advise vp U U U vop_advise { Index: sys/security/audit/audit_bsm.c =================================================================== --- sys/security/audit/audit_bsm.c +++ sys/security/audit/audit_bsm.c @@ -1019,12 +1019,14 @@ * XXXRW: Some of these need to handle non-vnode cases as well. 
*/ case AUE_FCHDIR: + case AUE_FDEALLOCATE: case AUE_FPATHCONF: case AUE_FSTAT: case AUE_FSTATFS: case AUE_FSYNC: case AUE_FTRUNCATE: case AUE_FUTIMES: + case AUE_FZERO: case AUE_GETDIRENTRIES: case AUE_GETDIRENTRIESATTR: case AUE_LSEEK: Index: sys/sys/fcntl.h =================================================================== --- sys/sys/fcntl.h +++ sys/sys/fcntl.h @@ -360,6 +360,8 @@ int posix_fadvise(int, off_t, off_t, int); int posix_fallocate(int, off_t, off_t); #endif +int fdeallocate(int, off_t, off_t); +off_t fzero(int, off_t, off_t); __END_DECLS #endif Index: sys/sys/file.h =================================================================== --- sys/sys/file.h +++ sys/sys/file.h @@ -129,6 +129,10 @@ typedef int fo_get_seals_t(struct file *fp, int *flags); typedef int fo_fallocate_t(struct file *fp, off_t offset, off_t len, struct thread *td); +typedef int fo_fdeallocate_t(struct file *fp, off_t offset, off_t len, + struct ucred *active_cred, struct thread *td); +typedef int fo_fzero_t(struct file *fp, off_t offset, off_t *residp, + struct ucred *active_cred, struct thread *td); typedef int fo_flags_t; struct fileops { @@ -150,6 +154,8 @@ fo_add_seals_t *fo_add_seals; fo_get_seals_t *fo_get_seals; fo_fallocate_t *fo_fallocate; + fo_fdeallocate_t *fo_fdeallocate; + fo_fzero_t *fo_fzero; fo_flags_t fo_flags; /* DFLAG_* below */ }; @@ -470,6 +476,27 @@ return ((*fp->f_ops->fo_fallocate)(fp, offset, len, td)); } +static __inline int fo_fdeallocate(struct file *fp, off_t offset, off_t len, + struct ucred *active_cred, struct thread *td) +{ + + if (fp->f_ops->fo_fdeallocate == NULL) + return (ENODEV); + return ((*fp->f_ops->fo_fdeallocate)(fp, offset, len, active_cred, + td)); +} + +static __inline int fo_fzero(struct file *fp, off_t offset, off_t *residp, + struct ucred *active_cred, struct thread *td) +{ + + if (fp->f_ops->fo_fzero == NULL) + return (ENODEV); + return ((*fp->f_ops->fo_fzero)(fp, offset, residp, active_cred, + td)); +} + + #endif /* _KERNEL */ 
#endif /* !SYS_FILE_H */ Index: sys/sys/syscall.h =================================================================== --- sys/sys/syscall.h +++ sys/sys/syscall.h @@ -515,4 +515,6 @@ #define SYS___specialfd 577 #define SYS_aio_writev 578 #define SYS_aio_readv 579 -#define SYS_MAXSYSCALL 580 +#define SYS_fdeallocate 580 +#define SYS_fzero 581 +#define SYS_MAXSYSCALL 582 Index: sys/sys/syscall.mk =================================================================== --- sys/sys/syscall.mk +++ sys/sys/syscall.mk @@ -420,4 +420,6 @@ rpctls_syscall.o \ __specialfd.o \ aio_writev.o \ - aio_readv.o + aio_readv.o \ + fdeallocate.o \ + fzero.o Index: sys/sys/syscallsubr.h =================================================================== --- sys/sys/syscallsubr.h +++ sys/sys/syscallsubr.h @@ -230,6 +230,8 @@ int advice); int kern_posix_fallocate(struct thread *td, int fd, off_t offset, off_t len); +int kern_fdeallocate(struct thread *td, int fd, off_t offset, off_t len); +int kern_fzero(struct thread *td, int fd, off_t offset, off_t len); int kern_procctl(struct thread *td, enum idtype idtype, id_t id, int com, void *data); int kern_pread(struct thread *td, int fd, void *buf, size_t nbyte, Index: sys/sys/sysproto.h =================================================================== --- sys/sys/sysproto.h +++ sys/sys/sysproto.h @@ -1847,6 +1847,16 @@ struct aio_readv_args { char aiocbp_l_[PADL_(struct aiocb *)]; struct aiocb * aiocbp; char aiocbp_r_[PADR_(struct aiocb *)]; }; +struct fdeallocate_args { /* Argument layout taken by sys_fdeallocate(): fd, offset and len, each bracketed by filler arrays sized with PADL_() and PADR_(). */ + char fd_l_[PADL_(int)]; int fd; char fd_r_[PADR_(int)]; + char offset_l_[PADL_(off_t)]; off_t offset; char offset_r_[PADR_(off_t)]; + char len_l_[PADL_(off_t)]; off_t len; char len_r_[PADR_(off_t)]; +}; +struct fzero_args { /* Argument layout taken by sys_fzero(): same three members and padding as struct fdeallocate_args. */ + char fd_l_[PADL_(int)]; int fd; char fd_r_[PADR_(int)]; + char offset_l_[PADL_(off_t)]; off_t offset; char offset_r_[PADR_(off_t)]; + char len_l_[PADL_(off_t)]; off_t len; char len_r_[PADR_(off_t)]; +}; int nosys(struct thread *, struct nosys_args *); void
sys_sys_exit(struct thread *, struct sys_exit_args *); int sys_fork(struct thread *, struct fork_args *); @@ -2241,6 +2251,8 @@ int sys___specialfd(struct thread *, struct __specialfd_args *); int sys_aio_writev(struct thread *, struct aio_writev_args *); int sys_aio_readv(struct thread *, struct aio_readv_args *); +int sys_fdeallocate(struct thread *, struct fdeallocate_args *); +int sys_fzero(struct thread *, struct fzero_args *); #ifdef COMPAT_43 @@ -3175,6 +3187,8 @@ #define SYS_AUE___specialfd AUE_SPECIALFD #define SYS_AUE_aio_writev AUE_AIO_WRITEV #define SYS_AUE_aio_readv AUE_AIO_READV +#define SYS_AUE_fdeallocate AUE_FDEALLOCATE +#define SYS_AUE_fzero AUE_FZERO #undef PAD_ #undef PADL_ Index: sys/sys/vnode.h =================================================================== --- sys/sys/vnode.h +++ sys/sys/vnode.h @@ -719,6 +719,9 @@ void vn_printf(struct vnode *vp, const char *fmt, ...) __printflike(2,3); int vrecycle(struct vnode *vp); int vrecyclel(struct vnode *vp); +/* vn_bmap_seekhole_locked() is not a public KPI. */ +int vn_bmap_seekhole_locked(struct vnode *vp, u_long cmd, off_t *off, + struct ucred *cred); int vn_bmap_seekhole(struct vnode *vp, u_long cmd, off_t *off, struct ucred *cred); int vn_close(struct vnode *vp,