Index: lib/libc/sys/Symbol.map =================================================================== --- lib/libc/sys/Symbol.map +++ lib/libc/sys/Symbol.map @@ -413,6 +413,7 @@ memfd_create; shm_create_largepage; shm_rename; + fzero; }; FBSDprivate_1.0 { Index: sys/bsm/audit_kevents.h =================================================================== --- sys/bsm/audit_kevents.h +++ sys/bsm/audit_kevents.h @@ -659,6 +659,7 @@ #define AUE_SHMRENAME 43263 /* FreeBSD-specific. */ #define AUE_REALPATHAT 43264 /* FreeBSD-specific. */ #define AUE_CLOSERANGE 43265 /* FreeBSD-specific. */ +#define AUE_FZERO 43266 /* FreeBSD-specific. */ /* * Darwin BSM uses a number of AUE_O_* definitions, which are aliased to the Index: sys/compat/freebsd32/freebsd32_misc.c =================================================================== --- sys/compat/freebsd32/freebsd32_misc.c +++ sys/compat/freebsd32/freebsd32_misc.c @@ -3556,6 +3556,17 @@ return (kern_posix_error(td, error)); } +int +freebsd32_fzero(struct thread *td, + struct freebsd32_fzero_args *uap) +{ + int error; + + error = kern_fzero(td, uap->fd, PAIR32TO64(off_t, uap->offset), + PAIR32TO64(off_t, uap->len), uap->flags); + return (error); +} + int freebsd32_posix_fadvise(struct thread *td, struct freebsd32_posix_fadvise_args *uap) Index: sys/compat/freebsd32/freebsd32_proto.h =================================================================== --- sys/compat/freebsd32/freebsd32_proto.h +++ sys/compat/freebsd32/freebsd32_proto.h @@ -742,6 +742,26 @@ char new_l_[PADL_(void *)]; void * new; char new_r_[PADR_(void *)]; char newlen_l_[PADL_(size_t)]; size_t newlen; char newlen_r_[PADR_(size_t)]; }; +#ifdef PAD64_REQUIRED +struct freebsd32_fzero_args { + char fd_l_[PADL_(int)]; int fd; char fd_r_[PADR_(int)]; + char pad_l_[PADL_(int)]; int pad; char pad_r_[PADR_(int)]; + char offset1_l_[PADL_(uint32_t)]; uint32_t offset1; char offset1_r_[PADR_(uint32_t)]; + char offset2_l_[PADL_(uint32_t)]; uint32_t offset2; char 
offset2_r_[PADR_(uint32_t)]; + char len1_l_[PADL_(uint32_t)]; uint32_t len1; char len1_r_[PADR_(uint32_t)]; + char len2_l_[PADL_(uint32_t)]; uint32_t len2; char len2_r_[PADR_(uint32_t)]; + char flags_l_[PADL_(int)]; int flags; char flags_r_[PADR_(int)]; +}; +#else +struct freebsd32_fzero_args { + char fd_l_[PADL_(int)]; int fd; char fd_r_[PADR_(int)]; + char offset1_l_[PADL_(uint32_t)]; uint32_t offset1; char offset1_r_[PADR_(uint32_t)]; + char offset2_l_[PADL_(uint32_t)]; uint32_t offset2; char offset2_r_[PADR_(uint32_t)]; + char len1_l_[PADL_(uint32_t)]; uint32_t len1; char len1_r_[PADR_(uint32_t)]; + char len2_l_[PADL_(uint32_t)]; uint32_t len2; char len2_r_[PADR_(uint32_t)]; + char flags_l_[PADL_(int)]; int flags; char flags_r_[PADR_(int)]; +}; +#endif #if !defined(PAD64_REQUIRED) && !defined(__amd64__) #define PAD64_REQUIRED #endif @@ -881,6 +901,11 @@ int freebsd32_cpuset_getdomain(struct thread *, struct freebsd32_cpuset_getdomain_args *); int freebsd32_cpuset_setdomain(struct thread *, struct freebsd32_cpuset_setdomain_args *); int freebsd32___sysctlbyname(struct thread *, struct freebsd32___sysctlbyname_args *); +#ifdef PAD64_REQUIRED +int freebsd32_fzero(struct thread *, struct freebsd32_fzero_args *); +#else +int freebsd32_fzero(struct thread *, struct freebsd32_fzero_args *); +#endif #ifdef COMPAT_43 @@ -958,6 +983,9 @@ #ifdef PAD64_REQUIRED #else #endif +#ifdef PAD64_REQUIRED +#else +#endif int ofreebsd32_lseek(struct thread *, struct ofreebsd32_lseek_args *); int ofreebsd32_stat(struct thread *, struct ofreebsd32_stat_args *); int ofreebsd32_lstat(struct thread *, struct ofreebsd32_lstat_args *); @@ -1032,6 +1060,9 @@ #ifdef PAD64_REQUIRED #else #endif +#ifdef PAD64_REQUIRED +#else +#endif int freebsd4_freebsd32_getfsstat(struct thread *, struct freebsd4_freebsd32_getfsstat_args *); int freebsd4_freebsd32_statfs(struct thread *, struct freebsd4_freebsd32_statfs_args *); int freebsd4_freebsd32_fstatfs(struct thread *, struct 
freebsd4_freebsd32_fstatfs_args *); @@ -1120,6 +1151,9 @@ #ifdef PAD64_REQUIRED #else #endif +#ifdef PAD64_REQUIRED +#else +#endif int freebsd6_freebsd32_pread(struct thread *, struct freebsd6_freebsd32_pread_args *); int freebsd6_freebsd32_pwrite(struct thread *, struct freebsd6_freebsd32_pwrite_args *); int freebsd6_freebsd32_mmap(struct thread *, struct freebsd6_freebsd32_mmap_args *); @@ -1169,6 +1203,9 @@ #ifdef PAD64_REQUIRED #else #endif +#ifdef PAD64_REQUIRED +#else +#endif int freebsd7_freebsd32_semctl(struct thread *, struct freebsd7_freebsd32_semctl_args *); int freebsd7_freebsd32_msgctl(struct thread *, struct freebsd7_freebsd32_msgctl_args *); int freebsd7_freebsd32_shmctl(struct thread *, struct freebsd7_freebsd32_shmctl_args *); @@ -1196,6 +1233,9 @@ #ifdef PAD64_REQUIRED #else #endif +#ifdef PAD64_REQUIRED +#else +#endif int freebsd10_freebsd32_pipe(struct thread *, struct freebsd10_freebsd32_pipe_args *); #endif /* COMPAT_FREEBSD10 */ @@ -1262,6 +1302,9 @@ #ifdef PAD64_REQUIRED #else #endif +#ifdef PAD64_REQUIRED +#else +#endif int freebsd11_freebsd32_stat(struct thread *, struct freebsd11_freebsd32_stat_args *); int freebsd11_freebsd32_fstat(struct thread *, struct freebsd11_freebsd32_fstat_args *); int freebsd11_freebsd32_lstat(struct thread *, struct freebsd11_freebsd32_lstat_args *); @@ -1294,6 +1337,9 @@ #ifdef PAD64_REQUIRED #else #endif +#ifdef PAD64_REQUIRED +#else +#endif #endif /* COMPAT_FREEBSD12 */ @@ -1461,6 +1507,8 @@ #define FREEBSD32_SYS_AUE_freebsd32_cpuset_getdomain AUE_NULL #define FREEBSD32_SYS_AUE_freebsd32_cpuset_setdomain AUE_NULL #define FREEBSD32_SYS_AUE_freebsd32___sysctlbyname AUE_SYSCTL +#define FREEBSD32_SYS_AUE_freebsd32_fzero AUE_FZERO +#define FREEBSD32_SYS_AUE_freebsd32_fzero AUE_FZERO #undef PAD_ #undef PADL_ Index: sys/compat/freebsd32/freebsd32_syscall.h =================================================================== --- sys/compat/freebsd32/freebsd32_syscall.h +++ sys/compat/freebsd32/freebsd32_syscall.h @@ 
-503,4 +503,6 @@ #define FREEBSD32_SYS___realpathat 574 #define FREEBSD32_SYS_close_range 575 #define FREEBSD32_SYS_rpctls_syscall 576 -#define FREEBSD32_SYS_MAXSYSCALL 577 +#define FREEBSD32_SYS_freebsd32_fzero 577 +#define FREEBSD32_SYS_freebsd32_fzero 577 +#define FREEBSD32_SYS_MAXSYSCALL 578 Index: sys/compat/freebsd32/freebsd32_syscalls.c =================================================================== --- sys/compat/freebsd32/freebsd32_syscalls.c +++ sys/compat/freebsd32/freebsd32_syscalls.c @@ -613,4 +613,9 @@ "__realpathat", /* 574 = __realpathat */ "close_range", /* 575 = close_range */ "rpctls_syscall", /* 576 = rpctls_syscall */ +#ifdef PAD64_REQUIRED + "freebsd32_fzero", /* 577 = freebsd32_fzero */ +#else + "freebsd32_fzero", /* 577 = freebsd32_fzero */ +#endif }; Index: sys/compat/freebsd32/freebsd32_sysent.c =================================================================== --- sys/compat/freebsd32/freebsd32_sysent.c +++ sys/compat/freebsd32/freebsd32_sysent.c @@ -666,4 +666,9 @@ { .sy_narg = AS(__realpathat_args), .sy_call = (sy_call_t *)sys___realpathat, .sy_auevent = AUE_REALPATHAT, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 574 = __realpathat */ { .sy_narg = AS(close_range_args), .sy_call = (sy_call_t *)sys_close_range, .sy_auevent = AUE_CLOSERANGE, .sy_flags = SYF_CAPENABLED, .sy_thrcnt = SY_THR_STATIC }, /* 575 = close_range */ { .sy_narg = AS(rpctls_syscall_args), .sy_call = (sy_call_t *)lkmressys, .sy_auevent = AUE_NULL, .sy_flags = 0, .sy_thrcnt = SY_THR_ABSENT }, /* 576 = rpctls_syscall */ +#ifdef PAD64_REQUIRED + { .sy_narg = AS(freebsd32_fzero_args), .sy_call = (sy_call_t *)freebsd32_fzero, .sy_auevent = AUE_FZERO, .sy_flags = SYF_CAPENABLED, .sy_thrcnt = SY_THR_STATIC }, /* 577 = freebsd32_fzero */ +#else + { .sy_narg = AS(freebsd32_fzero_args), .sy_call = (sy_call_t *)freebsd32_fzero, .sy_auevent = AUE_FZERO, .sy_flags = SYF_CAPENABLED, .sy_thrcnt = SY_THR_STATIC }, /* 577 = freebsd32_fzero */ +#endif }; Index: 
sys/compat/freebsd32/freebsd32_systrace_args.c =================================================================== --- sys/compat/freebsd32/freebsd32_systrace_args.c +++ sys/compat/freebsd32/freebsd32_systrace_args.c @@ -3384,6 +3384,34 @@ *n_args = 2; break; } +#ifdef PAD64_REQUIRED + /* freebsd32_fzero */ + case 577: { + struct freebsd32_fzero_args *p = params; + iarg[0] = p->fd; /* int */ + iarg[1] = p->pad; /* int */ + uarg[2] = p->offset1; /* uint32_t */ + uarg[3] = p->offset2; /* uint32_t */ + uarg[4] = p->len1; /* uint32_t */ + uarg[5] = p->len2; /* uint32_t */ + iarg[6] = p->flags; /* int */ + *n_args = 7; + break; + } +#else + /* freebsd32_fzero */ + case 577: { + struct freebsd32_fzero_args *p = params; + iarg[0] = p->fd; /* int */ + uarg[1] = p->offset1; /* uint32_t */ + uarg[2] = p->offset2; /* uint32_t */ + uarg[3] = p->len1; /* uint32_t */ + uarg[4] = p->len2; /* uint32_t */ + iarg[5] = p->flags; /* int */ + *n_args = 6; + break; + } +#endif default: *n_args = 0; break; @@ -9124,6 +9152,62 @@ break; }; break; +#ifdef PAD64_REQUIRED + /* freebsd32_fzero */ + case 577: + switch(ndx) { + case 0: + p = "int"; + break; + case 1: + p = "int"; + break; + case 2: + p = "uint32_t"; + break; + case 3: + p = "uint32_t"; + break; + case 4: + p = "uint32_t"; + break; + case 5: + p = "uint32_t"; + break; + case 6: + p = "int"; + break; + default: + break; + }; + break; +#else + /* freebsd32_fzero */ + case 577: + switch(ndx) { + case 0: + p = "int"; + break; + case 1: + p = "uint32_t"; + break; + case 2: + p = "uint32_t"; + break; + case 3: + p = "uint32_t"; + break; + case 4: + p = "uint32_t"; + break; + case 5: + p = "int"; + break; + default: + break; + }; + break; +#endif default: break; }; @@ -11028,6 +11112,19 @@ if (ndx == 0 || ndx == 1) p = "int"; break; +#ifdef PAD64_REQUIRED + /* freebsd32_fzero */ + case 577: + if (ndx == 0 || ndx == 1) + p = "off_t"; + break; +#else + /* freebsd32_fzero */ + case 577: + if (ndx == 0 || ndx == 1) + p = "off_t"; + break; 
+#endif default: break; }; Index: sys/compat/freebsd32/syscalls.master =================================================================== --- sys/compat/freebsd32/syscalls.master +++ sys/compat/freebsd32/syscalls.master @@ -1167,5 +1167,17 @@ ; 576 is initialised by the krpc code, if present. 576 AUE_NULL NOSTD|NOPROTO { int rpctls_syscall(int op, \ const char *path); } +#ifdef PAD64_REQUIRED +577 AUE_FZERO STD { off_t freebsd32_fzero(int fd, \ + int pad, \ + uint32_t offset1, uint32_t offset2,\ + uint32_t len1, uint32_t len2, + int flags); } +#else +577 AUE_FZERO STD { off_t freebsd32_fzero(int fd,\ + uint32_t offset1, uint32_t offset2,\ + uint32_t len1, uint32_t len2, + int flags); } +#endif ; vim: syntax=off Index: sys/contrib/openzfs/module/os/freebsd/zfs/zfs_file_os.c =================================================================== --- sys/contrib/openzfs/module/os/freebsd/zfs/zfs_file_os.c +++ sys/contrib/openzfs/module/os/freebsd/zfs/zfs_file_os.c @@ -158,7 +158,8 @@ rc = fo_read(fp, &auio, td->td_ucred, FOF_OFFSET, td); if (rc) return (SET_ERROR(rc)); - *resid = auio.uio_resid; + if (resid != NULL) + *resid = auio.uio_resid; *offp += count - auio.uio_resid; return (SET_ERROR(0)); } Index: sys/contrib/openzfs/module/os/freebsd/zfs/zfs_vnops.c =================================================================== --- sys/contrib/openzfs/module/os/freebsd/zfs/zfs_vnops.c +++ sys/contrib/openzfs/module/os/freebsd/zfs/zfs_vnops.c @@ -6519,6 +6519,33 @@ return (error); } +static int +zfs_freebsd_deallocate(struct vop_deallocate_args *ap) +{ + vnode_t *vp; + znode_t *zp; + flock64_t fl; + int error; + + vp = ap->a_vp; + zp = vp->v_data; + + if (ap->a_offset < 0 || ap->a_len < 0) + return (EINVAL); + if (ap->a_len == 0) + return (0); + + fl.l_type = F_WRLCK; + fl.l_whence = SEEK_SET; + fl.l_start = ap->a_offset; + fl.l_len = ap->a_len; + if (fl.l_start + fl.l_len < fl.l_start) + fl.l_len = OFF_MAX - fl.l_start; + + error = zfs_space(zp, F_FREESP, &fl, O_RDWR, 
ap->a_offset, ap->a_cred); + return (error); +} + struct vop_vector zfs_vnodeops; struct vop_vector zfs_fifoops; struct vop_vector zfs_shareops; @@ -6535,6 +6562,7 @@ #endif .vop_access = zfs_freebsd_access, .vop_allocate = VOP_EINVAL, + .vop_deallocate = zfs_freebsd_deallocate, .vop_lookup = zfs_cache_lookup, .vop_cachedlookup = zfs_freebsd_cachedlookup, .vop_getattr = zfs_freebsd_getattr, Index: sys/contrib/openzfs/module/os/freebsd/zfs/zfs_znode.c =================================================================== --- sys/contrib/openzfs/module/os/freebsd/zfs/zfs_znode.c +++ sys/contrib/openzfs/module/os/freebsd/zfs/zfs_znode.c @@ -1445,14 +1445,8 @@ error = dmu_free_long_range(zfsvfs->z_os, zp->z_id, off, len); - if (error == 0) { - /* - * In FreeBSD we cannot free block in the middle of a file, - * but only at the end of a file, so this code path should - * never happen. - */ - vnode_pager_setsize(ZTOV(zp), off); - } + if (error == 0) + vnode_pager_purge_range(ZTOV(zp), off, off + len); zfs_rangelock_exit(lr); Index: sys/kern/capabilities.conf =================================================================== --- sys/kern/capabilities.conf +++ sys/kern/capabilities.conf @@ -513,6 +513,7 @@ posix_fallocate pread preadv +fzero ## ## Allow access to profiling state on the current process. 
Index: sys/kern/init_sysent.c =================================================================== --- sys/kern/init_sysent.c +++ sys/kern/init_sysent.c @@ -632,4 +632,5 @@ { .sy_narg = AS(__realpathat_args), .sy_call = (sy_call_t *)sys___realpathat, .sy_auevent = AUE_REALPATHAT, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 574 = __realpathat */ { .sy_narg = AS(close_range_args), .sy_call = (sy_call_t *)sys_close_range, .sy_auevent = AUE_CLOSERANGE, .sy_flags = SYF_CAPENABLED, .sy_thrcnt = SY_THR_STATIC }, /* 575 = close_range */ { .sy_narg = AS(rpctls_syscall_args), .sy_call = (sy_call_t *)lkmressys, .sy_auevent = AUE_NULL, .sy_flags = 0, .sy_thrcnt = SY_THR_ABSENT }, /* 576 = rpctls_syscall */ + { .sy_narg = AS(fzero_args), .sy_call = (sy_call_t *)sys_fzero, .sy_auevent = AUE_FZERO, .sy_flags = SYF_CAPENABLED, .sy_thrcnt = SY_THR_STATIC }, /* 577 = fzero */ }; Index: sys/kern/sys_generic.c =================================================================== --- sys/kern/sys_generic.c +++ sys/kern/sys_generic.c @@ -860,6 +860,60 @@ return (error); } +int +sys_fzero(struct thread *td, struct fzero_args *uap) +{ + int error; + + error = kern_fzero(td, uap->fd, uap->offset, uap->len, uap->flags); + return (error); +} + +int +kern_fzero(struct thread *td, int fd, off_t offset, off_t len, int flags) +{ + struct file *fp; + int error; + off_t cnt; + int fzero_flags; + + cnt = len; + fzero_flags = 0; + + AUDIT_ARG_FD(fd); + if ((flags & ~FZERO_FL_MASK) != 0) + return (EINVAL); + if (offset < 0 || len <= 0) + return (EINVAL); + + if ((flags & FZERO_FL_KEEP_SIZE) != 0) + fzero_flags |= VN_FZERO_NOEXPAND; + + AUDIT_ARG_FD(fd); + error = fget(td, fd, &cap_pwrite_rights, &fp); + if (error != 0) + return (error); + AUDIT_ARG_FILE(td->td_proc, fp); + if ((fp->f_ops->fo_flags & DFLAG_SEEKABLE) == 0) { + error = ESPIPE; + goto out; + } + if ((fp->f_flag & FWRITE) == 0) { + error = EBADF; + goto out; + } + + error = fo_fzero(fp, offset, &cnt, fzero_flags, td->td_ucred, td); + if 
(cnt != len && + (error == ERESTART || error == EINTR || error == EWOULDBLOCK)) + error = 0; + len -= cnt; +out: + fdrop(fp, td); + td->td_retval[0] = len; + return (error); +} + int poll_no_poll(int events) { Index: sys/kern/syscalls.c =================================================================== --- sys/kern/syscalls.c +++ sys/kern/syscalls.c @@ -583,4 +583,5 @@ "__realpathat", /* 574 = __realpathat */ "close_range", /* 575 = close_range */ "rpctls_syscall", /* 576 = rpctls_syscall */ + "fzero", /* 577 = fzero */ }; Index: sys/kern/syscalls.master =================================================================== --- sys/kern/syscalls.master +++ sys/kern/syscalls.master @@ -3241,6 +3241,14 @@ _In_z_ const char *path ); } +577 AUE_FZERO STD { + off_t fzero( + int fd, + off_t offset, + off_t len, + int flags + ); + } ; Please copy any additions and changes to the following compatability tables: ; sys/compat/freebsd32/syscalls.master Index: sys/kern/systrace_args.c =================================================================== --- sys/kern/systrace_args.c +++ sys/kern/systrace_args.c @@ -3376,6 +3376,16 @@ *n_args = 2; break; } + /* fzero */ + case 577: { + struct fzero_args *p = params; + iarg[0] = p->fd; /* int */ + iarg[1] = p->offset; /* off_t */ + iarg[2] = p->len; /* off_t */ + iarg[3] = p->flags; /* int */ + *n_args = 4; + break; + } default: *n_args = 0; break; @@ -9029,6 +9039,25 @@ break; }; break; + /* fzero */ + case 577: + switch(ndx) { + case 0: + p = "int"; + break; + case 1: + p = "off_t"; + break; + case 2: + p = "off_t"; + break; + case 3: + p = "int"; + break; + default: + break; + }; + break; default: break; }; @@ -10960,6 +10989,11 @@ if (ndx == 0 || ndx == 1) p = "int"; break; + /* fzero */ + case 577: + if (ndx == 0 || ndx == 1) + p = "off_t"; + break; default: break; }; Index: sys/kern/vfs_default.c =================================================================== --- sys/kern/vfs_default.c +++ sys/kern/vfs_default.c @@ -116,6 
+116,7 @@ .vop_advlockasync = vop_stdadvlockasync, .vop_advlockpurge = vop_stdadvlockpurge, .vop_allocate = vop_stdallocate, + .vop_deallocate = VOP_EOPNOTSUPP, .vop_bmap = vop_stdbmap, .vop_close = VOP_NULL, .vop_fsync = VOP_NULL, Index: sys/kern/vfs_vnops.c =================================================================== --- sys/kern/vfs_vnops.c +++ sys/kern/vfs_vnops.c @@ -45,8 +45,6 @@ #include __FBSDID("$FreeBSD$"); -#include "opt_hwpmc_hooks.h" - #include #include #include @@ -105,6 +103,7 @@ static fo_close_t vn_closefile; static fo_mmap_t vn_mmap; static fo_fallocate_t vn_fallocate; +static fo_fzero_t vn_fzero; struct fileops vnops = { .fo_read = vn_io_fault, @@ -122,6 +121,7 @@ .fo_fill_kinfo = vn_fill_kinfo, .fo_mmap = vn_mmap, .fo_fallocate = vn_fallocate, + .fo_fzero = vn_fzero, .fo_flags = DFLAG_PASSABLE | DFLAG_SEEKABLE }; @@ -2325,7 +2325,8 @@ } int -vn_bmap_seekhole(struct vnode *vp, u_long cmd, off_t *off, struct ucred *cred) +vn_bmap_seekhole_locked( + struct vnode *vp, u_long cmd, off_t *off, struct ucred *cred) { struct vattr va; daddr_t bn, bnp; @@ -2333,37 +2334,32 @@ off_t noff; int error; - KASSERT(cmd == FIOSEEKHOLE || cmd == FIOSEEKDATA, - ("Wrong command %lu", cmd)); - - if (vn_lock(vp, LK_SHARED) != 0) - return (EBADF); if (vp->v_type != VREG) { error = ENOTTY; - goto unlock; + goto out; } error = VOP_GETATTR(vp, &va, cred); if (error != 0) - goto unlock; + goto out; noff = *off; if (noff >= va.va_size) { error = ENXIO; - goto unlock; + goto out; } bsize = vp->v_mount->mnt_stat.f_iosize; - for (bn = noff / bsize; noff < va.va_size; bn++, noff += bsize - - noff % bsize) { + for (bn = noff / bsize; noff < va.va_size; + bn++, noff += bsize - noff % bsize) { error = VOP_BMAP(vp, bn, NULL, &bnp, NULL, NULL); if (error == EOPNOTSUPP) { error = ENOTTY; - goto unlock; + goto out; } if ((bnp == -1 && cmd == FIOSEEKHOLE) || (bnp != -1 && cmd == FIOSEEKDATA)) { noff = bn * bsize; if (noff < *off) noff = *off; - goto unlock; + goto out; } } if 
(noff > va.va_size) @@ -2371,13 +2367,27 @@ /* noff == va.va_size. There is an implicit hole at the end of file. */ if (cmd == FIOSEEKDATA) error = ENXIO; -unlock: - VOP_UNLOCK(vp); +out: if (error == 0) *off = noff; return (error); } +int +vn_bmap_seekhole(struct vnode *vp, u_long cmd, off_t *off, struct ucred *cred) +{ + int error; + + KASSERT(cmd == FIOSEEKHOLE || cmd == FIOSEEKDATA, + ("Wrong command %lu", cmd)); + + if (vn_lock(vp, LK_SHARED) != 0) + return (EBADF); + error = vn_bmap_seekhole_locked(vp, cmd, off, cred); + VOP_UNLOCK(vp); + return (error); +} + int vn_seek(struct file *fp, off_t offset, int whence, struct thread *td) { @@ -3318,3 +3328,205 @@ return (error); } + +static int +vp_zerofill(struct vnode *vp, struct vattr *vap, off_t offset, off_t len, + off_t *residp, struct ucred *cred) +{ + int iosize; + int error = EOVERFLOW; + void *buf = NULL; + struct iovec aiov; + struct uio auio; + struct thread *td; + + iosize = vap->va_blocksize; + td = curthread; + + if (iosize == 0) + iosize = BLKDEV_IOSIZE; + if (iosize > MAXPHYS) + iosize = MAXPHYS; + buf = malloc(iosize, M_TEMP, M_ZERO | M_WAITOK); + + while (len > 0) { + int xfersize = iosize; + if (offset % iosize != 0) + xfersize -= offset % iosize; + if (xfersize > len) + xfersize = len; + + aiov.iov_base = buf; + aiov.iov_len = xfersize; + auio.uio_iov = &aiov; + auio.uio_iovcnt = 1; + auio.uio_offset = offset; + auio.uio_resid = xfersize; + auio.uio_segflg = UIO_SYSSPACE; + auio.uio_rw = UIO_WRITE; + auio.uio_td = td; + + error = VOP_WRITE(vp, &auio, 0, cred); + if (error != 0) { + len -= xfersize - auio.uio_resid; + break; + } + + len -= xfersize; + offset += xfersize; + } + + free(buf, M_TEMP); + *residp = len; + return (error); +} + +static int +vn_generic_zero_locked(struct vnode *vp, off_t offset, off_t *residp, + struct ucred *cred) +{ + int error; + struct vattr va; + off_t resid; + + resid = *residp; + + if (offset + resid < offset) + return (EOVERFLOW); + if (resid == 0) + return (0); 
+ + error = VOP_GETATTR(vp, &va, cred); + if (error) + return (error); + if (va.va_size > OFF_MAX) + return (EOVERFLOW); + + /* + * Try to upgrade our locks to exclusive lock as we are now writing and + * it is possible to extend. + */ + if (VOP_ISLOCKED(vp) != LK_EXCLUSIVE) + vn_lock(vp, LK_UPGRADE | LK_RETRY); + + while (resid > 0) { + off_t noff; + off_t xfersize; + off_t rem; + + noff = offset; + + error = vn_bmap_seekhole_locked(vp, FIOSEEKDATA, &noff, cred); + if (error) { + if (error == ENXIO) { + /* No more data: extend only, never shrink */ + error = (offset + resid > (off_t)va.va_size) ? + vn_truncate_locked(vp, offset + resid, + false, cred) : 0; + if (error) + goto out; + resid = 0; + break; + } + /* XXX: Is it okay to fallback further? */ + goto out; + } + KASSERT(noff >= offset, ("FIOSEEKDATA going backward")); + if (noff != offset) { + xfersize = omin(noff - offset, resid); + resid -= xfersize; + offset += xfersize; + continue; + } + error = vn_bmap_seekhole_locked(vp, FIOSEEKHOLE, &noff, cred); + if (error) + goto out; + + xfersize = noff - offset; + if (xfersize > resid) + xfersize = resid; + + /* Fill zeroes */ + error = vp_zerofill(vp, &va, offset, xfersize, &rem, cred); + if (error) { + resid -= xfersize - rem; + offset += xfersize - rem; + goto out; + } + + resid -= xfersize; + offset += xfersize; + } +out: + if (*residp != resid && + (error == ERESTART || error == EINTR || error == EWOULDBLOCK)) + error = 0; + else if (error) + vn_truncate_locked(vp, va.va_size, false, cred); + *residp = resid; + return (error); +} + +static int +vn_fzero(struct file *fp, off_t offset, off_t *residp, int flags, + struct ucred *active_cred, struct thread *td) +{ + int error; + struct mount *mp; + struct vnode *vp; + int lock_flags; + void *rl_cookie = NULL; + off_t resid; + + vp = fp->f_vnode; + resid = *residp; + + if ((flags & ~VN_FZERO_MASK) != 0) + return (EINVAL); + + if (offset + resid < offset) + return (EOVERFLOW); + + if (vp->v_type != VREG) + return (ENODEV); + + rl_cookie = 
vn_rangelock_wlock(vp, offset, offset + resid); + if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) + goto out; + + if (MNT_SHARED_WRITES(mp) || + (mp == NULL && MNT_SHARED_WRITES(vp->v_mount))) { + lock_flags = LK_SHARED; + } else { + lock_flags = LK_EXCLUSIVE; + } + vn_lock(vp, lock_flags | LK_RETRY); + + if (flags & VN_FZERO_NOEXPAND) { + struct vattr va; + + error = VOP_GETATTR(vp, &va, active_cred); + if (error) + goto unlock; + if (va.va_size > OFF_MAX) { + error = EOVERFLOW; + goto unlock; + } + /* Clamp to EOF; nothing to zero for a range past EOF. */ + resid = omax(omin(resid, (off_t)va.va_size - offset), 0); + } + error = VOP_DEALLOCATE(vp, offset, resid, active_cred); + if (error == 0) + resid = 0; + else if (error == EOPNOTSUPP) + error = vn_generic_zero_locked( + vp, offset, &resid, active_cred); +unlock: + VOP_UNLOCK(vp); + vn_finished_write(mp); + +out: + *residp = resid; + vn_rangelock_unlock(vp, rl_cookie); + return (error); +} Index: sys/kern/vnode_if.src =================================================================== --- sys/kern/vnode_if.src +++ sys/kern/vnode_if.src @@ -782,6 +782,16 @@ }; +%% deallocate vp L L L + +vop_deallocate { + IN struct vnode *vp; + IN off_t offset; + IN off_t len; + IN struct ucred *cred; +}; + + # The VOPs below are spares at the end of the table to allow new VOPs to be # added in stable branches without breaking the KBI. New VOPs in HEAD should # be added above these spares. 
When merging a new VOP to a stable branch, Index: sys/security/audit/audit_bsm.c =================================================================== --- sys/security/audit/audit_bsm.c +++ sys/security/audit/audit_bsm.c @@ -1030,6 +1030,7 @@ case AUE_LSEEK: case AUE_POLL: case AUE_POSIX_FALLOCATE: + case AUE_FZERO: case AUE_PREAD: case AUE_PWRITE: case AUE_READ: Index: sys/sys/fcntl.h =================================================================== --- sys/sys/fcntl.h +++ sys/sys/fcntl.h @@ -342,6 +342,9 @@ #define FD_NONE -200 #endif +#define FZERO_FL_KEEP_SIZE 0x0001 /* File size should be kept the same */ +#define FZERO_FL_MASK (FZERO_FL_KEEP_SIZE) + #ifndef _KERNEL __BEGIN_DECLS int open(const char *, int, ...); @@ -357,6 +360,7 @@ int posix_fadvise(int, off_t, off_t, int); int posix_fallocate(int, off_t, off_t); #endif +off_t fzero(int, off_t, off_t, int); __END_DECLS #endif Index: sys/sys/file.h =================================================================== --- sys/sys/file.h +++ sys/sys/file.h @@ -129,6 +129,8 @@ typedef int fo_get_seals_t(struct file *fp, int *flags); typedef int fo_fallocate_t(struct file *fp, off_t offset, off_t len, struct thread *td); +typedef int fo_fzero_t(struct file *fp, off_t offset, off_t *residp, int mode, + struct ucred *active_cred, struct thread *td); typedef int fo_flags_t; struct fileops { @@ -150,6 +152,7 @@ fo_add_seals_t *fo_add_seals; fo_get_seals_t *fo_get_seals; fo_fallocate_t *fo_fallocate; + fo_fzero_t *fo_fzero; fo_flags_t fo_flags; /* DFLAG_* below */ }; @@ -459,6 +462,17 @@ return ((*fp->f_ops->fo_fallocate)(fp, offset, len, td)); } +static __inline int +fo_fzero(struct file *fp, off_t offset, off_t *residp, int flags, + struct ucred *active_cred, struct thread *td) +{ + + if (fp->f_ops->fo_fzero == NULL) + return (ENODEV); + return ( + (*fp->f_ops->fo_fzero)(fp, offset, residp, flags, active_cred, td)); +} + #endif /* _KERNEL */ #endif /* !SYS_FILE_H */ Index: sys/sys/syscall.h 
=================================================================== --- sys/sys/syscall.h +++ sys/sys/syscall.h @@ -512,4 +512,5 @@ #define SYS___realpathat 574 #define SYS_close_range 575 #define SYS_rpctls_syscall 576 -#define SYS_MAXSYSCALL 577 +#define SYS_fzero 577 +#define SYS_MAXSYSCALL 578 Index: sys/sys/syscall.mk =================================================================== --- sys/sys/syscall.mk +++ sys/sys/syscall.mk @@ -417,4 +417,5 @@ sigfastblock.o \ __realpathat.o \ close_range.o \ - rpctls_syscall.o + rpctls_syscall.o \ + fzero.o Index: sys/sys/syscallsubr.h =================================================================== --- sys/sys/syscallsubr.h +++ sys/sys/syscallsubr.h @@ -227,6 +227,8 @@ int advice); int kern_posix_fallocate(struct thread *td, int fd, off_t offset, off_t len); +int kern_fzero(struct thread *td, int fd, off_t offset, off_t len, + int flags); int kern_procctl(struct thread *td, enum idtype idtype, id_t id, int com, void *data); int kern_pread(struct thread *td, int fd, void *buf, size_t nbyte, Index: sys/sys/sysproto.h =================================================================== --- sys/sys/sysproto.h +++ sys/sys/sysproto.h @@ -1836,6 +1836,12 @@ char op_l_[PADL_(int)]; int op; char op_r_[PADR_(int)]; char path_l_[PADL_(const char *)]; const char * path; char path_r_[PADR_(const char *)]; }; +struct fzero_args { + char fd_l_[PADL_(int)]; int fd; char fd_r_[PADR_(int)]; + char offset_l_[PADL_(off_t)]; off_t offset; char offset_r_[PADR_(off_t)]; + char len_l_[PADL_(off_t)]; off_t len; char len_r_[PADR_(off_t)]; + char flags_l_[PADL_(int)]; int flags; char flags_r_[PADR_(int)]; +}; int nosys(struct thread *, struct nosys_args *); void sys_sys_exit(struct thread *, struct sys_exit_args *); int sys_fork(struct thread *, struct fork_args *); @@ -2227,6 +2233,7 @@ int sys___realpathat(struct thread *, struct __realpathat_args *); int sys_close_range(struct thread *, struct close_range_args *); int 
sys_rpctls_syscall(struct thread *, struct rpctls_syscall_args *); +int sys_fzero(struct thread *, struct fzero_args *); #ifdef COMPAT_43 @@ -3158,6 +3165,7 @@ #define SYS_AUE___realpathat AUE_REALPATHAT #define SYS_AUE_close_range AUE_CLOSERANGE #define SYS_AUE_rpctls_syscall AUE_NULL +#define SYS_AUE_fzero AUE_FZERO #undef PAD_ #undef PADL_ Index: sys/sys/vnode.h =================================================================== --- sys/sys/vnode.h +++ sys/sys/vnode.h @@ -605,6 +605,10 @@ #define VN_OPEN_NAMECACHE 0x00000004 #define VN_OPEN_INVFS 0x00000008 +/* vn_fzero flags */ +#define VN_FZERO_NOEXPAND 0x00000001 /* File size is kept the same */ +#define VN_FZERO_MASK (VN_FZERO_NOEXPAND) + /* * Public vnode manipulation functions. */ @@ -709,6 +713,9 @@ void vn_printf(struct vnode *vp, const char *fmt, ...) __printflike(2,3); int vrecycle(struct vnode *vp); int vrecyclel(struct vnode *vp); +/* vn_bmap_seekhole_locked is not part of the KPI */ +int vn_bmap_seekhole_locked( + struct vnode *vp, u_long cmd, off_t *off, struct ucred *cred); int vn_bmap_seekhole(struct vnode *vp, u_long cmd, off_t *off, struct ucred *cred); int vn_close(struct vnode *vp, Index: usr.sbin/bhyve/block_if.c =================================================================== --- usr.sbin/bhyve/block_if.c +++ usr.sbin/bhyve/block_if.c @@ -332,7 +332,9 @@ else br->br_resid = 0; } else - err = EOPNOTSUPP; + /* fzero() returns -1 and sets errno on failure. */ + err = fzero(bc->bc_fd, br->br_offset, br->br_resid, + FZERO_FL_KEEP_SIZE) == -1 ? errno : 0; break; default: err = EINVAL; @@ -538,8 +540,12 @@ candelete = arg.value.i; if (ioctl(fd, DIOCGPROVIDERNAME, name) == 0) geom = 1; - } else + } else { psectsz = sbuf.st_blksize; + /* XXX: Do we need to check if we are on fzero-capable FS? */ + if (nodelete == 0) + candelete = 1; + } #ifndef WITHOUT_CAPSICUM if (caph_ioctls_limit(fd, cmds, nitems(cmds)) == -1)