Page MenuHomeFreeBSD

D28347.id83416.diff
No OneTemporary

D28347.id83416.diff

Index: lib/libc/sys/Symbol.map
===================================================================
--- lib/libc/sys/Symbol.map
+++ lib/libc/sys/Symbol.map
@@ -410,6 +410,7 @@
fhlink;
fhlinkat;
fhreadlink;
+ fspacectl;
getfhat;
funlinkat;
memfd_create;
Index: sys/bsm/audit_kevents.h
===================================================================
--- sys/bsm/audit_kevents.h
+++ sys/bsm/audit_kevents.h
@@ -662,6 +662,7 @@
#define AUE_SPECIALFD 43266 /* FreeBSD-specific. */
#define AUE_AIO_WRITEV 43267 /* FreeBSD-specific. */
#define AUE_AIO_READV 43268 /* FreeBSD-specific. */
+#define AUE_FSPACECTL 43269 /* FreeBSD-specific. */
/*
* Darwin BSM uses a number of AUE_O_* definitions, which are aliased to the
Index: sys/compat/freebsd32/freebsd32_misc.c
===================================================================
--- sys/compat/freebsd32/freebsd32_misc.c
+++ sys/compat/freebsd32/freebsd32_misc.c
@@ -3569,6 +3569,20 @@
return (kern_posix_error(td, error));
}
+/*
+ * 32-bit compat entry point for fspacectl(2).
+ *
+ * Copies the caller's struct spacectl_range into kernel memory and forwards
+ * the request to kern_fspacectl().
+ *
+ * Returns 0 on success, the copyin() error if the range cannot be read, or
+ * whatever kern_fspacectl() returns.
+ */
+int
+freebsd32_fspacectl(struct thread *td, struct freebsd32_fspacectl_args *uap)
+{
+	struct spacectl_range range;
+	int error;
+
+	error = copyin(uap->range, &range, sizeof(range));
+	if (error != 0)
+		return (error);
+
+	/*
+	 * Hand over the kernel copy, not uap->range: kern_fspacectl() reads
+	 * r_offset/r_resid through the pointer it is given, so it must never
+	 * receive the userland address.
+	 */
+	return (kern_fspacectl(td, uap->fd, uap->cmd, &range, uap->flags));
+}
+
int
freebsd32_posix_fadvise(struct thread *td,
struct freebsd32_posix_fadvise_args *uap)
Index: sys/compat/freebsd32/freebsd32_proto.h
===================================================================
--- sys/compat/freebsd32/freebsd32_proto.h
+++ sys/compat/freebsd32/freebsd32_proto.h
@@ -751,6 +751,12 @@
struct freebsd32_aio_readv_args {
char aiocbp_l_[PADL_(struct aiocb32 *)]; struct aiocb32 * aiocbp; char aiocbp_r_[PADR_(struct aiocb32 *)];
};
+/*
+ * Argument block for freebsd32_fspacectl(): fd, cmd, range and flags, each
+ * surrounded by PADL_()/PADR_() padding arrays.
+ */
+struct freebsd32_fspacectl_args {
+ char fd_l_[PADL_(int)]; int fd; char fd_r_[PADR_(int)];
+ char cmd_l_[PADL_(int)]; int cmd; char cmd_r_[PADR_(int)];
+ char range_l_[PADL_(struct spacectl_range *)]; struct spacectl_range * range; char range_r_[PADR_(struct spacectl_range *)];
+ char flags_l_[PADL_(int)]; int flags; char flags_r_[PADR_(int)];
+};
#if !defined(PAD64_REQUIRED) && !defined(__amd64__)
#define PAD64_REQUIRED
#endif
@@ -893,6 +899,7 @@
int freebsd32___sysctlbyname(struct thread *, struct freebsd32___sysctlbyname_args *);
int freebsd32_aio_writev(struct thread *, struct freebsd32_aio_writev_args *);
int freebsd32_aio_readv(struct thread *, struct freebsd32_aio_readv_args *);
+int freebsd32_fspacectl(struct thread *, struct freebsd32_fspacectl_args *);
#ifdef COMPAT_43
@@ -1476,6 +1483,7 @@
#define FREEBSD32_SYS_AUE_freebsd32___sysctlbyname AUE_SYSCTL
#define FREEBSD32_SYS_AUE_freebsd32_aio_writev AUE_AIO_WRITEV
#define FREEBSD32_SYS_AUE_freebsd32_aio_readv AUE_AIO_READV
+#define FREEBSD32_SYS_AUE_freebsd32_fspacectl AUE_FSPACECTL
#undef PAD_
#undef PADL_
Index: sys/compat/freebsd32/freebsd32_syscall.h
===================================================================
--- sys/compat/freebsd32/freebsd32_syscall.h
+++ sys/compat/freebsd32/freebsd32_syscall.h
@@ -506,4 +506,5 @@
#define FREEBSD32_SYS___specialfd 577
#define FREEBSD32_SYS_freebsd32_aio_writev 578
#define FREEBSD32_SYS_freebsd32_aio_readv 579
-#define FREEBSD32_SYS_MAXSYSCALL 580
+#define FREEBSD32_SYS_freebsd32_fspacectl 580
+#define FREEBSD32_SYS_MAXSYSCALL 581
Index: sys/compat/freebsd32/freebsd32_syscalls.c
===================================================================
--- sys/compat/freebsd32/freebsd32_syscalls.c
+++ sys/compat/freebsd32/freebsd32_syscalls.c
@@ -616,4 +616,5 @@
"__specialfd", /* 577 = __specialfd */
"freebsd32_aio_writev", /* 578 = freebsd32_aio_writev */
"freebsd32_aio_readv", /* 579 = freebsd32_aio_readv */
+ "freebsd32_fspacectl", /* 580 = freebsd32_fspacectl */
};
Index: sys/compat/freebsd32/freebsd32_sysent.c
===================================================================
--- sys/compat/freebsd32/freebsd32_sysent.c
+++ sys/compat/freebsd32/freebsd32_sysent.c
@@ -669,4 +669,5 @@
{ .sy_narg = AS(__specialfd_args), .sy_call = (sy_call_t *)sys___specialfd, .sy_auevent = AUE_SPECIALFD, .sy_flags = SYF_CAPENABLED, .sy_thrcnt = SY_THR_STATIC }, /* 577 = __specialfd */
{ .sy_narg = AS(freebsd32_aio_writev_args), .sy_call = (sy_call_t *)freebsd32_aio_writev, .sy_auevent = AUE_AIO_WRITEV, .sy_flags = SYF_CAPENABLED, .sy_thrcnt = SY_THR_STATIC }, /* 578 = freebsd32_aio_writev */
{ .sy_narg = AS(freebsd32_aio_readv_args), .sy_call = (sy_call_t *)freebsd32_aio_readv, .sy_auevent = AUE_AIO_READV, .sy_flags = SYF_CAPENABLED, .sy_thrcnt = SY_THR_STATIC }, /* 579 = freebsd32_aio_readv */
+ { .sy_narg = AS(freebsd32_fspacectl_args), .sy_call = (sy_call_t *)freebsd32_fspacectl, .sy_auevent = AUE_FSPACECTL, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 580 = freebsd32_fspacectl */
};
Index: sys/compat/freebsd32/freebsd32_systrace_args.c
===================================================================
--- sys/compat/freebsd32/freebsd32_systrace_args.c
+++ sys/compat/freebsd32/freebsd32_systrace_args.c
@@ -3407,6 +3407,16 @@
*n_args = 1;
break;
}
+ /* freebsd32_fspacectl */
+ case 580: {
+ struct freebsd32_fspacectl_args *p = params;
+ iarg[0] = p->fd; /* int */
+ iarg[1] = p->cmd; /* int */
+ uarg[2] = (intptr_t) p->range; /* struct spacectl_range * */
+ iarg[3] = p->flags; /* int */
+ *n_args = 4;
+ break;
+ }
default:
*n_args = 0;
break;
@@ -9183,6 +9193,25 @@
break;
};
break;
+ /* freebsd32_fspacectl */
+ case 580:
+ switch(ndx) {
+ case 0:
+ p = "int";
+ break;
+ case 1:
+ p = "int";
+ break;
+ case 2:
+ p = "userland struct spacectl_range *";
+ break;
+ case 3:
+ p = "int";
+ break;
+ default:
+ break;
+ };
+ break;
default:
break;
};
@@ -11102,6 +11131,11 @@
if (ndx == 0 || ndx == 1)
p = "int";
break;
+ /* freebsd32_fspacectl */
+ case 580:
+ if (ndx == 0 || ndx == 1)
+ p = "int";
+ break;
default:
break;
};
Index: sys/compat/freebsd32/syscalls.master
===================================================================
--- sys/compat/freebsd32/syscalls.master
+++ sys/compat/freebsd32/syscalls.master
@@ -1174,5 +1174,9 @@
struct aiocb32 *aiocbp); }
579 AUE_AIO_READV STD { int freebsd32_aio_readv( \
struct aiocb32 *aiocbp); }
+580 AUE_FSPACECTL STD { int freebsd32_fspacectl(int fd, \
+ int cmd, \
+ struct spacectl_range *range,\
+ int flags); }
; vim: syntax=off
Index: sys/contrib/openzfs/module/os/freebsd/zfs/zfs_vnops_os.c
===================================================================
--- sys/contrib/openzfs/module/os/freebsd/zfs/zfs_vnops_os.c
+++ sys/contrib/openzfs/module/os/freebsd/zfs/zfs_vnops_os.c
@@ -3797,6 +3797,49 @@
return (error);
}
+/*
+ * Common code for the zfs_space-related operations.
+ *
+ * Called from zfs_space() and zfs_deallocate(); both enter the filesystem
+ * (ZFS_ENTER/ZFS_VERIFY_ZP) before calling and exit it afterwards, so this
+ * helper simply returns on error.
+ *
+ *	IN:	zp	- znode of the file to free space in.
+ *		cmd	- must be F_FREESP.
+ *		off	- start of the range.
+ *		len	- length of the range; must not be negative.
+ *		flag	- passed through to zfs_freesp().
+ *		cr	- credentials used for the write-access check.
+ *
+ *	RETURN:	0 on success, error code on failure (EINVAL for a bad cmd or
+ *		a negative len, EROFS on a read-only filesystem, or the error
+ *		from zfs_zaccess()/zfs_freesp()).
+ */
+static int
+zfs_space_common(znode_t *zp, int cmd, off_t off, off_t len, int flag,
+    cred_t *cr)
+{
+	zfsvfs_t *zfsvfs = ZTOZSB(zp);
+	int error;
+
+	/*
+	 * Locally generated errors go through SET_ERROR(), as they did in
+	 * zfs_space() before this code was factored out.
+	 */
+	if (cmd != F_FREESP)
+		return (SET_ERROR(EINVAL));
+
+	/*
+	 * Callers might not be able to detect properly that we are read-only,
+	 * so check it explicitly here.
+	 */
+	if (zfs_is_readonly(zfsvfs))
+		return (SET_ERROR(EROFS));
+
+	if (len < 0)
+		return (SET_ERROR(EINVAL));
+
+	/*
+	 * Permissions aren't checked on Solaris because on this OS
+	 * zfs_space() can only be called with an opened file handle.
+	 * On Linux we can get here through truncate_range() which
+	 * operates directly on inodes, so we need to check access rights.
+	 */
+	error = zfs_zaccess(zp, ACE_WRITE_DATA, 0, B_FALSE, cr);
+	if (error != 0)
+		return (error);
+
+	/* zfs_freesp() is handed the range as unsigned 64-bit values. */
+	return (zfs_freesp(zp, (uint64_t)off, (uint64_t)len, flag, TRUE));
+}
+
+
/*
* Free or allocate space in a file. Currently, this function only
* supports the `F_FREESP' command. However, this command is somewhat
@@ -3821,47 +3864,16 @@
offset_t offset, cred_t *cr)
{
zfsvfs_t *zfsvfs = ZTOZSB(zp);
- uint64_t off, len;
+ off_t off, len;
int error;
ZFS_ENTER(zfsvfs);
ZFS_VERIFY_ZP(zp);
- if (cmd != F_FREESP) {
- ZFS_EXIT(zfsvfs);
- return (SET_ERROR(EINVAL));
- }
-
- /*
- * Callers might not be able to detect properly that we are read-only,
- * so check it explicitly here.
- */
- if (zfs_is_readonly(zfsvfs)) {
- ZFS_EXIT(zfsvfs);
- return (SET_ERROR(EROFS));
- }
-
- if (bfp->l_len < 0) {
- ZFS_EXIT(zfsvfs);
- return (SET_ERROR(EINVAL));
- }
-
- /*
- * Permissions aren't checked on Solaris because on this OS
- * zfs_space() can only be called with an opened file handle.
- * On Linux we can get here through truncate_range() which
- * operates directly on inodes, so we need to check access rights.
- */
- if ((error = zfs_zaccess(zp, ACE_WRITE_DATA, 0, B_FALSE, cr))) {
- ZFS_EXIT(zfsvfs);
- return (error);
- }
-
off = bfp->l_start;
len = bfp->l_len; /* 0 means from off to end of file */
- error = zfs_freesp(zp, off, len, flag, TRUE);
-
+ error = zfs_space_common(zp, cmd, off, len, flag, cr);
ZFS_EXIT(zfsvfs);
return (error);
}
@@ -5189,6 +5201,9 @@
case _PC_NAME_MAX:
*ap->a_retval = NAME_MAX;
return (0);
+ case _PC_FDEALLOC_PRESENT:
+ *ap->a_retval = 1;
+ return (0);
case _PC_PIPE_BUF:
if (ap->a_vp->v_type == VDIR || ap->a_vp->v_type == VFIFO) {
*ap->a_retval = PIPE_BUF;
@@ -5781,6 +5796,51 @@
return (error);
}
+/*
+ * VOP_DEALLOCATE implementation for ZFS.
+ *
+ * This is mostly the same as zfs_space except it limits the range of operation
+ * to zp->z_size, so the log record of zfs_space during replay would be reused.
+ *
+ * File size is supposed to grow only under vnode shared lock, even though we do
+ * not grab rangelock on the znode.
+ *
+ * On success *ap->a_len is set to 0 (the whole request is considered done,
+ * including any part beyond end of file).  Returns EINVAL for a negative
+ * offset or length, otherwise the result of zfs_space_common().
+ */
+static int
+zfs_deallocate(struct vop_deallocate_args *ap)
+{
+	vnode_t *vp;
+	znode_t *zp;
+	zfsvfs_t *zfsvfs;
+	off_t offset, len;
+	uint64_t file_sz;
+	int error;
+
+	vp = ap->a_vp;
+	zp = VTOZ(vp);
+	zfsvfs = ZTOZSB(zp);
+
+	/*
+	 * a_offset and a_len are pointers; validate the values they refer to
+	 * rather than the pointers themselves.
+	 */
+	offset = *ap->a_offset;
+	len = *ap->a_len;
+	if (offset < 0 || len < 0)
+		return (EINVAL);
+
+	ZFS_ENTER(zfsvfs);
+	ZFS_VERIFY_ZP(zp);
+
+	/* Clip the request to the current file size. */
+	file_sz = zp->z_size;
+	if ((uint64_t)offset + len > file_sz)
+		len = file_sz - offset;
+	if (len <= 0) {
+		/* Nothing inside the file to free. */
+		error = 0;
+		goto out;
+	}
+	error = zfs_space_common(zp, F_FREESP, offset, len, O_RDWR, ap->a_cred);
+out:
+	if (__predict_true(error == 0))
+		*ap->a_len = 0;
+	ZFS_EXIT(zfsvfs);
+	return (error);
+}
+
struct vop_vector zfs_vnodeops;
struct vop_vector zfs_fifoops;
struct vop_vector zfs_shareops;
@@ -5798,6 +5858,7 @@
.vop_fplookup_symlink = zfs_freebsd_fplookup_symlink,
.vop_access = zfs_freebsd_access,
.vop_allocate = VOP_EINVAL,
+ .vop_deallocate = zfs_deallocate,
.vop_lookup = zfs_cache_lookup,
.vop_cachedlookup = zfs_freebsd_cachedlookup,
.vop_getattr = zfs_freebsd_getattr,
Index: sys/contrib/openzfs/module/os/freebsd/zfs/zfs_znode.c
===================================================================
--- sys/contrib/openzfs/module/os/freebsd/zfs/zfs_znode.c
+++ sys/contrib/openzfs/module/os/freebsd/zfs/zfs_znode.c
@@ -1449,14 +1449,8 @@
error = dmu_free_long_range(zfsvfs->z_os, zp->z_id, off, len);
- if (error == 0) {
- /*
- * In FreeBSD we cannot free block in the middle of a file,
- * but only at the end of a file, so this code path should
- * never happen.
- */
- vnode_pager_setsize(ZTOV(zp), off);
- }
+ if (error == 0)
+ vnode_pager_purge_range(ZTOV(zp), off, off + len);
zfs_rangelock_exit(lr);
Index: sys/kern/init_sysent.c
===================================================================
--- sys/kern/init_sysent.c
+++ sys/kern/init_sysent.c
@@ -635,4 +635,5 @@
{ .sy_narg = AS(__specialfd_args), .sy_call = (sy_call_t *)sys___specialfd, .sy_auevent = AUE_SPECIALFD, .sy_flags = SYF_CAPENABLED, .sy_thrcnt = SY_THR_STATIC }, /* 577 = __specialfd */
{ .sy_narg = AS(aio_writev_args), .sy_call = (sy_call_t *)sys_aio_writev, .sy_auevent = AUE_AIO_WRITEV, .sy_flags = SYF_CAPENABLED, .sy_thrcnt = SY_THR_STATIC }, /* 578 = aio_writev */
{ .sy_narg = AS(aio_readv_args), .sy_call = (sy_call_t *)sys_aio_readv, .sy_auevent = AUE_AIO_READV, .sy_flags = SYF_CAPENABLED, .sy_thrcnt = SY_THR_STATIC }, /* 579 = aio_readv */
+ { .sy_narg = AS(fspacectl_args), .sy_call = (sy_call_t *)sys_fspacectl, .sy_auevent = AUE_FSPACECTL, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 580 = fspacectl */
};
Index: sys/kern/sys_generic.c
===================================================================
--- sys/kern/sys_generic.c
+++ sys/kern/sys_generic.c
@@ -861,6 +861,59 @@
return (error);
}
+/*
+ * fspacectl(2) system call entry point: fetch the range descriptor from
+ * userland and let kern_fspacectl() do the work.
+ *
+ * Returns the copyin() error if the descriptor cannot be read, otherwise
+ * the result of kern_fspacectl().
+ */
+int
+sys_fspacectl(struct thread *td, struct fspacectl_args *uap)
+{
+	struct spacectl_range krange;
+	int rv;
+
+	rv = copyin(uap->range, &krange, sizeof(krange));
+	if (rv == 0)
+		rv = kern_fspacectl(td, uap->fd, uap->cmd, &krange,
+		    uap->flags);
+	return (rv);
+}
+
+/*
+ * Kernel-side implementation of fspacectl(2).
+ *
+ * range must point to kernel memory.  The request is validated, the
+ * descriptor is looked up with pwrite capability rights, and the operation
+ * is dispatched through the file's fo_fspacectl method.
+ *
+ * Returns:
+ *	EINVAL	cmd is not SPACECTL_DEALLOC, r_offset or r_resid is negative,
+ *		or flags contains bits outside SPACECTL_F_SUPPORTED.
+ *	ESPIPE	the file is not seekable.
+ *	EBADF	the file is not open for writing.
+ *	Otherwise the error from fget() or fo_fspacectl(), or 0 on success.
+ */
+int
+kern_fspacectl(struct thread *td, int fd, int cmd, struct spacectl_range *range,
+    int flags)
+{
+	struct file *fp;
+	int error;
+
+	/* Record the arguments for audit even if validation fails below. */
+	AUDIT_ARG_FD(fd);
+	AUDIT_ARG_CMD(cmd);
+	AUDIT_ARG_FFLAGS(flags);
+
+	if ((cmd != SPACECTL_DEALLOC) ||
+	    (range->r_offset < 0 || range->r_resid < 0) ||
+	    (flags & ~SPACECTL_F_SUPPORTED))
+		return (EINVAL);
+
+	error = fget(td, fd, &cap_pwrite_rights, &fp);
+	if (error != 0)
+		return (error);
+	AUDIT_ARG_FILE(td->td_proc, fp);
+	if ((fp->f_ops->fo_flags & DFLAG_SEEKABLE) == 0) {
+		error = ESPIPE;
+		goto out;
+	}
+	if ((fp->f_flag & FWRITE) == 0) {
+		error = EBADF;
+		goto out;
+	}
+
+	error = fo_fspacectl(fp, cmd, range->r_offset, range->r_resid, flags,
+	    td->td_ucred, td);
+out:
+	/* Drop the reference taken by fget(). */
+	fdrop(fp, td);
+	return (error);
+}
+
int
kern_specialfd(struct thread *td, int type, void *arg)
{
Index: sys/kern/syscalls.c
===================================================================
--- sys/kern/syscalls.c
+++ sys/kern/syscalls.c
@@ -586,4 +586,5 @@
"__specialfd", /* 577 = __specialfd */
"aio_writev", /* 578 = aio_writev */
"aio_readv", /* 579 = aio_readv */
+ "fspacectl", /* 580 = fspacectl */
};
Index: sys/kern/syscalls.master
===================================================================
--- sys/kern/syscalls.master
+++ sys/kern/syscalls.master
@@ -3238,6 +3238,14 @@
_Inout_ struct aiocb *aiocbp
);
}
+580 AUE_FSPACECTL STD {
+ int fspacectl(
+ int fd,
+ int cmd,
+ _In_ struct spacectl_range *range,
+ int flags
+ );
+ }
; Please copy any additions and changes to the following compatability tables:
; sys/compat/freebsd32/syscalls.master
Index: sys/kern/systrace_args.c
===================================================================
--- sys/kern/systrace_args.c
+++ sys/kern/systrace_args.c
@@ -3399,6 +3399,16 @@
*n_args = 1;
break;
}
+ /* fspacectl */
+ case 580: {
+ struct fspacectl_args *p = params;
+ iarg[0] = p->fd; /* int */
+ iarg[1] = p->cmd; /* int */
+ uarg[2] = (intptr_t) p->range; /* struct spacectl_range * */
+ iarg[3] = p->flags; /* int */
+ *n_args = 4;
+ break;
+ }
default:
*n_args = 0;
break;
@@ -9088,6 +9098,25 @@
break;
};
break;
+ /* fspacectl */
+ case 580:
+ switch(ndx) {
+ case 0:
+ p = "int";
+ break;
+ case 1:
+ p = "int";
+ break;
+ case 2:
+ p = "userland struct spacectl_range *";
+ break;
+ case 3:
+ p = "int";
+ break;
+ default:
+ break;
+ };
+ break;
default:
break;
};
@@ -11034,6 +11063,11 @@
if (ndx == 0 || ndx == 1)
p = "int";
break;
+ /* fspacectl */
+ case 580:
+ if (ndx == 0 || ndx == 1)
+ p = "int";
+ break;
default:
break;
};
Index: sys/kern/vfs_default.c
===================================================================
--- sys/kern/vfs_default.c
+++ sys/kern/vfs_default.c
@@ -92,6 +92,7 @@
static int vop_stdgetpages_async(struct vop_getpages_async_args *ap);
static int vop_stdread_pgcache(struct vop_read_pgcache_args *ap);
static int vop_stdstat(struct vop_stat_args *ap);
+static int vop_stddeallocate(struct vop_deallocate_args *ap);
/*
* This vnode table stores what we want to do if the filesystem doesn't
@@ -116,6 +117,7 @@
.vop_advlockasync = vop_stdadvlockasync,
.vop_advlockpurge = vop_stdadvlockpurge,
.vop_allocate = vop_stdallocate,
+ .vop_deallocate = vop_stddeallocate,
.vop_bmap = vop_stdbmap,
.vop_close = VOP_NULL,
.vop_fsync = VOP_NULL,
@@ -501,6 +503,7 @@
case _PC_ACL_EXTENDED:
case _PC_ACL_NFS4:
case _PC_CAP_PRESENT:
+ case _PC_FDEALLOC_PRESENT:
case _PC_INF_PRESENT:
case _PC_MAC_PRESENT:
*ap->a_retval = 0;
@@ -1075,6 +1078,148 @@
return (error);
}
+/*
+ * Overwrite the byte range [offset, offset + len) of vp with zeroes by
+ * issuing VOP_WRITE() calls from a zero-filled kernel buffer.
+ *
+ * The transfer unit is vap->va_blocksize (BLKDEV_IOSIZE if that is 0),
+ * capped at MAXPHYS.  The first write is shortened so that subsequent
+ * writes start on a multiple of the transfer unit.
+ *
+ * On return *residp holds the number of bytes that were NOT zeroed: always 0
+ * on success, and the unwritten remainder when a write fails.  The whole
+ * range is processed in one call; callers treat a 0 return as "everything
+ * was zeroed", so any yielding has to happen between calls.
+ *
+ * Returns 0 on success or the error from VOP_WRITE().
+ */
+static int
+vp_zerofill(struct vnode *vp, struct vattr *vap, off_t offset, off_t len,
+    off_t *residp, struct ucred *cred)
+{
+	int iosize;
+	int error = 0;
+	void *buf = NULL;
+	struct iovec aiov;
+	struct uio auio;
+	struct thread *td;
+
+	iosize = vap->va_blocksize;
+	td = curthread;
+
+	if (iosize == 0)
+		iosize = BLKDEV_IOSIZE;
+	if (iosize > MAXPHYS)
+		iosize = MAXPHYS;
+	buf = malloc(iosize, M_TEMP, M_ZERO | M_WAITOK);
+
+	while (len > 0) {
+		int xfersize = iosize;
+
+		/* Trim an unaligned first chunk to the next boundary. */
+		if (offset % iosize != 0)
+			xfersize -= offset % iosize;
+		if (xfersize > len)
+			xfersize = len;
+
+		aiov.iov_base = buf;
+		aiov.iov_len = xfersize;
+		auio.uio_iov = &aiov;
+		auio.uio_iovcnt = 1;
+		auio.uio_offset = offset;
+		auio.uio_resid = xfersize;
+		auio.uio_segflg = UIO_SYSSPACE;
+		auio.uio_rw = UIO_WRITE;
+		auio.uio_td = td;
+
+		error = VOP_WRITE(vp, &auio, 0, cred);
+		if (error != 0) {
+			/* Credit whatever part of this chunk was written. */
+			len -= xfersize - auio.uio_resid;
+			break;
+		}
+
+		len -= xfersize;
+		offset += xfersize;
+	}
+
+	free(buf, M_TEMP);
+	*residp = len;
+	return (error);
+}
+
+/*
+ * Default VOP_DEALLOCATE: emulate deallocation by zero-filling.
+ *
+ * The request is clipped to the current file size.  The range is then walked
+ * with FIOSEEKDATA/FIOSEEKHOLE: holes are skipped and data regions are
+ * overwritten with zeroes through vp_zerofill().
+ *
+ * *ap->a_offset and *ap->a_len are always updated once the request has been
+ * validated, so that the caller can see how much was processed.  A return of
+ * 0 with *ap->a_len != 0 means the function stopped early to let the caller
+ * yield; the caller is expected to call again for the remainder.
+ *
+ * Returns EINVAL for a negative offset/length or unsupported flag bits,
+ * EOPNOTSUPP when SPACECTL_F_ATOMIC is requested, otherwise 0 or the error
+ * from the underlying operations.
+ */
+static int
+vop_stddeallocate(struct vop_deallocate_args *ap)
+{
+	struct vnode *vp;
+	off_t offset, resid;
+	struct ucred *cred;
+	int error;
+	struct vattr va;
+
+	vp = ap->a_vp;
+	offset = *ap->a_offset;
+	resid = *ap->a_len;
+	cred = ap->a_cred;
+
+	if ((offset < 0 || resid < 0) ||
+	    (ap->a_flags & ~SPACECTL_F_SUPPORTED))
+		return (EINVAL);
+	if (ap->a_flags & SPACECTL_F_ATOMIC)
+		return (EOPNOTSUPP);
+
+	error = VOP_GETATTR(vp, &va, cred);
+	if (error)
+		return (error);
+
+	/*
+	 * Clip the request to the file size.  A range that starts at or
+	 * beyond end of file has nothing to deallocate; report it as fully
+	 * processed so the caller's retry loop terminates.
+	 */
+	if ((uint64_t)offset >= va.va_size)
+		resid = 0;
+	else if ((uint64_t)offset + resid > va.va_size)
+		resid = va.va_size - offset;
+
+	while (resid > 0) {
+		off_t noff;
+		off_t xfersize;
+		off_t rem;
+
+		noff = offset;
+
+		error = vn_bmap_seekhole_locked(vp, FIOSEEKDATA, &noff, cred);
+		if (error) {
+			if (error == ENXIO) {
+				/*
+				 * No more data region to be filled.
+				 *
+				 * NOTE(review): offset + resid can be below
+				 * va.va_size here, in which case this call
+				 * would shrink the file -- confirm that is
+				 * intended.
+				 */
+				error = vn_truncate_locked(
+				    vp, offset + resid, false, cred);
+				if (error)
+					goto out;
+				offset += resid;
+				resid = 0;
+				break;
+			}
+			/* XXX: Is it okay to fallback further? */
+			goto out;
+		}
+		KASSERT(noff >= offset, ("FIOSEEKDATA going backward"));
+		if (noff != offset) {
+			/* [offset, noff) is already a hole; skip it. */
+			xfersize = omin(noff - offset, resid);
+			resid -= xfersize;
+			offset += xfersize;
+			continue;
+		}
+		error = vn_bmap_seekhole_locked(vp, FIOSEEKHOLE, &noff, cred);
+		if (error)
+			goto out;
+
+		/* Data runs from offset to noff; clip to the request. */
+		xfersize = noff - offset;
+		if (xfersize > resid)
+			xfersize = resid;
+
+		/* Fill zeroes */
+		error = vp_zerofill(vp, &va, offset, xfersize, &rem, cred);
+		if (error) {
+			/* Account only for the part that was written. */
+			resid -= xfersize - rem;
+			offset += xfersize - rem;
+			goto out;
+		}
+
+		resid -= xfersize;
+		offset += xfersize;
+
+		/* Progress was made; give the caller a chance to yield. */
+		if (should_yield())
+			break;
+	}
+
+out:
+	*ap->a_offset = offset;
+	*ap->a_len = resid;
+	return (error);
+}
+
int
vop_stdadvise(struct vop_advise_args *ap)
{
Index: sys/kern/vfs_vnops.c
===================================================================
--- sys/kern/vfs_vnops.c
+++ sys/kern/vfs_vnops.c
@@ -106,6 +106,7 @@
static fo_close_t vn_closefile;
static fo_mmap_t vn_mmap;
static fo_fallocate_t vn_fallocate;
+static fo_fspacectl_t vn_fspacectl;
struct fileops vnops = {
.fo_read = vn_io_fault,
@@ -123,6 +124,7 @@
.fo_fill_kinfo = vn_fill_kinfo,
.fo_mmap = vn_mmap,
.fo_fallocate = vn_fallocate,
+ .fo_fspacectl = vn_fspacectl,
.fo_flags = DFLAG_PASSABLE | DFLAG_SEEKABLE
};
@@ -2345,7 +2347,8 @@
}
int
-vn_bmap_seekhole(struct vnode *vp, u_long cmd, off_t *off, struct ucred *cred)
+vn_bmap_seekhole_locked(struct vnode *vp, u_long cmd, off_t *off,
+ struct ucred *cred)
{
struct vattr va;
daddr_t bn, bnp;
@@ -2353,22 +2356,17 @@
off_t noff;
int error;
- KASSERT(cmd == FIOSEEKHOLE || cmd == FIOSEEKDATA,
- ("Wrong command %lu", cmd));
-
- if (vn_lock(vp, LK_SHARED) != 0)
- return (EBADF);
if (vp->v_type != VREG) {
error = ENOTTY;
- goto unlock;
+ goto out;
}
error = VOP_GETATTR(vp, &va, cred);
if (error != 0)
- goto unlock;
+ goto out;
noff = *off;
if (noff >= va.va_size) {
error = ENXIO;
- goto unlock;
+ goto out;
}
bsize = vp->v_mount->mnt_stat.f_iosize;
for (bn = noff / bsize; noff < va.va_size; bn++, noff += bsize -
@@ -2376,14 +2374,14 @@
error = VOP_BMAP(vp, bn, NULL, &bnp, NULL, NULL);
if (error == EOPNOTSUPP) {
error = ENOTTY;
- goto unlock;
+ goto out;
}
if ((bnp == -1 && cmd == FIOSEEKHOLE) ||
(bnp != -1 && cmd == FIOSEEKDATA)) {
noff = bn * bsize;
if (noff < *off)
noff = *off;
- goto unlock;
+ goto out;
}
}
if (noff > va.va_size)
@@ -2391,13 +2389,28 @@
/* noff == va.va_size. There is an implicit hole at the end of file. */
if (cmd == FIOSEEKDATA)
error = ENXIO;
-unlock:
+out:
VOP_UNLOCK(vp);
if (error == 0)
*off = noff;
return (error);
}
+/*
+ * Locking wrapper around vn_bmap_seekhole_locked(): takes the vnode lock
+ * shared, runs the seek, and drops the lock again.
+ *
+ * cmd must be FIOSEEKHOLE or FIOSEEKDATA.  Returns EBADF if the vnode
+ * cannot be locked, otherwise the helper's result.
+ *
+ * NOTE(review): as shown in this change, the helper's "out:" label still
+ * calls VOP_UNLOCK(vp); confirm that call is removed from the helper,
+ * otherwise the unlock below releases the lock a second time.
+ */
+int
+vn_bmap_seekhole(struct vnode *vp, u_long cmd, off_t *off, struct ucred *cred)
+{
+	int rv;
+
+	KASSERT(cmd == FIOSEEKHOLE || cmd == FIOSEEKDATA,
+	    ("Wrong command %lu", cmd));
+
+	if (vn_lock(vp, LK_SHARED) == 0) {
+		rv = vn_bmap_seekhole_locked(vp, cmd, off, cred);
+		VOP_UNLOCK(vp);
+	} else {
+		rv = EBADF;
+	}
+	return (rv);
+}
+
int
vn_seek(struct file *fp, off_t offset, int whence, struct thread *td)
{
@@ -3339,6 +3352,96 @@
return (error);
}
+/*
+ * Deallocate the byte range [off, off + resid) of vp by calling
+ * VOP_DEALLOCATE() repeatedly until the whole range has been processed or
+ * an error occurs.
+ *
+ * ioflg controls locking:
+ *	IO_NODELOCKED	the caller already holds the vnode lock; no range
+ *			lock, write-suspension reference or vnode lock is
+ *			taken or released here.
+ *	IO_RANGELOCKED	the caller already holds the range lock; only the
+ *			write-suspension reference and the vnode lock are
+ *			managed here.
+ *
+ * Between iterations all locks taken here are dropped and the thread may
+ * yield.  Returns 0 on success or the first error encountered.
+ */
+int
+vn_deallocate(struct vnode *vp, off_t off, off_t resid, int flags, int ioflg,
+    struct ucred *active_cred, struct thread *td)
+{
+	struct mount *mp;
+	void *rl_cookie;
+	int lock_flags;
+	int error;
+	off_t olen;
+
+	do {
+		rl_cookie = NULL;
+		mp = NULL;
+		olen = resid;
+
+		if ((ioflg & IO_NODELOCKED) == 0) {
+			if ((ioflg & IO_RANGELOCKED) == 0)
+				rl_cookie = vn_rangelock_wlock(vp, off,
+				    off + resid);
+
+			/*
+			 * The write-suspension reference is needed whether
+			 * or not the caller supplied the range lock.
+			 */
+			error = vn_start_write(vp, &mp, V_WAIT | PCATCH);
+			if (error != 0) {
+				if (rl_cookie != NULL)
+					vn_rangelock_unlock(vp, rl_cookie);
+				break;
+			}
+
+			if (MNT_SHARED_WRITES(mp) ||
+			    (mp == NULL && MNT_SHARED_WRITES(vp->v_mount))) {
+				lock_flags = LK_SHARED;
+			} else {
+				lock_flags = LK_EXCLUSIVE;
+			}
+			vn_lock(vp, lock_flags | LK_RETRY);
+		}
+
+		error = VOP_DEALLOCATE(vp, &off, &resid, flags, active_cred);
+
+		/* Release only what was acquired above. */
+		if ((ioflg & IO_NODELOCKED) == 0) {
+			VOP_UNLOCK(vp);
+			vn_finished_write(mp);
+		}
+		if (rl_cookie != NULL) {
+			vn_rangelock_unlock(vp, rl_cookie);
+			rl_cookie = NULL;
+		}
+
+		KASSERT(olen != resid || error,
+		    ("Iteration did not make progress?"));
+		if (error == 0 && resid && (flags & SPACECTL_F_ATOMIC))
+			panic("SPACECTL_F_ATOMIC was not obeyed?");
+		maybe_yield();
+	} while (error == 0 && resid);
+	return (error);
+}
+
+/*
+ * fo_fspacectl method for vnode-backed files.
+ *
+ * Validates the request, clamps the length so that offset + length cannot
+ * exceed OFF_MAX, and forwards SPACECTL_DEALLOC to vn_deallocate().
+ *
+ * Returns EINVAL for an unknown cmd, a negative offset/length or
+ * unsupported flag bits; ENODEV if the vnode is not a regular file; 0 for
+ * an empty range; otherwise the result of vn_deallocate().
+ */
+static int
+vn_fspacectl(struct file *fp, int cmd, off_t offset, off_t len, int flags,
+    struct ucred *active_cred, struct thread *td)
+{
+	struct vnode *vp = fp->f_vnode;
+	off_t span;
+
+	if (cmd != SPACECTL_DEALLOC)
+		return (EINVAL);
+	if (offset < 0 || len < 0)
+		return (EINVAL);
+	if ((flags & ~SPACECTL_F_SUPPORTED) != 0)
+		return (EINVAL);
+	if (vp->v_type != VREG)
+		return (ENODEV);
+
+	/* Take the maximum range if end offset overflows */
+	span = len;
+	if (span > OFF_MAX - offset)
+		span = OFF_MAX - offset;
+
+	/* Degenerated case */
+	if (span == 0)
+		return (0);
+
+	/* Only SPACECTL_DEALLOC can get this far; anything else is a bug. */
+	if (cmd != SPACECTL_DEALLOC)
+		panic("vn_fspacectl: unknown cmd %d", cmd);
+	return (vn_deallocate(vp, offset, span, flags, 0, active_cred, td));
+}
+
static u_long vn_lock_pair_pause_cnt;
SYSCTL_ULONG(_debug, OID_AUTO, vn_lock_pair_pause, CTLFLAG_RD,
&vn_lock_pair_pause_cnt, 0,
Index: sys/kern/vnode_if.src
===================================================================
--- sys/kern/vnode_if.src
+++ sys/kern/vnode_if.src
@@ -792,6 +792,17 @@
};
+# vop_deallocate: deallocate the range described by *offset and *len.
+# Both are INOUT: the implementations in this change write the remaining
+# length back through len (and the default one also advances offset).
+# NOTE(review): "L L L" presumably declares the vnode locked on entry, on
+# exit and on error -- confirm against the vnode_if.src conventions.
+%% deallocate vp L L L
+
+vop_deallocate {
+ IN struct vnode *vp;
+ INOUT off_t *offset;
+ INOUT off_t *len;
+ IN int flags;
+ IN struct ucred *cred;
+};
+
+
# The VOPs below are spares at the end of the table to allow new VOPs to be
# added in stable branches without breaking the KBI. New VOPs in HEAD should
# be added above these spares. When merging a new VOP to a stable branch,
Index: sys/security/audit/audit_bsm.c
===================================================================
--- sys/security/audit/audit_bsm.c
+++ sys/security/audit/audit_bsm.c
@@ -1076,6 +1076,18 @@
FD_VNODE1_TOKENS;
break;
+ case AUE_FSPACECTL:
+ if (ARG_IS_VALID(kar, ARG_CMD)) {
+ tok = au_to_arg32(2, "operation", ar->ar_arg_cmd);
+ kau_write(rec, tok);
+ }
+ if (ARG_IS_VALID(kar, ARG_FFLAGS)) {
+ tok = au_to_arg32(4, "flags", ar->ar_arg_fflags);
+ kau_write(rec, tok);
+ }
+ FD_VNODE1_TOKENS;
+ break;
+
case AUE_RFORK:
if (ARG_IS_VALID(kar, ARG_FFLAGS)) {
tok = au_to_arg32(1, "flags", ar->ar_arg_fflags);
Index: sys/sys/fcntl.h
===================================================================
--- sys/sys/fcntl.h
+++ sys/sys/fcntl.h
@@ -314,6 +314,14 @@
short l_type; /* lock type: read/write, etc. */
short l_whence; /* type of l_start */
};
+
+/*
+ * Space control offset/length description, passed by pointer to
+ * fspacectl(2).  The kernel rejects negative values in either field.
+ */
+struct spacectl_range {
+ off_t r_offset; /* starting offset of the range */
+ off_t r_resid; /* length of the range */
+};
#endif
#if __BSD_VISIBLE
@@ -343,6 +351,20 @@
* similar syscalls.
*/
#define FD_NONE -200
+
+/*
+ * Commands for fspacectl(2).
+ * Only SPACECTL_DEALLOC is accepted by the kernel code in this change;
+ * any other command, including SPACECTL_ALLOC, is rejected with EINVAL.
+ */
+#define SPACECTL_ALLOC 0 /* allocate space */
+#define SPACECTL_DEALLOC 1 /* deallocate space */
+
+/*
+ * fspacectl(2) flags.
+ * SPACECTL_F_SUPPORTED is the mask of all flag bits the kernel accepts;
+ * bits outside it cause EINVAL.
+ */
+#define SPACECTL_F_ATOMIC 0x01 /* the operation is atomic with respect
+ to other FS operations if operation
+ range overlaps */
+#define SPACECTL_F_SUPPORTED (SPACECTL_F_ATOMIC)
#endif
#ifndef _KERNEL
@@ -360,6 +382,9 @@
int posix_fadvise(int, off_t, off_t, int);
int posix_fallocate(int, off_t, off_t);
#endif
+#if __BSD_VISIBLE
+int fspacectl(int, int, struct spacectl_range *, int);
+#endif
__END_DECLS
#endif
Index: sys/sys/file.h
===================================================================
--- sys/sys/file.h
+++ sys/sys/file.h
@@ -35,6 +35,7 @@
#ifndef _SYS_FILE_H_
#define _SYS_FILE_H_
+#include "sys/fcntl.h"
#ifndef _KERNEL
#include <sys/types.h> /* XXX */
#include <sys/fcntl.h>
@@ -129,6 +130,9 @@
typedef int fo_get_seals_t(struct file *fp, int *flags);
typedef int fo_fallocate_t(struct file *fp, off_t offset, off_t len,
struct thread *td);
+/*
+ * File operation backing fspacectl(2): apply cmd to the range starting at
+ * offset and spanning len bytes, honouring flags.
+ */
+typedef int fo_fspacectl_t(struct file *fp, int cmd,
+ off_t offset, off_t len, int flags,
+ struct ucred *active_cred, struct thread *td);
typedef int fo_flags_t;
struct fileops {
@@ -150,6 +154,7 @@
fo_add_seals_t *fo_add_seals;
fo_get_seals_t *fo_get_seals;
fo_fallocate_t *fo_fallocate;
+ fo_fspacectl_t *fo_fspacectl;
fo_flags_t fo_flags; /* DFLAG_* below */
};
@@ -470,6 +475,17 @@
return ((*fp->f_ops->fo_fallocate)(fp, offset, len, td));
}
+/*
+ * Invoke the file's fo_fspacectl method.  File types that do not provide
+ * one fail with ENODEV.
+ */
+static __inline int
+fo_fspacectl(struct file *fp, int cmd, off_t offset, off_t len, int flags,
+    struct ucred *active_cred, struct thread *td)
+{
+	fo_fspacectl_t *handler;
+
+	handler = fp->f_ops->fo_fspacectl;
+	if (handler != NULL)
+		return (handler(fp, cmd, offset, len, flags, active_cred, td));
+	return (ENODEV);
+}
+
+
#endif /* _KERNEL */
#endif /* !SYS_FILE_H */
Index: sys/sys/syscall.h
===================================================================
--- sys/sys/syscall.h
+++ sys/sys/syscall.h
@@ -515,4 +515,5 @@
#define SYS___specialfd 577
#define SYS_aio_writev 578
#define SYS_aio_readv 579
-#define SYS_MAXSYSCALL 580
+#define SYS_fspacectl 580
+#define SYS_MAXSYSCALL 581
Index: sys/sys/syscall.mk
===================================================================
--- sys/sys/syscall.mk
+++ sys/sys/syscall.mk
@@ -420,4 +420,5 @@
rpctls_syscall.o \
__specialfd.o \
aio_writev.o \
- aio_readv.o
+ aio_readv.o \
+ fspacectl.o
Index: sys/sys/syscallsubr.h
===================================================================
--- sys/sys/syscallsubr.h
+++ sys/sys/syscallsubr.h
@@ -59,6 +59,7 @@
struct sched_param;
union semun;
struct sockaddr;
+struct spacectl_range;
struct stat;
struct thr_param;
struct timex;
@@ -230,6 +231,10 @@
int advice);
int kern_posix_fallocate(struct thread *td, int fd, off_t offset,
off_t len);
+int kern_fdeallocate(struct thread *td, int fd, off_t offset, off_t len);
+int kern_fzerorange(struct thread *td, int fd, off_t offset, off_t len);
+int kern_fspacectl(struct thread *td, int fd, int cmd,
+ struct spacectl_range *, int flags);
int kern_procctl(struct thread *td, enum idtype idtype, id_t id, int com,
void *data);
int kern_pread(struct thread *td, int fd, void *buf, size_t nbyte,
Index: sys/sys/sysproto.h
===================================================================
--- sys/sys/sysproto.h
+++ sys/sys/sysproto.h
@@ -1847,6 +1847,12 @@
struct aio_readv_args {
char aiocbp_l_[PADL_(struct aiocb *)]; struct aiocb * aiocbp; char aiocbp_r_[PADR_(struct aiocb *)];
};
+/*
+ * Argument block for sys_fspacectl(): fd, cmd, range and flags, each
+ * surrounded by PADL_()/PADR_() padding arrays.
+ */
+struct fspacectl_args {
+ char fd_l_[PADL_(int)]; int fd; char fd_r_[PADR_(int)];
+ char cmd_l_[PADL_(int)]; int cmd; char cmd_r_[PADR_(int)];
+ char range_l_[PADL_(struct spacectl_range *)]; struct spacectl_range * range; char range_r_[PADR_(struct spacectl_range *)];
+ char flags_l_[PADL_(int)]; int flags; char flags_r_[PADR_(int)];
+};
int nosys(struct thread *, struct nosys_args *);
void sys_sys_exit(struct thread *, struct sys_exit_args *);
int sys_fork(struct thread *, struct fork_args *);
@@ -2241,6 +2247,7 @@
int sys___specialfd(struct thread *, struct __specialfd_args *);
int sys_aio_writev(struct thread *, struct aio_writev_args *);
int sys_aio_readv(struct thread *, struct aio_readv_args *);
+int sys_fspacectl(struct thread *, struct fspacectl_args *);
#ifdef COMPAT_43
@@ -3175,6 +3182,7 @@
#define SYS_AUE___specialfd AUE_SPECIALFD
#define SYS_AUE_aio_writev AUE_AIO_WRITEV
#define SYS_AUE_aio_readv AUE_AIO_READV
+#define SYS_AUE_fspacectl AUE_FSPACECTL
#undef PAD_
#undef PADL_
Index: sys/sys/unistd.h
===================================================================
--- sys/sys/unistd.h
+++ sys/sys/unistd.h
@@ -156,6 +156,7 @@
#define _PC_INF_PRESENT 62
#define _PC_MAC_PRESENT 63
#define _PC_ACL_NFS4 64
+#define _PC_FDEALLOC_PRESENT 65
#endif
/* From OpenSolaris, used by SEEK_DATA/SEEK_HOLE. */
Index: sys/sys/vnode.h
===================================================================
--- sys/sys/vnode.h
+++ sys/sys/vnode.h
@@ -719,6 +719,9 @@
void vn_printf(struct vnode *vp, const char *fmt, ...) __printflike(2,3);
int vrecycle(struct vnode *vp);
int vrecyclel(struct vnode *vp);
+/* vn_bmap_seekhole_locked is not public KPI */
+int vn_bmap_seekhole_locked(struct vnode *vp, u_long cmd, off_t *off,
+ struct ucred *cred);
int vn_bmap_seekhole(struct vnode *vp, u_long cmd, off_t *off,
struct ucred *cred);
int vn_close(struct vnode *vp,
@@ -727,6 +730,8 @@
struct vnode *outvp, off_t *outoffp, size_t *lenp,
unsigned int flags, struct ucred *incred, struct ucred *outcred,
struct thread *fsize_td);
+int vn_deallocate(struct vnode *vp, off_t off, off_t resid, int flags,
+ int ioflg, struct ucred *active_cred, struct thread *td);
void vn_finished_write(struct mount *mp);
void vn_finished_secondary_write(struct mount *mp);
int vn_fsync_buf(struct vnode *vp, int waitfor);

File Metadata

Mime Type
text/plain
Expires
Mon, Mar 10, 7:46 AM (6 h, 45 m)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
17081492
Default Alt Text
D28347.id83416.diff (31 KB)

Event Timeline