Page MenuHomeFreeBSD

D20584.id58464.diff
No OneTemporary

D20584.id58464.diff

Index: include/unistd.h
===================================================================
--- include/unistd.h
+++ include/unistd.h
@@ -494,6 +494,7 @@
int acct(const char *);
int async_daemon(void);
int check_utility_compat(const char *);
+ssize_t copy_file_range(int, off_t *, int, off_t *, size_t, u_int);
const char *
crypt_get_format(void);
char *crypt_r(const char *, const char *, struct crypt_data *);
Index: lib/libc/sys/Makefile.inc
===================================================================
--- lib/libc/sys/Makefile.inc
+++ lib/libc/sys/Makefile.inc
@@ -175,6 +175,7 @@
closefrom.2 \
connect.2 \
connectat.2 \
+ copy_file_range.2 \
cpuset.2 \
cpuset_getaffinity.2 \
cpuset_getdomain.2 \
Index: lib/libc/sys/Symbol.map
===================================================================
--- lib/libc/sys/Symbol.map
+++ lib/libc/sys/Symbol.map
@@ -402,6 +402,7 @@
};
FBSD_1.6 {
+ copy_file_range;
fhlink;
fhlinkat;
fhreadlink;
Index: lib/libc/sys/copy_file_range.2
===================================================================
--- lib/libc/sys/copy_file_range.2
+++ lib/libc/sys/copy_file_range.2
@@ -0,0 +1,144 @@
+.\" SPDX-License-Identifier: BSD-2-Clause
+.\"
+.\" Copyright (c) 2019 Rick Macklem
+.\"
+.\" Redistribution and use in source and binary forms, with or without
+.\" modification, are permitted provided that the following conditions
+.\" are met:
+.\" 1. Redistributions of source code must retain the above copyright
+.\" notice, this list of conditions and the following disclaimer.
+.\" 2. Redistributions in binary form must reproduce the above copyright
+.\" notice, this list of conditions and the following disclaimer in the
+.\" documentation and/or other materials provided with the distribution.
+.\"
+.\" THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+.\" ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+.\" SUCH DAMAGE.
+.\"
+.\" $FreeBSD$
+.\"
+.Dd June 9, 2019
+.Dt COPY_FILE_RANGE 2
+.Os
+.Sh NAME
+.Nm copy_file_range
+.Nd kernel copy of a byte range from one file to another
+.Sh LIBRARY
+.Lb libc
+.Sh SYNOPSIS
+.In sys/types.h
+.In unistd.h
+.Ft ssize_t
+.Fo copy_file_range
+.Fa "int infd"
+.Fa "off_t *inoffp"
+.Fa "int outfd"
+.Fa "off_t *outoffp"
+.Fa "size_t len"
+.Fa "u_int flags"
+.Fc
+.Sh DESCRIPTION
+The
+.Fn copy_file_range
+system call
+copies
+.Fa len
+bytes from
+.Fa infd
+to
+.Fa outfd
+in the kernel.
+It may do this using a file system specific technique if
+.Fa infd
+and
+.Fa outfd
+are on the same file system.
+The
+.Fa infd
+argument must be opened for reading and the
+.Fa outfd
+argument must be opened for writing, but not with
+.Dv O_APPEND .
+If
+.Fa inoffp
+or
+.Fa outoffp
+is NULL, the file offset for
+.Fa infd
+or
+.Fa outfd
+respectively will be used and updated by
+the number of bytes copied.
+If
+.Fa inoffp
+or
+.Fa outoffp
+is not NULL, the byte offset pointed to by
+.Fa inoffp
+or
+.Fa outoffp
+respectively will be used/updated and the file offset for
+.Fa infd
+or
+.Fa outfd
+respectively will not be affected.
+The
+.Fa flags
+argument is currently ignored and should be set to 0.
+.Sh RETURN VALUES
+If it succeeds, the call returns the number of bytes copied, which can be less
+than
+.Fa len .
+.Fn copy_file_range
+should be used in a loop until copying of the desired byte range has been
+completed.
+If an error has occurred, a \-1 is returned and the error code is placed in
+the global variable
+.Va errno .
+.Sh ERRORS
+The
+.Fn copy_file_range
+system call
+will fail if:
+.Bl -tag -width Er
+.It Bq Er EBADF
+If
+.Fa infd
+is not open for reading or
+.Fa outfd
+is not open for writing, or is opened for writing with
+.Dv O_APPEND .
+.It Bq Er EFBIG
+If the copy exceeds the process's file size limit or the maximum file size
+for the file system
+.Fa outfd
+resides on.
+.It Bq Er EINVAL
+If the initial offset for
+.Fa infd
+plus
+.Fa len
+exceeds EOF for
+.Fa infd .
+.It Bq Er EIO
+An I/O error occurred while reading/writing the files.
+.It Bq Er EISDIR
+If either
+.Fa infd
+or
+.Fa outfd
+refers to a directory.
+.El
+.Sh STANDARDS
+The
+.Fn copy_file_range
+system call is expected to be compatible with the Linux system call of
+the same name.
+.Sh HISTORY
+The
+.Fn copy_file_range
+function appeared in
+.Fx 13.0 .
Index: sys/kern/syscalls.master
===================================================================
--- sys/kern/syscalls.master
+++ sys/kern/syscalls.master
@@ -3175,6 +3175,16 @@
int flag
);
}
+569 AUE_NULL STD {
+ ssize_t copy_file_range(
+ _In_ int infd,
+ _Inout_opt_ off_t *inoffp,
+ _In_ int outfd,
+ _Inout_opt_ off_t *outoffp,
+ _In_ size_t len,
+ _In_ u_int flags
+ );
+ }
; Please copy any additions and changes to the following compatability tables:
; sys/compat/freebsd32/syscalls.master
Index: sys/kern/vfs_default.c
===================================================================
--- sys/kern/vfs_default.c
+++ sys/kern/vfs_default.c
@@ -140,6 +140,7 @@
.vop_set_text = vop_stdset_text,
.vop_unset_text = vop_stdunset_text,
.vop_add_writecount = vop_stdadd_writecount,
+ .vop_copy_file_range = VOP_EOPNOTSUPP,
};
/*
Index: sys/kern/vfs_syscalls.c
===================================================================
--- sys/kern/vfs_syscalls.c
+++ sys/kern/vfs_syscalls.c
@@ -4814,3 +4814,94 @@
uap->advice);
return (kern_posix_error(td, error));
}
+
+/*
+ * Kernel side of copy_file_range(2): copy up to len bytes from the file
+ * referenced by infd to the file referenced by outfd.
+ *
+ * If inoffp (outoffp) is NULL the descriptor's own file offset is used and
+ * advanced; otherwise the offset it points to is used and advanced.  The
+ * number of bytes copied is stored in td->td_retval[0] (0 when nothing was
+ * copied).  Returns 0 on success or an errno value.
+ */
+int
+kern_copy_file_range(struct thread *td, int infd, off_t *inoffp, int outfd,
+    off_t *outoffp, size_t len, u_int flags)
+{
+	struct file *infp, *outfp;
+	struct vnode *invp, *outvp;
+	int error, lock_flags;
+	size_t retlen;
+
+	infp = outfp = NULL;
+	retlen = 0;
+
+	/* Get the file structures for the file descriptors. */
+	error = fget_read(td, infd, &cap_read_rights, &infp);
+	if (error != 0)
+		goto out;
+	error = fget_write(td, outfd, &cap_write_rights, &outfp);
+	if (error != 0)
+		goto out;
+
+	/*
+	 * Set the offset pointers to the correct place.
+	 * NOTE(review): f_offset is used here without taking the file
+	 * offset lock; confirm whether concurrent users of the same
+	 * descriptors need to be serialized.
+	 */
+	if (inoffp == NULL)
+		inoffp = &infp->f_offset;
+	if (outoffp == NULL)
+		outoffp = &outfp->f_offset;
+
+	/* Sanity check the f_flag bits: readable input, non-append output. */
+	if ((outfp->f_flag & (FWRITE | FAPPEND)) != FWRITE ||
+	    (infp->f_flag & FREAD) == 0) {
+		error = EBADF;
+		goto out;
+	}
+
+	/*
+	 * Descriptors that are not backed by a vnode cannot be copied
+	 * here; reject them before f_vnode is dereferenced below.
+	 */
+	invp = infp->f_vnode;
+	outvp = outfp->f_vnode;
+	if (invp == NULL || outvp == NULL) {
+		error = EINVAL;
+		goto out;
+	}
+
+	/*
+	 * Lock the vnodes.
+	 * NOTE(review): nothing here handles invp == outvp, and the two
+	 * vnodes are locked without a defined order; confirm this cannot
+	 * recurse on the lock or deadlock against a copy in the opposite
+	 * direction.
+	 */
+	error = vn_lock(invp, LK_SHARED);
+	if (error != 0)
+		goto out;
+	if (MNT_SHARED_WRITES(outvp->v_mount))
+		lock_flags = LK_SHARED;
+	else
+		lock_flags = LK_EXCLUSIVE;
+	error = vn_lock(outvp, lock_flags);
+	if (error != 0) {
+		/* outvp was never locked; drop the lock held on invp. */
+		VOP_UNLOCK(invp, 0);
+		goto out;
+	}
+
+	/* retlen is in/out: bytes requested going in, bytes copied coming out. */
+	retlen = len;
+	error = vn_copy_file_range(invp, inoffp, outvp, outoffp, &retlen,
+	    flags);
+	VOP_UNLOCK(invp, 0);
+	VOP_UNLOCK(outvp, 0);
+out:
+	if (infp != NULL)
+		fdrop(infp, td);
+	if (outfp != NULL)
+		fdrop(outfp, td);
+	td->td_retval[0] = retlen;
+	return (error);
+}
+
+/*
+ * System call handler for copy_file_range(2).
+ *
+ * Each non-NULL user offset pointer is copied into a kernel-local off_t,
+ * handed to kern_copy_file_range(), and copied back out only when the copy
+ * itself succeeded.  A NULL user pointer is passed through as NULL.
+ */
+int
+sys_copy_file_range(struct thread *td, struct copy_file_range_args *uap)
+{
+	off_t inoff, outoff;
+	off_t *inoffp, *outoffp;
+	int error;
+
+	inoffp = NULL;
+	outoffp = NULL;
+
+	/* Fetch the caller's starting offsets, if supplied. */
+	if (uap->inoffp != NULL) {
+		error = copyin(uap->inoffp, &inoff, sizeof(inoff));
+		if (error != 0)
+			return (error);
+		inoffp = &inoff;
+	}
+	if (uap->outoffp != NULL) {
+		error = copyin(uap->outoffp, &outoff, sizeof(outoff));
+		if (error != 0)
+			return (error);
+		outoffp = &outoff;
+	}
+
+	error = kern_copy_file_range(td, uap->infd, inoffp, uap->outfd,
+	    outoffp, uap->len, uap->flags);
+	if (error != 0)
+		return (error);
+
+	/* Report the updated offsets back to the caller. */
+	if (inoffp != NULL) {
+		error = copyout(&inoff, uap->inoffp, sizeof(inoff));
+		if (error != 0)
+			return (error);
+	}
+	if (outoffp != NULL)
+		error = copyout(&outoff, uap->outoffp, sizeof(outoff));
+	return (error);
+}
Index: sys/kern/vfs_vnops.c
===================================================================
--- sys/kern/vfs_vnops.c
+++ sys/kern/vfs_vnops.c
@@ -61,6 +61,7 @@
#include <sys/limits.h>
#include <sys/lock.h>
#include <sys/mman.h>
+#include <sys/malloc.h>
#include <sys/mount.h>
#include <sys/mutex.h>
#include <sys/namei.h>
@@ -2494,6 +2495,134 @@
va->va_fsid += (uint32_t)f->val[0];
}
+/*
+ * Return non-zero if all len bytes starting at dat are zero.
+ * Used to detect blocks that can be left as a hole in the output file
+ * without relying on any shared, mutable comparison buffer.
+ */
+static int
+copyfilerange_iszero(const char *dat, size_t len)
+{
+	size_t i;
+
+	for (i = 0; i < len; i++)
+		if (dat[i] != '\0')
+			return (0);
+	return (1);
+}
+
+/*
+ * Copy *lenp bytes from invp at *inoffp to outvp at *outoffp.
+ *
+ * Both vnodes must be regular files; the I/O below is issued with
+ * IO_NODELOCKED, i.e. the caller is expected to hold the vnode locks.
+ * If both vnodes are on the same mount, VOP_COPY_FILE_RANGE() is tried
+ * first; when it does not succeed the data is copied here block by block.
+ *
+ * On return *inoffp and *outoffp have been advanced by the number of bytes
+ * copied by the loop below and *lenp holds that byte count (0 when an
+ * error is detected before any copying starts).  Returns 0 or an errno
+ * value: EISDIR for directories, EINVAL for non-regular files, negative or
+ * overflowing offsets, or an input range that extends past EOF.
+ */
+int
+vn_copy_file_range(struct vnode *invp, off_t *inoffp, struct vnode *outvp,
+    off_t *outoffp, size_t *lenp, u_int flags)
+{
+	struct statfs *sfp;
+	struct vattr va;
+	struct mount *mp;
+	struct thread *td;
+	u_long blksize;
+	off_t outsize;
+	ssize_t aresid;
+	size_t len;
+	char *dat;
+	int error, xfer;
+
+	td = curthread;
+	len = *lenp;
+	error = 0;
+	mp = NULL;
+
+	/* Do some sanity checks on the arguments. */
+	if (invp->v_type == VDIR || outvp->v_type == VDIR)
+		error = EISDIR;
+	else if (*inoffp < 0 || (*inoffp + len) < *inoffp || *outoffp < 0 ||
+	    (*outoffp + len) < *outoffp || invp->v_type != VREG ||
+	    outvp->v_type != VREG)
+		error = EINVAL;
+	/* Check that the offset + len does not go past EOF of invp. */
+	if (error == 0)
+		error = VOP_GETATTR(invp, &va, td->td_ucred);
+	if (error == 0 && va.va_size < (*inoffp + len))
+		error = EINVAL;
+	if (error != 0) {
+		*lenp = 0;
+		return (error);
+	}
+
+	/*
+	 * If the two vnodes are for the same file system, try the
+	 * VOP_COPY_FILE_RANGE() call first and do it here if the VOP
+	 * call fails.
+	 * NOTE(review): the fallback assumes a failed VOP left *inoffp,
+	 * *outoffp and *lenp untouched; confirm for each implementation.
+	 */
+	if (invp->v_mount == outvp->v_mount) {
+		error = VOP_COPY_FILE_RANGE(invp, inoffp, outvp, outoffp,
+		    lenp, flags);
+		if (error == 0)
+			return (error);
+	}
+
+	/*
+	 * Remember the current size of the output file.  A block of zeros
+	 * may only be skipped (left as a hole) when it lies at or beyond
+	 * this size; inside the existing file the zeros must be written so
+	 * that old data is overwritten.
+	 */
+	error = VOP_GETATTR(outvp, &va, td->td_ucred);
+	if (error != 0) {
+		*lenp = 0;
+		return (error);
+	}
+	outsize = va.va_size;
+
+	/*
+	 * Copy blocks of the size preferred by the input file, with a
+	 * minimum of 16Kbytes and a maximum of 1Mbytes.
+	 */
+	sfp = malloc(sizeof(*sfp), M_STATFS, M_WAITOK);
+	error = VFS_STATFS(invp->v_mount, sfp);
+	if (error != 0) {
+		free(sfp, M_STATFS);
+		*lenp = 0;
+		return (error);
+	}
+	if (sfp->f_iosize < 16384)
+		blksize = 16384;
+	else if (sfp->f_iosize > 1048576)
+		blksize = 1048576;
+	else
+		blksize = sfp->f_iosize;
+	free(sfp, M_STATFS);
+
+	/*
+	 * Start write for outvp.
+	 * NOTE(review): this is called with the vnode locks already held
+	 * by the caller; confirm the lock order against vn_start_write().
+	 */
+	error = vn_start_write(outvp, &mp, V_WAIT | PCATCH);
+	if (error != 0) {
+		*lenp = 0;
+		return (error);
+	}
+
+	dat = malloc(blksize, M_TEMP, M_WAITOK);
+	/*
+	 * It would be nice to use VOP_IOCTL() to find holes, but that
+	 * requires that invp be unlocked/relocked for each block read.
+	 * I am not sure we want to do that here, since it would open
+	 * up a window where another thread could write to the file while
+	 * the copy is in progress.
+	 * In the meantime, just scan for a read block of all 0s.
+	 */
+	while (error == 0 && len > 0) {
+		if (len > blksize)
+			xfer = blksize;
+		else
+			xfer = len;
+		error = vn_rdwr(UIO_READ, invp, dat, xfer, *inoffp,
+		    UIO_SYSSPACE, IO_NODELOCKED, td->td_ucred, NULL, &aresid,
+		    td);
+		/* Linux considers a range that exceeds EOF to be an error. */
+		if (error == 0 && aresid > 0)
+			error = EINVAL;
+		if (error != 0)
+			break;
+
+		if (*outoffp < outsize ||
+		    !copyfilerange_iszero(dat, xfer)) {
+			/* Real data, or zeros that must replace old data. */
+			error = vn_rdwr(UIO_WRITE, outvp, dat, xfer,
+			    *outoffp, UIO_SYSSPACE, IO_NODELOCKED,
+			    td->td_ucred, NULL, NULL, td);
+			if (error == 0 && *outoffp + xfer > outsize)
+				outsize = *outoffp + xfer;
+		} else if (xfer == len) {
+			/*
+			 * Hole at the end of the copy and beyond the
+			 * output's EOF: grow the file to cover it.  Since
+			 * *outoffp >= outsize here this never shrinks it.
+			 */
+			VATTR_NULL(&va);
+			va.va_size = *outoffp + len;
+			error = VOP_SETATTR(outvp, &va, td->td_ucred);
+		}
+		if (error == 0) {
+			*inoffp += xfer;
+			*outoffp += xfer;
+			len -= xfer;
+		}
+	}
+	/* Report only what was actually copied. */
+	*lenp -= len;
+	if (mp != NULL)
+		vn_finished_write(mp);
+	free(dat, M_TEMP);
+	return (error);
+}
+
+
int
vn_fsync_buf(struct vnode *vp, int waitfor)
{
Index: sys/kern/vnode_if.src
===================================================================
--- sys/kern/vnode_if.src
+++ sys/kern/vnode_if.src
@@ -718,6 +718,19 @@
};
+%% copy_file_range invp L L L
+%% copy_file_range outvp L L L
+
+vop_copy_file_range {
+ IN struct vnode *invp;
+ INOUT off_t *inoffp;
+ IN struct vnode *outvp;
+ INOUT off_t *outoffp;
+ INOUT size_t *lenp;
+ IN u_int flags;
+};
+
+
# The VOPs below are spares at the end of the table to allow new VOPs to be
# added in stable branches without breaking the KBI. New VOPs in HEAD should
# be added above these spares. When merging a new VOP to a stable branch,
Index: sys/sys/syscallsubr.h
===================================================================
--- sys/sys/syscallsubr.h
+++ sys/sys/syscallsubr.h
@@ -94,6 +94,8 @@
int kern_close(struct thread *td, int fd);
int kern_connectat(struct thread *td, int dirfd, int fd,
struct sockaddr *sa);
+int kern_copy_file_range(struct thread *td, int infd, off_t *inoffp,
+ int outfd, off_t *outoffp, size_t len, u_int flags);
int kern_cpuset_getaffinity(struct thread *td, cpulevel_t level,
cpuwhich_t which, id_t id, size_t cpusetsize, cpuset_t *maskp);
int kern_cpuset_setaffinity(struct thread *td, cpulevel_t level,
Index: sys/sys/vnode.h
===================================================================
--- sys/sys/vnode.h
+++ sys/sys/vnode.h
@@ -667,6 +667,8 @@
struct ucred *cred);
int vn_close(struct vnode *vp,
int flags, struct ucred *file_cred, struct thread *td);
+int vn_copy_file_range(struct vnode *invp, off_t *inoffp,
+ struct vnode *outvp, off_t *outoffp, size_t *lenp, u_int flags);
void vn_finished_write(struct mount *mp);
void vn_finished_secondary_write(struct mount *mp);
int vn_fsync_buf(struct vnode *vp, int waitfor);

File Metadata

Mime Type
text/plain
Expires
Sun, Oct 19, 2:32 PM (10 h, 51 m)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
23928802
Default Alt Text
D20584.id58464.diff (13 KB)

Event Timeline