Page Menu
Home
FreeBSD
Search
Configure Global Search
Log In
Files
F142241373
D20584.id58884.diff
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Flag For Later
Award Token
Size
26 KB
Referenced Files
None
Subscribers
None
D20584.id58884.diff
View Options
Index: include/unistd.h
===================================================================
--- include/unistd.h
+++ include/unistd.h
@@ -494,6 +494,7 @@
int acct(const char *);
int async_daemon(void);
int check_utility_compat(const char *);
+ssize_t copy_file_range(int, off_t *, int, off_t *, size_t, unsigned int);
const char *
crypt_get_format(void);
char *crypt_r(const char *, const char *, struct crypt_data *);
Index: lib/libc/sys/Makefile.inc
===================================================================
--- lib/libc/sys/Makefile.inc
+++ lib/libc/sys/Makefile.inc
@@ -175,6 +175,7 @@
closefrom.2 \
connect.2 \
connectat.2 \
+ copy_file_range.2 \
cpuset.2 \
cpuset_getaffinity.2 \
cpuset_getdomain.2 \
Index: lib/libc/sys/Symbol.map
===================================================================
--- lib/libc/sys/Symbol.map
+++ lib/libc/sys/Symbol.map
@@ -402,6 +402,7 @@
};
FBSD_1.6 {
+ copy_file_range;
fhlink;
fhlinkat;
fhreadlink;
Index: lib/libc/sys/copy_file_range.2
===================================================================
--- lib/libc/sys/copy_file_range.2
+++ lib/libc/sys/copy_file_range.2
@@ -0,0 +1,155 @@
+.\" SPDX-License-Identifier: BSD-2-Clause
+.\"
+.\" Copyright (c) 2019 Rick Macklem
+.\"
+.\" Redistribution and use in source and binary forms, with or without
+.\" modification, are permitted provided that the following conditions
+.\" are met:
+.\" 1. Redistributions of source code must retain the above copyright
+.\" notice, this list of conditions and the following disclaimer.
+.\" 2. Redistributions in binary form must reproduce the above copyright
+.\" notice, this list of conditions and the following disclaimer in the
+.\" documentation and/or other materials provided with the distribution.
+.\"
+.\" THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+.\" ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+.\" SUCH DAMAGE.
+.\"
+.\" $FreeBSD$
+.\"
+.Dd June 9, 2019
+.Dt COPY_FILE_RANGE 2
+.Os
+.Sh NAME
+.Nm copy_file_range
+.Nd kernel copy of a byte range from one file to another
+.Sh LIBRARY
+.Lb libc
+.Sh SYNOPSIS
+.In sys/types.h
+.In unistd.h
+.Ft ssize_t
+.Fo copy_file_range
+.Fa "int infd" "off_t *inoffp" "int outfd" "off_t *outoffp" "size_t len" "unsigned int flags"
+.Fc
+.Sh DESCRIPTION
+The
+.Fn copy_file_range
+system call copies
+.Fa len
+bytes from
+.Fa infd
+to
+.Fa outfd
+in the kernel.
+It may do this using a file system specific technique if
+.Fa infd
+and
+.Fa outfd
+are on the same file system.
+The
+.Fa infd
+argument must be opened for reading and the
+.Fa outfd
+argument must be opened for writing, but not
+.Dv O_APPEND .
+If
+.Fa inoffp
+or
+.Fa outoffp
+is
+.Dv NULL ,
+the file offset for
+.Fa infd
+or
+.Fa outfd
+respectively will be used and updated by
+the number of bytes copied.
+If
+.Fa inoffp
+or
+.Fa outoffp
+is not
+.Dv NULL ,
+the byte offset pointed to by
+.Fa inoffp
+or
+.Fa outoffp
+respectively will be used/updated and the file offset for
+.Fa infd
+or
+.Fa outfd
+respectively will not be affected.
+The
+.Fa flags
+argument is currently ignored and
+should be set to 0.
+.Sh RETURN VALUES
+If it succeeds, the call returns the number of bytes copied, which can be less
+than
+.Fa len .
+.Fn copy_file_range
+should be used in a loop until copying of the desired byte range has been
+completed.
+If an error has occurred, a \-1 is returned and the error code is placed in
+the global variable
+.Va errno .
+.Sh ERRORS
+The
+.Fn copy_file_range
+system call
+will fail if:
+.Bl -tag -width Er
+.It Bq Er EBADF
+If
+.Fa infd
+is not open for reading or
+.Fa outfd
+is not open for writing, or
+.Fa outfd
+is opened for writing with
+.Dv O_APPEND ,
+or if
+.Fa infd
+and
+.Fa outfd
+refer to the same file.
+.It Bq Er EFBIG
+If the copy exceeds the process's file size limit or the maximum file size
+for the file system
+.Fa outfd
+resides on.
+.It Bq Er EINVAL
+If the initial offset for
+.Fa infd
+plus
+.Fa len
+exceeds EOF for
+.Fa infd .
+.It Bq Er EIO
+An I/O error occurred while reading/writing the files.
+.It Bq Er EISDIR
+If either
+.Fa infd
+or
+.Fa outfd
+refers to a directory.
+.El
+.Sh STANDARDS
+The
+.Fn copy_file_range
+system call is expected to be compatible with the Linux system call of
+the same name.
+.Sh HISTORY
+The
+.Fn copy_file_range
+function appeared in
+.Fx 13.0 .
Index: share/man/man9/Makefile
===================================================================
--- share/man/man9/Makefile
+++ share/man/man9/Makefile
@@ -400,6 +400,7 @@
VOP_ATTRIB.9 \
VOP_BMAP.9 \
VOP_BWRITE.9 \
+ VOP_COPY_FILE_RANGE.9 \
VOP_CREATE.9 \
VOP_FSYNC.9 \
VOP_GETACL.9 \
Index: share/man/man9/VOP_COPY_FILE_RANGE.9
===================================================================
--- share/man/man9/VOP_COPY_FILE_RANGE.9
+++ share/man/man9/VOP_COPY_FILE_RANGE.9
@@ -0,0 +1,100 @@
+.\" SPDX-License-Identifier: BSD-2-Clause
+.\"
+.\" Copyright (c) 2019 Rick Macklem
+.\"
+.\" Redistribution and use in source and binary forms, with or without
+.\" modification, are permitted provided that the following conditions
+.\" are met:
+.\" 1. Redistributions of source code must retain the above copyright
+.\" notice, this list of conditions and the following disclaimer.
+.\" 2. Redistributions in binary form must reproduce the above copyright
+.\" notice, this list of conditions and the following disclaimer in the
+.\" documentation and/or other materials provided with the distribution.
+.\"
+.\" THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+.\" ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+.\" SUCH DAMAGE.
+.\"
+.\" $FreeBSD$
+.\"
+.Dd June 20, 2019
+.Dt VOP_COPY_FILE_RANGE 9
+.Os
+.Sh NAME
+.Nm VOP_COPY_FILE_RANGE
+.Nd copy a byte range from one regular file to another within a file system
+.Sh SYNOPSIS
+.In sys/param.h
+.In sys/vnode.h
+.Ft int
+.Fn VOP_COPY_FILE_RANGE "struct vnode *invp" "off_t *inoff" "struct vnode *outvp" "off_t *outoff" "size_t *len" "unsigned int flags"
+.Sh DESCRIPTION
+This entry point copies a byte range from one regular file to another within a
+file system.
+.Pp
+The arguments are:
+.Bl -tag -width ioflag
+.It Fa invp
+The vnode of the input file.
+.It Fa inoff
+A pointer to the file offset for the input file.
+.It Fa outvp
+The vnode of the output file.
+.It Fa outoff
+A pointer to the file offset for the output file.
+.It Fa len
+A pointer to the number of bytes to be copied.
+.It Fa flags
+Flags, should be set to 0 for now.
+.El
+.Pp
+The
+.Fa inoff
+and
+.Fa outoff
+arguments point to the locations of the file offsets.
+These file offsets should be updated by the number of bytes copied.
+The
+.Fa len
+argument points to the location that stores the number of bytes
+to be copied.
+On return it should be set to the number of bytes actually copied, so
+the value pointed to by
+.Fa len
+will normally be unchanged for a non-error return.
+However, a copy of fewer bytes than requested is permitted.
+.Sh LOCKS
+The vnodes should be unlocked when the call is made and must still be
+unlocked when the call returns.
+The byte ranges for both
+.Fa invp
+and
+.Fa outvp
+should be range locked when this call is made.
+.Sh RETURN VALUES
+Zero is returned on success, otherwise an error code is returned.
+.Sh ERRORS
+.Bl -tag -width Er
+.It Bq Er EFBIG
+If the copy exceeds the process's file size limit or the maximum file size
+for the file system
+.Fa invp
+and
+.Fa outvp
+reside on.
+.It Bq Er EIO
+An I/O error occurred while reading/writing the files.
+.It Bq Er ENOSPC
+The file system is full.
+.El
+.Sh SEE ALSO
+.Xr vn_rdwr 9 ,
+.Xr vnode 9
Index: sys/kern/syscalls.master
===================================================================
--- sys/kern/syscalls.master
+++ sys/kern/syscalls.master
@@ -3175,6 +3175,16 @@
int flag
);
}
+569 AUE_NULL STD {
+ ssize_t copy_file_range(
+ int infd,
+ _Inout_opt_ off_t *inoffp,
+ int outfd,
+ _Inout_opt_ off_t *outoffp,
+ size_t len,
+ unsigned int flags
+ );
+ }
; Please copy any additions and changes to the following compatability tables:
; sys/compat/freebsd32/syscalls.master
Index: sys/kern/vfs_default.c
===================================================================
--- sys/kern/vfs_default.c
+++ sys/kern/vfs_default.c
@@ -83,6 +83,7 @@
static int vop_stdis_text(struct vop_is_text_args *ap);
static int vop_stdunset_text(struct vop_unset_text_args *ap);
static int vop_stdadd_writecount(struct vop_add_writecount_args *ap);
+static int vop_stdcopy_file_range(struct vop_copy_file_range_args *ap);
static int vop_stdfdatasync(struct vop_fdatasync_args *ap);
static int vop_stdgetpages_async(struct vop_getpages_async_args *ap);
@@ -140,6 +141,7 @@
.vop_set_text = vop_stdset_text,
.vop_unset_text = vop_stdunset_text,
.vop_add_writecount = vop_stdadd_writecount,
+ .vop_copy_file_range = vop_stdcopy_file_range,
};
/*
@@ -1206,6 +1208,16 @@
return (0);
}
+static int
+vop_stdcopy_file_range(struct vop_copy_file_range_args *ap)
+{
+
+ /* Standard VOP_COPY_FILE_RANGE(): no file system specific technique, */
+ /* so hand every argument through to the generic copy routine. */
+ return (vn_generic_copy_file_range(ap->a_invp, ap->a_inoffp,
+ ap->a_outvp, ap->a_outoffp, ap->a_lenp, ap->a_flags));
+}
+
int
vfs_stdvget (mp, ino, flags, vpp)
struct mount *mp;
Index: sys/kern/vfs_subr.c
===================================================================
--- sys/kern/vfs_subr.c
+++ sys/kern/vfs_subr.c
@@ -60,6 +60,7 @@
#include <sys/extattr.h>
#include <sys/file.h>
#include <sys/fcntl.h>
+#include <sys/filio.h>
#include <sys/jail.h>
#include <sys/kdb.h>
#include <sys/kernel.h>
@@ -80,6 +81,7 @@
#include <sys/stat.h>
#include <sys/sysctl.h>
#include <sys/syslog.h>
+#include <sys/unistd.h>
#include <sys/vmmeter.h>
#include <sys/vnode.h>
#include <sys/watchdog.h>
@@ -5657,3 +5659,292 @@
mtx_unlock(&mp->mnt_listmtx);
mnt_vnode_markerfree_active(mvp, mp);
}
+
+/*
+ * Test len bytes of data starting at dat for all bytes == 0.
+ * Return true if all bytes are zero, false otherwise.
+ * Expects dat to be well aligned, since it is scanned a u_int at a time.
+ */
+static bool
+mem_iszero(void *dat, int len)
+{
+ int i;
+ const u_int *p;
+ const char *cp;
+
+ for (p = (const u_int *)dat; len > 0; len -= sizeof(*p), p++) {
+ if (len >= sizeof(*p)) {
+ if (*p != 0)
+ return (false);
+ } else { /* Short tail: check the last bytes one at a time. */
+ cp = (const char *)p;
+ for (i = 0; i < len; i++, cp++)
+ if (*cp != '\0')
+ return (false);
+ }
+ }
+ return (true);
+}
+
+/*
+ * Write xfer bytes to outvp starting at outoff, at most blksize bytes per
+ * vn_rdwr() call.  Every write is sourced from the start of dat, so the
+ * same buffer contents are repeated throughout the chunk.
+ */
+static int
+vn_write_outvp(struct vnode *outvp, char *dat, off_t outoff, size_t xfer,
+ u_long blksize)
+{
+ struct mount *mp;
+ size_t xfer2;
+ int error, lckf;
+
+ mp = NULL;
+ error = vn_start_write(outvp, &mp, V_WAIT);
+ if (error == 0) {
+ if (MNT_SHARED_WRITES(mp))
+ lckf = LK_SHARED;
+ else
+ lckf = LK_EXCLUSIVE;
+ error = vn_lock(outvp, lckf);
+ }
+ if (error == 0) {
+ do {
+ xfer2 = xfer; /* Size of this write, clipped to blksize. */
+ if (xfer2 > blksize)
+ xfer2 = blksize;
+ error = vn_rdwr(UIO_WRITE, outvp, dat, xfer2, outoff,
+ UIO_SYSSPACE, IO_NODELOCKED, curthread->td_ucred,
+ NULL, NULL, curthread);
+ outoff += xfer2;
+ xfer -= xfer2;
+ } while (xfer > 0 && error == 0);
+ VOP_UNLOCK(outvp, 0);
+ }
+ if (mp != NULL) /* Set only if vn_start_write() handed back a mount. */
+ vn_finished_write(mp);
+ return (error);
+}
+
+/*
+ * Copy a byte range of one file to another, preserving holes where possible.
+ * This function can handle invp and outvp being on different file systems
+ * and can be called by a VOP_COPY_FILE_RANGE() that has no better way.
+ * On return, *lenp is the number of bytes copied; offsets are advanced.
+ */
+int
+vn_generic_copy_file_range(struct vnode *invp, off_t *inoffp,
+ struct vnode *outvp, off_t *outoffp, size_t *lenp, unsigned int flags)
+{
+ struct vattr va;
+ struct mount *mp;
+ off_t startoff, endoff;
+ u_long blksize;
+ int error;
+ bool cantseek, readzeros;
+ ssize_t aresid;
+ size_t copylen, len, savlen, xfer, xfer2;
+ char *dat;
+ long holein, holeout;
+ struct thread *td = curthread;
+
+ savlen = len = *lenp;
+ error = 0;
+ dat = NULL;
+
+ error = vn_lock(invp, LK_SHARED);
+ if (error != 0)
+ goto out;
+ if (VOP_PATHCONF(invp, _PC_MIN_HOLE_SIZE, &holein) != 0)
+ holein = 0;
+ VOP_UNLOCK(invp, 0);
+ if (error != 0)
+ goto out;
+
+ mp = NULL;
+ error = vn_start_write(outvp, &mp, V_WAIT);
+ if (error == 0)
+ error = vn_lock(outvp, LK_EXCLUSIVE);
+ if (error == 0) {
+ if (VOP_PATHCONF(outvp, _PC_MIN_HOLE_SIZE, &holeout) != 0)
+ holeout = 0;
+ /*
+ * Holes that are past EOF do not need to be written as a block
+ * of zero bytes. So, truncate the output file as far as
+ * possible and then use va.va_size to decide if writing 0
+ * bytes is necessary in the loop below.
+ */
+ error = VOP_GETATTR(outvp, &va, td->td_ucred);
+ if (error == 0 && va.va_size > *outoffp && va.va_size <=
+ *outoffp + len) {
+ VATTR_NULL(&va);
+ va.va_size = *outoffp;
+ error = VOP_SETATTR(outvp, &va, td->td_ucred);
+ }
+ VOP_UNLOCK(outvp, 0);
+ }
+ if (mp != NULL)
+ vn_finished_write(mp);
+ if (error != 0)
+ goto out;
+
+ /*
+ * Set the blksize to the larger of the hole sizes for invp and outvp.
+ * If hole sizes aren't available, set the blksize to the larger
+ * f_iosize of invp and outvp.
+ * This code expects the hole sizes and f_iosizes to be powers of 2.
+ * This value is clipped at 4Kbytes and 1Mbyte.
+ */
+ if (holein > 0 && holeout > 0) {
+ if (holein > holeout)
+ blksize = holein;
+ else
+ blksize = holeout;
+ } else if (invp->v_mount->mnt_stat.f_iosize >
+ outvp->v_mount->mnt_stat.f_iosize)
+ blksize = invp->v_mount->mnt_stat.f_iosize;
+ else
+ blksize = outvp->v_mount->mnt_stat.f_iosize;
+ if (blksize < 4096)
+ blksize = 4096;
+ else if (blksize > 1048576)
+ blksize = 1048576;
+ dat = malloc(blksize, M_TEMP, M_WAITOK);
+
+ /*
+ * If VOP_IOCTL(FIOSEEKHOLE) works for invp, use it and FIOSEEKDATA
+ * to find holes. Otherwise, just scan the read block for all 0s
+ * in the inner loop where the data copying is done.
+ * Note that some file systems such as NFSv3, NFSv4.0 and NFSv4.1 may
+ * support holes on the server, but do not support FIOSEEKHOLE.
+ */
+ while (len > 0 && error == 0) {
+ endoff = 0; /* To shut up compilers. */
+ cantseek = true;
+ startoff = *inoffp;
+ copylen = len;
+
+ readzeros = false;
+ /*
+ * Find the next data area. If there is just a hole to EOF,
+ * FIOSEEKDATA should fail and then we drop down into the
+ * inner loop and create the hole on the outvp file.
+ * (I do not know if any file system will report a hole to
+ * EOF via FIOSEEKHOLE, but I am pretty sure FIOSEEKDATA
+ * will fail for those file systems.)
+ *
+ * For input files that don't support FIOSEEKDATA/FIOSEEKHOLE,
+ * the code just falls through to the inner copy loop.
+ */
+ error = EINVAL;
+ if (holein > 0)
+ error = VOP_IOCTL(invp, FIOSEEKDATA, &startoff, 0,
+ td->td_ucred, td);
+ if (error == 0) {
+ endoff = startoff;
+ error = VOP_IOCTL(invp, FIOSEEKHOLE, &endoff, 0,
+ td->td_ucred, td);
+ /*
+ * If the hole extends to the end of the byte range
+ * being copied, set error so that a copy of the
+ * last bytes (all 0s) will be done in the loop
+ * below, to ensure a hole is created in the output
+ * file.
+ * Also, set readzeros = true, since reads would return
+ * all 0 bytes.
+ */
+ if (startoff >= *inoffp + len) {
+ readzeros = true;
+ error = EINVAL;
+ }
+ }
+ if (error == 0) {
+ if (startoff > *inoffp) {
+ /* Found hole before data block. */
+ xfer = startoff - *inoffp;
+ if (*outoffp < va.va_size) {
+ /* Hole overlaps output data: must write 0s. */
+ xfer2 = va.va_size - *outoffp;
+ if (xfer2 > xfer)
+ xfer2 = xfer;
+ if (xfer2 > blksize)
+ memset(dat, 0, blksize);
+ else
+ memset(dat, 0, xfer2);
+ error = vn_write_outvp(outvp, dat,
+ *outoffp, xfer2, blksize);
+ }
+ if (error == 0) {
+ *inoffp += xfer;
+ *outoffp += xfer;
+ len -= xfer;
+ }
+ }
+ copylen = len;
+ if (copylen > endoff - startoff)
+ copylen = endoff - startoff;
+ cantseek = false;
+ } else {
+ cantseek = true;
+ startoff = *inoffp;
+ copylen = len;
+ error = 0;
+ }
+
+ xfer = blksize;
+ if (cantseek) {
+ /*
+ * Set first xfer to end at a block boundary, so that
+ * holes are more likely detected in the loop below via
+ * the for all bytes 0 method.
+ */
+ xfer -= (*inoffp % blksize);
+ }
+ if (readzeros)
+ memset(dat, 0, blksize);
+ /* Loop copying the data block. */
+ while (copylen > 0 && error == 0) {
+ if (copylen < xfer)
+ xfer = copylen;
+ if (!readzeros) {
+ error = vn_lock(invp, LK_SHARED);
+ if (error != 0)
+ goto out;
+ error = vn_rdwr(UIO_READ, invp, dat, xfer,
+ startoff, UIO_SYSSPACE, IO_NODELOCKED,
+ td->td_ucred, NULL, &aresid, td);
+ VOP_UNLOCK(invp, 0);
+ /*
+ * Linux considers a range that exceeds EOF to
+ * be an error, so we will too.
+ */
+ if (error == 0 && aresid > 0)
+ error = EINVAL;
+ }
+ if (error == 0) {
+ /*
+ * Skip the write for holes past the initial EOF
+ * of the output file, unless this is the last
+ * write of the output file at EOF.
+ */
+ if (!cantseek || *outoffp < va.va_size ||
+ xfer == len || !(readzeros ||
+ mem_iszero(dat, xfer)))
+ error = vn_write_outvp(outvp, dat,
+ *outoffp, xfer, blksize);
+ if (error == 0) {
+ *inoffp += xfer;
+ startoff += xfer;
+ *outoffp += xfer;
+ copylen -= xfer;
+ len -= xfer;
+ }
+ }
+ xfer = blksize;
+ }
+ }
+out:
+ *lenp = savlen - len;
+ free(dat, M_TEMP);
+ return (error);
+}
Index: sys/kern/vfs_syscalls.c
===================================================================
--- sys/kern/vfs_syscalls.c
+++ sys/kern/vfs_syscalls.c
@@ -4814,3 +4814,78 @@
uap->advice);
return (kern_posix_error(td, error));
}
+
+int
+kern_copy_file_range(struct thread *td, int infd, off_t *inoffp, int outfd,
+ off_t *outoffp, size_t len, unsigned int flags)
+{
+ struct file *infp, *outfp;
+ struct vnode *invp, *outvp;
+ int error;
+ size_t retlen;
+
+ infp = outfp = NULL;
+ retlen = 0;
+
+ /* Get the file structures for the descriptors; dropped at out. */
+ error = fget_read(td, infd, &cap_read_rights, &infp);
+ if (error != 0)
+ goto out;
+ error = fget_write(td, outfd, &cap_write_rights, &outfp);
+ if (error != 0)
+ goto out;
+
+ /* A NULL offset pointer selects the file's own f_offset. */
+ if (inoffp == NULL)
+ inoffp = &infp->f_offset;
+ if (outoffp == NULL)
+ outoffp = &outfp->f_offset;
+
+ invp = infp->f_vnode;
+ outvp = outfp->f_vnode;
+ /* Need FWRITE without FAPPEND, FREAD, and two distinct vnodes. */
+ if ((outfp->f_flag & (FWRITE | FAPPEND)) != FWRITE ||
+ (infp->f_flag & FREAD) == 0 || invp == outvp) {
+ error = EBADF;
+ goto out;
+ }
+
+ retlen = len; /* Bytes requested; the callee updates it. */
+ error = vn_copy_file_range(invp, inoffp, outvp, outoffp, &retlen,
+ flags);
+out:
+ if (outfp != NULL)
+ fdrop(outfp, td);
+ if (infp != NULL)
+ fdrop(infp, td);
+ td->td_retval[0] = retlen; /* Stays 0 if the copy was never started. */
+ return (error);
+}
+
+int
+sys_copy_file_range(struct thread *td, struct copy_file_range_args *uap)
+{
+ off_t inoff, outoff, *inoffp, *outoffp;
+ int error;
+
+ inoffp = outoffp = NULL; /* NULL means "use the file's own offset". */
+ if (uap->inoffp != NULL) { /* Work on a kernel copy of the offset. */
+ error = copyin(uap->inoffp, &inoff, sizeof(off_t));
+ if (error != 0)
+ return (error);
+ inoffp = &inoff;
+ }
+ if (uap->outoffp != NULL) {
+ error = copyin(uap->outoffp, &outoff, sizeof(off_t));
+ if (error != 0)
+ return (error);
+ outoffp = &outoff;
+ }
+ error = kern_copy_file_range(td, uap->infd, inoffp, uap->outfd,
+ outoffp, uap->len, uap->flags);
+ if (error == 0 && uap->inoffp != NULL) /* Hand back the new offset. */
+ error = copyout(inoffp, uap->inoffp, sizeof(off_t));
+ if (error == 0 && uap->outoffp != NULL)
+ error = copyout(outoffp, uap->outoffp, sizeof(off_t));
+ return (error);
+}
Index: sys/kern/vfs_vnops.c
===================================================================
--- sys/kern/vfs_vnops.c
+++ sys/kern/vfs_vnops.c
@@ -61,6 +61,7 @@
#include <sys/limits.h>
#include <sys/lock.h>
#include <sys/mman.h>
+#include <sys/malloc.h>
#include <sys/mount.h>
#include <sys/mutex.h>
#include <sys/namei.h>
@@ -89,6 +90,8 @@
#include <vm/vm_page.h>
#include <vm/vnode_pager.h>
+#include <machine/vmparam.h>
+
#ifdef HWPMC_HOOKS
#include <sys/pmckern.h>
#endif
@@ -2494,6 +2497,86 @@
va->va_fsid += (uint32_t)f->val[0];
}
+/*
+ * Copies a byte range from invp to outvp.  After validating the arguments
+ * and rangelocking both byte ranges, calls VOP_COPY_FILE_RANGE() if the
+ * vnodes share a mount point, otherwise vn_generic_copy_file_range().
+ * vn_generic_copy_file_range() is factored out, so it can be called
+ * from a VOP_COPY_FILE_RANGE() call as well, but handles vnodes from
+ * different file systems.  Returns 0 or an errno value.
+ */
+int
+vn_copy_file_range(struct vnode *invp, off_t *inoffp, struct vnode *outvp,
+ off_t *outoffp, size_t *lenp, unsigned int flags)
+{
+ struct vattr va;
+ int error;
+ size_t len;
+ uint64_t uvalin, uvalout;
+ void *rl_rcookie, *rl_wcookie;
+
+ len = *lenp;
+ error = 0;
+ rl_rcookie = rl_wcookie = NULL;
+
+ /* Sanity checks; the uint64_t sums detect offset + len overflow. */
+ uvalin = *inoffp;
+ uvalin += len;
+ uvalout = *outoffp;
+ uvalout += len;
+ if (invp->v_type == VDIR || outvp->v_type == VDIR)
+ error = EISDIR;
+ else if (*inoffp < 0 || uvalin > INT64_MAX || uvalin <
+ (uint64_t)*inoffp || *outoffp < 0 || uvalout > INT64_MAX ||
+ uvalout < (uint64_t)*outoffp || invp->v_type != VREG ||
+ outvp->v_type != VREG || invp == outvp)
+ error = EINVAL;
+ if (error != 0)
+ goto out;
+
+ error = vn_lock(invp, LK_SHARED);
+ if (error != 0)
+ goto out;
+ /* Check that the offset + len does not go past EOF of invp. */
+ error = VOP_GETATTR(invp, &va, curthread->td_ucred);
+ if (error == 0 && va.va_size < (*inoffp + len))
+ error = EINVAL;
+ VOP_UNLOCK(invp, 0);
+ if (error != 0)
+ goto out;
+
+ /* Range lock both ranges; never sleep on invp while holding outvp. */
+ for (;;) {
+ rl_wcookie = vn_rangelock_wlock(outvp, *outoffp, *outoffp +
+ len);
+ rl_rcookie = vn_rangelock_rlock_trylock(invp, *inoffp,
+ *inoffp + len);
+ if (rl_rcookie != NULL)
+ break;
+ vn_rangelock_unlock(outvp, rl_wcookie);
+ rl_rcookie = vn_rangelock_rlock(invp, *inoffp, *inoffp + len);
+ vn_rangelock_unlock(invp, rl_rcookie);
+ }
+
+ /*
+ * If the two vnodes are for the same file system, call
+ * VOP_COPY_FILE_RANGE(), otherwise call vn_generic_copy_file_range()
+ * which can handle copies across multiple file systems.
+ */
+ if (invp->v_mount == outvp->v_mount)
+ error = VOP_COPY_FILE_RANGE(invp, inoffp, outvp, outoffp,
+ lenp, flags);
+ else
+ error = vn_generic_copy_file_range(invp, inoffp, outvp,
+ outoffp, lenp, flags);
+out:
+ if (rl_rcookie != NULL)
+ vn_rangelock_unlock(invp, rl_rcookie);
+ if (rl_wcookie != NULL)
+ vn_rangelock_unlock(outvp, rl_wcookie);
+ return (error);
+}
+
int
vn_fsync_buf(struct vnode *vp, int waitfor)
{
Index: sys/kern/vnode_if.src
===================================================================
--- sys/kern/vnode_if.src
+++ sys/kern/vnode_if.src
@@ -718,6 +718,19 @@
};
+%% copy_file_range invp U U U
+%% copy_file_range outvp U U U
+
+vop_copy_file_range {
+ IN struct vnode *invp;
+ INOUT off_t *inoffp;
+ IN struct vnode *outvp;
+ INOUT off_t *outoffp;
+ INOUT size_t *lenp;
+ IN unsigned int flags;
+};
+
+
# The VOPs below are spares at the end of the table to allow new VOPs to be
# added in stable branches without breaking the KBI. New VOPs in HEAD should
# be added above these spares. When merging a new VOP to a stable branch,
Index: sys/sys/syscallsubr.h
===================================================================
--- sys/sys/syscallsubr.h
+++ sys/sys/syscallsubr.h
@@ -94,6 +94,8 @@
int kern_close(struct thread *td, int fd);
int kern_connectat(struct thread *td, int dirfd, int fd,
struct sockaddr *sa);
+int kern_copy_file_range(struct thread *td, int infd, off_t *inoffp,
+ int outfd, off_t *outoffp, size_t len, unsigned int flags);
int kern_cpuset_getaffinity(struct thread *td, cpulevel_t level,
cpuwhich_t which, id_t id, size_t cpusetsize, cpuset_t *maskp);
int kern_cpuset_setaffinity(struct thread *td, cpulevel_t level,
Index: sys/sys/vnode.h
===================================================================
--- sys/sys/vnode.h
+++ sys/sys/vnode.h
@@ -667,9 +667,15 @@
struct ucred *cred);
int vn_close(struct vnode *vp,
int flags, struct ucred *file_cred, struct thread *td);
+int vn_copy_file_range(struct vnode *invp, off_t *inoffp,
+ struct vnode *outvp, off_t *outoffp, size_t *lenp,
+ unsigned int flags);
void vn_finished_write(struct mount *mp);
void vn_finished_secondary_write(struct mount *mp);
int vn_fsync_buf(struct vnode *vp, int waitfor);
+int vn_generic_copy_file_range(struct vnode *invp, off_t *inoffp,
+ struct vnode *outvp, off_t *outoffp, size_t *lenp,
+ unsigned int flags);
int vn_isdisk(struct vnode *vp, int *errp);
int _vn_lock(struct vnode *vp, int flags, char *file, int line);
#define vn_lock(vp, flags) _vn_lock(vp, flags, __FILE__, __LINE__)
@@ -720,8 +726,12 @@
VI_MTX(vp))
#define vn_rangelock_rlock(vp, start, end) \
rangelock_rlock(&(vp)->v_rl, (start), (end), VI_MTX(vp))
+#define vn_rangelock_rlock_trylock(vp, start, end) \
+ rangelock_rlock_trylock(&(vp)->v_rl, (start), (end), VI_MTX(vp))
#define vn_rangelock_wlock(vp, start, end) \
rangelock_wlock(&(vp)->v_rl, (start), (end), VI_MTX(vp))
+#define vn_rangelock_wlock_trylock(vp, start, end) \
+ rangelock_wlock_trylock(&(vp)->v_rl, (start), (end), VI_MTX(vp))
int vfs_cache_lookup(struct vop_lookup_args *ap);
void vfs_timestamp(struct timespec *);
File Metadata
Details
Attached
Mime Type
text/plain
Expires
Sun, Jan 18, 5:26 PM (11 h, 49 m)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
27713358
Default Alt Text
D20584.id58884.diff (26 KB)
Attached To
Mode
D20584: add a linux compatible copy_file_range(2) syscall
Attached
Detach File
Event Timeline
Log In to Comment