Page Menu
Home
FreeBSD
Search
Configure Global Search
Log In
Files
F132631289
D20584.id58464.diff
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Flag For Later
Award Token
Size
13 KB
Referenced Files
None
Subscribers
None
D20584.id58464.diff
View Options
Index: include/unistd.h
===================================================================
--- include/unistd.h
+++ include/unistd.h
@@ -494,6 +494,7 @@
int acct(const char *);
int async_daemon(void);
int check_utility_compat(const char *);
+ssize_t copy_file_range(int, off_t *, int, off_t *, size_t, u_int);
const char *
crypt_get_format(void);
char *crypt_r(const char *, const char *, struct crypt_data *);
Index: lib/libc/sys/Makefile.inc
===================================================================
--- lib/libc/sys/Makefile.inc
+++ lib/libc/sys/Makefile.inc
@@ -175,6 +175,7 @@
closefrom.2 \
connect.2 \
connectat.2 \
+ copy_file_range.2 \
cpuset.2 \
cpuset_getaffinity.2 \
cpuset_getdomain.2 \
Index: lib/libc/sys/Symbol.map
===================================================================
--- lib/libc/sys/Symbol.map
+++ lib/libc/sys/Symbol.map
@@ -402,6 +402,7 @@
};
FBSD_1.6 {
+ copy_file_range;
fhlink;
fhlinkat;
fhreadlink;
Index: lib/libc/sys/copy_file_range.2
===================================================================
--- lib/libc/sys/copy_file_range.2
+++ lib/libc/sys/copy_file_range.2
@@ -0,0 +1,144 @@
+.\" SPDX-License-Identifier: BSD-2-Clause
+.\"
+.\" Copyright (c) 2019 Rick Macklem
+.\"
+.\" Redistribution and use in source and binary forms, with or without
+.\" modification, are permitted provided that the following conditions
+.\" are met:
+.\" 1. Redistributions of source code must retain the above copyright
+.\" notice, this list of conditions and the following disclaimer.
+.\" 2. Redistributions in binary form must reproduce the above copyright
+.\" notice, this list of conditions and the following disclaimer in the
+.\" documentation and/or other materials provided with the distribution.
+.\"
+.\" THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+.\" ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+.\" SUCH DAMAGE.
+.\"
+.\" $FreeBSD$
+.\"
+.Dd June 9, 2019
+.Dt COPY_FILE_RANGE 2
+.Os
+.Sh NAME
+.Nm copy_file_range
+.Nd kernel copy of a byte range from one file to another
+.Sh LIBRARY
+.Lb libc
+.Sh SYNOPSIS
+.In sys/types.h
+.In unistd.h
+.Ft ssize_t
+.Fn copy_file_range "int infd" "off_t *inoffp" "int outfd" "off_t *outoffp" "size_t len" "u_int flags"
+.Fc
+.Sh DESCRIPTION
+The
+.Fn copy_file_range
+system call
+copies
+.Fa len
+bytes from
+.Fa infd
+to
+.Fa outfd
+in the kernel.
+It may do this using a file system specific technique if
+.Fa infd
+and
+.Fa outfd
+are on the same file system.
+The
+.Fa infd
+argument must be opened for reading and the
+.Fa outfd
+argument must be opened for writing, but not O_APPEND.
+If
+.Fa inoffp
+or
+.Fa outoffp
+is NULL, the file offset for
+.Fa infd
+or
+.Fa outfd
+respectively will be used and updated by
+the number of bytes copied.
+If
+.Fa inoffp
+or
+.Fa outoffp
+is not NULL, the byte offset pointed to by
+.Fa inoffp
+or
+.Fa outoffp
+respectively will be used/updated and the file offset for
+.Fa infd
+or
+.Fa outfd
+respectively will not be affected.
+The
+.Fa flags
+argument is currently ignored and should be set to 0.
+.Pp
+.Sh RETURN VALUES
+If it succeeds, the call returns the number of bytes copied, which can be less
+than
+.Fa len .
+.Fn copy_file_range
+should be used in a loop until copying of the desired byte range has been
+completed.
+If an error has occurred, a \-1 is returned and the error code is placed in
+the global variable
+.Va errno .
+.Sh ERRORS
+The
+.Fn copy_file_range
+system call
+will fail if:
+.Bl -tag -width Er
+.It Bq Er EBADF
+If
+.Fa
+infd
+is not open for reading or
+.Fa
+outfd
+is not open for writing, or opened for writing with O_APPEND.
+.It Bq Er EFBIG
+If the copy exceeds the process's file size limit or the maximum file size
+for the file system
+.Fa outfd
+resides on.
+.It Bq Er EINVAL
+If the initial offset for
+.Fa infd
+plus
+.Fa len
+exceeds EOF for
+.Fa infd .
+.It Bq Er EIO
+An I/O error occurred while reading/writing the files.
+.It Bq Er EISDIR
+If either
+.Fa infd
+or
+.Fa outfd
+refers to a directory.
+.El
+.Sh STANDARDS
+The
+.Fn copy_file_range
+system call is expected to be compatible with the Linux system call of
+the same name.
+.Sh HISTORY
+The
+.Fn copy_file_range
+function appeared in
+.Fx 13.0 .
Index: sys/kern/syscalls.master
===================================================================
--- sys/kern/syscalls.master
+++ sys/kern/syscalls.master
@@ -3175,6 +3175,16 @@
int flag
);
}
+569 AUE_NULL STD {
+ ssize_t copy_file_range(
+ _In_ int infd,
+ _Inout_opt_ off_t *inoffp,
+ _In_ int outfd,
+ _Inout_opt_ off_t *outoffp,
+ _In_ size_t len,
+ _In_ u_int flags
+ );
+ }
; Please copy any additions and changes to the following compatability tables:
; sys/compat/freebsd32/syscalls.master
Index: sys/kern/vfs_default.c
===================================================================
--- sys/kern/vfs_default.c
+++ sys/kern/vfs_default.c
@@ -140,6 +140,7 @@
.vop_set_text = vop_stdset_text,
.vop_unset_text = vop_stdunset_text,
.vop_add_writecount = vop_stdadd_writecount,
+ .vop_copy_file_range = VOP_EOPNOTSUPP,
};
/*
Index: sys/kern/vfs_syscalls.c
===================================================================
--- sys/kern/vfs_syscalls.c
+++ sys/kern/vfs_syscalls.c
@@ -4814,3 +4814,94 @@
uap->advice);
return (kern_posix_error(td, error));
}
+
+int
+kern_copy_file_range(struct thread *td, int infd, off_t *inoffp, int outfd,
+ off_t *outoffp, size_t len, u_int flags)
+{
+ struct file *infp, *outfp;
+ struct vnode *invp, *outvp;
+ int error, lock_flags;
+ size_t retlen;
+
+ infp = outfp = NULL;
+ retlen = 0;
+
+ /* Get the file structures for the file descriptors. */
+ error = fget_read(td, infd, &cap_read_rights, &infp);
+ if (error != 0)
+ goto out;
+ error = fget_write(td, outfd, &cap_write_rights, &outfp);
+ if (error != 0)
+ goto out;
+
+ /* Set the offset pointers to the correct place. */
+ if (inoffp == NULL)
+ inoffp = &infp->f_offset;
+ if (outoffp == NULL)
+ outoffp = &outfp->f_offset;
+
+ /* Sanity check the f_flag bits. */
+ if ((outfp->f_flag & (FWRITE | FAPPEND)) != FWRITE ||
+ (infp->f_flag & FREAD) == 0) {
+ error = EBADF;
+ goto out;
+ }
+
+ /* Lock the vnodes. */
+ invp = infp->f_vnode;
+ error = vn_lock(invp, LK_SHARED);
+ if (error != 0)
+ goto out;
+ outvp = outfp->f_vnode;
+ if (MNT_SHARED_WRITES(outvp->v_mount))
+ lock_flags = LK_SHARED;
+ else
+ lock_flags = LK_EXCLUSIVE;
+ error = vn_lock(outvp, lock_flags);
+ if (error != 0) {
+ VOP_UNLOCK(outvp, 0);
+ goto out;
+ }
+
+ retlen = len;
+ error = vn_copy_file_range(invp, inoffp, outvp, outoffp, &retlen,
+ flags);
+ VOP_UNLOCK(invp, 0);
+ VOP_UNLOCK(outvp, 0);
+out:
+ if (infp != NULL)
+ fdrop(infp, td);
+ if (outfp != NULL)
+ fdrop(outfp, td);
+ td->td_retval[0] = retlen;
+ return (error);
+}
+
+int
+sys_copy_file_range(struct thread *td, struct copy_file_range_args *uap)
+{
+ off_t inoff, outoff, *inoffp, *outoffp;
+ int error;
+
+ inoffp = outoffp = NULL;
+ if (uap->inoffp != NULL) {
+ error = copyin(uap->inoffp, &inoff, sizeof(off_t));
+ if (error != 0)
+ return (error);
+ inoffp = &inoff;
+ }
+ if (uap->outoffp != NULL) {
+ error = copyin(uap->outoffp, &outoff, sizeof(off_t));
+ if (error != 0)
+ return (error);
+ outoffp = &outoff;
+ }
+ error = kern_copy_file_range(td, uap->infd, inoffp, uap->outfd,
+ outoffp, uap->len, uap->flags);
+ if (error == 0 && uap->inoffp != NULL)
+ error = copyout(inoffp, uap->inoffp, sizeof(off_t));
+ if (error == 0 && uap->outoffp != NULL)
+ error = copyout(outoffp, uap->outoffp, sizeof(off_t));
+ return (error);
+}
Index: sys/kern/vfs_vnops.c
===================================================================
--- sys/kern/vfs_vnops.c
+++ sys/kern/vfs_vnops.c
@@ -61,6 +61,7 @@
#include <sys/limits.h>
#include <sys/lock.h>
#include <sys/mman.h>
+#include <sys/malloc.h>
#include <sys/mount.h>
#include <sys/mutex.h>
#include <sys/namei.h>
@@ -2494,6 +2495,134 @@
va->va_fsid += (uint32_t)f->val[0];
}
+/* Malloc a zero'd block to compare with the data block read in. */
+static char *copyfilerange_zerodat = NULL;
+static u_long copyfilerange_zerosize = 0;
+
+int
+vn_copy_file_range(struct vnode *invp, off_t *inoffp, struct vnode *outvp,
+ off_t *outoffp, size_t *lenp, u_int flags)
+{
+ struct statfs *sfp;
+ struct vattr va;
+ struct mount *mp;
+ u_long blksize;
+ int error, xfer;
+ ssize_t aresid;
+ size_t len;
+ char *dat;
+ struct thread *td = curthread;
+
+ len = *lenp;
+ error = 0;
+ mp = NULL;
+ /* Do some sanity checks on the arguments. */
+ if (invp->v_type == VDIR || outvp->v_type == VDIR)
+ error = EISDIR;
+ else if (*inoffp < 0 || (*inoffp + len) < *inoffp || *outoffp < 0 ||
+ (*outoffp + len) < *outoffp || invp->v_type != VREG ||
+ outvp->v_type != VREG)
+ error = EINVAL;
+ /* Check that the offset + len does not go past EOF of invp. */
+ if (error == 0)
+ error = VOP_GETATTR(invp, &va, curthread->td_ucred);
+ if (error == 0 && va.va_size < (*inoffp + len))
+ error = EINVAL;
+ if (error != 0) {
+ *lenp = 0;
+ return (error);
+ }
+
+ /*
+ * If the two vnodes are for the same file system, try the
+ * VOP_COPY_FILE_RANGE() call first and do it here if the VOP
+ * call fails.
+ */
+ if (invp->v_mount == outvp->v_mount) {
+ error = VOP_COPY_FILE_RANGE(invp, inoffp, outvp, outoffp,
+ lenp, flags);
+ if (error == 0)
+ return (error);
+ }
+
+ /*
+ * Copy blocks of the size preferred by the input file, with a
+ * minimum of 16Kbytes and a maximum of 1Mbytes.
+ */
+ sfp = malloc(sizeof(*sfp), M_STATFS, M_WAITOK);
+ error = VFS_STATFS(invp->v_mount, sfp);
+ if (error != 0) {
+ free(sfp, M_STATFS);
+ *lenp = 0;
+ return (error);
+ }
+ if (sfp->f_iosize < 16384)
+ blksize = 16384;
+ else if (sfp->f_iosize > 1048576)
+ blksize = 1048576;
+ else
+ blksize = sfp->f_iosize;
+ free(sfp, M_STATFS);
+
+ /* Start write for outvp. */
+ error = vn_start_write(outvp, &mp, V_WAIT | PCATCH);
+ if (error != 0) {
+ *lenp = 0;
+ return (error);
+ }
+
+ dat = malloc(blksize, M_TEMP, M_WAITOK);
+ /*
+ * It would be nice to use VOP_IOCTL() to find holes, but that
+ * requires that invp be unlocked/relocked for each block read.
+ * I am not sure we want to do that here, since it would open
+ * up a window where another thread could write to the file while
+ * the copy is in progress.
+ * In the meantime, just scan for a read block of all 0s.
+ */
+ if (copyfilerange_zerosize < blksize) {
+ free(copyfilerange_zerodat, M_TEMP);
+ copyfilerange_zerosize = blksize;
+ copyfilerange_zerodat = malloc(copyfilerange_zerosize, M_TEMP,
+ M_WAITOK | M_ZERO);
+ }
+ while (error == 0 && len > 0) {
+ if (len > blksize)
+ xfer = blksize;
+ else
+ xfer = len;
+ error = vn_rdwr(UIO_READ, invp, dat, xfer, *inoffp,
+ UIO_SYSSPACE, IO_NODELOCKED, td->td_ucred, NULL, &aresid,
+ td);
+ /* Linux considers a range that exceeds EOF to be an error. */
+ if (error == 0 && aresid > 0)
+ error = EINVAL;
+ if (error == 0) {
+ /* Skip the write for holes. */
+ if (memcmp(dat, copyfilerange_zerodat, xfer) != 0)
+ error = vn_rdwr(UIO_WRITE, outvp, dat, xfer,
+ *outoffp, UIO_SYSSPACE, IO_NODELOCKED,
+ td->td_ucred, NULL, NULL, td);
+ else if (xfer == len) {
+ /* Hole at EOF. */
+ VATTR_NULL(&va);
+ va.va_size = *outoffp + len;
+ error = VOP_SETATTR(outvp, &va, td->td_ucred);
+ }
+ if (error == 0) {
+ *inoffp += xfer;
+ *outoffp += xfer;
+ len -= xfer;
+ }
+ }
+ }
+ *lenp -= len;
+ if (mp != NULL)
+ vn_finished_write(mp);
+ free(dat, M_TEMP);
+ return (error);
+}
+
int
vn_fsync_buf(struct vnode *vp, int waitfor)
{
Index: sys/kern/vnode_if.src
===================================================================
--- sys/kern/vnode_if.src
+++ sys/kern/vnode_if.src
@@ -718,6 +718,19 @@
};
+%% copy_file_range invp L L L
+%% copy_file_range outvp L L L
+
+vop_copy_file_range {
+ IN struct vnode *invp;
+ INOUT off_t *inoffp;
+ IN struct vnode *outvp;
+ INOUT off_t *outoffp;
+ INOUT size_t *lenp;
+ IN u_int flags;
+};
+
+
# The VOPs below are spares at the end of the table to allow new VOPs to be
# added in stable branches without breaking the KBI. New VOPs in HEAD should
# be added above these spares. When merging a new VOP to a stable branch,
Index: sys/sys/syscallsubr.h
===================================================================
--- sys/sys/syscallsubr.h
+++ sys/sys/syscallsubr.h
@@ -94,6 +94,8 @@
int kern_close(struct thread *td, int fd);
int kern_connectat(struct thread *td, int dirfd, int fd,
struct sockaddr *sa);
+int kern_copy_file_range(struct thread *td, int infd, off_t *inoffp,
+ int outfd, off_t *outoffp, size_t len, u_int flags);
int kern_cpuset_getaffinity(struct thread *td, cpulevel_t level,
cpuwhich_t which, id_t id, size_t cpusetsize, cpuset_t *maskp);
int kern_cpuset_setaffinity(struct thread *td, cpulevel_t level,
Index: sys/sys/vnode.h
===================================================================
--- sys/sys/vnode.h
+++ sys/sys/vnode.h
@@ -667,6 +667,8 @@
struct ucred *cred);
int vn_close(struct vnode *vp,
int flags, struct ucred *file_cred, struct thread *td);
+int vn_copy_file_range(struct vnode *invp, off_t *inoffp,
+ struct vnode *outvp, off_t *outoffp, size_t *lenp, u_int flags);
void vn_finished_write(struct mount *mp);
void vn_finished_secondary_write(struct mount *mp);
int vn_fsync_buf(struct vnode *vp, int waitfor);
File Metadata
Details
Attached
Mime Type
text/plain
Expires
Sun, Oct 19, 2:32 PM (10 h, 51 m)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
23928802
Default Alt Text
D20584.id58464.diff (13 KB)
Attached To
Mode
D20584: add a linux compatible copy_file_range(2) syscall
Attached
Detach File
Event Timeline
Log In to Comment