Page Menu
Home
FreeBSD
Search
Configure Global Search
Log In
Files
F133345096
D20584.diff
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Flag For Later
Award Token
Size
17 KB
Referenced Files
None
Subscribers
None
D20584.diff
View Options
Index: head/sys/kern/syscalls.master
===================================================================
--- head/sys/kern/syscalls.master
+++ head/sys/kern/syscalls.master
@@ -3175,6 +3175,16 @@
int flag
);
}
+569 AUE_NULL STD {
+ ssize_t copy_file_range(
+ int infd,
+ _Inout_opt_ off_t *inoffp,
+ int outfd,
+ _Inout_opt_ off_t *outoffp,
+ size_t len,
+ unsigned int flags
+ );
+ }
; Please copy any additions and changes to the following compatability tables:
; sys/compat/freebsd32/syscalls.master
Index: head/sys/kern/vfs_default.c
===================================================================
--- head/sys/kern/vfs_default.c
+++ head/sys/kern/vfs_default.c
@@ -83,6 +83,7 @@
static int vop_stdis_text(struct vop_is_text_args *ap);
static int vop_stdunset_text(struct vop_unset_text_args *ap);
static int vop_stdadd_writecount(struct vop_add_writecount_args *ap);
+static int vop_stdcopy_file_range(struct vop_copy_file_range_args *ap);
static int vop_stdfdatasync(struct vop_fdatasync_args *ap);
static int vop_stdgetpages_async(struct vop_getpages_async_args *ap);
@@ -140,6 +141,7 @@
.vop_set_text = vop_stdset_text,
.vop_unset_text = vop_stdunset_text,
.vop_add_writecount = vop_stdadd_writecount,
+ .vop_copy_file_range = vop_stdcopy_file_range,
};
/*
@@ -1210,6 +1212,17 @@
{
return (0);
+}
+/*
+ * Standard (vop_std*) handler for VOP_COPY_FILE_RANGE().
+ * Unpacks the argument structure and passes every field, unchanged, to
+ * vn_generic_copy_file_range(); that function's return value is returned
+ * as is.
+ */
+static int
+vop_stdcopy_file_range(struct vop_copy_file_range_args *ap)
+{
+ int error;
+
+ error = vn_generic_copy_file_range(ap->a_invp, ap->a_inoffp,
+ ap->a_outvp, ap->a_outoffp, ap->a_lenp, ap->a_flags, ap->a_incred,
+ ap->a_outcred, ap->a_fsizetd);
+ return (error);
}
int
Index: head/sys/kern/vfs_syscalls.c
===================================================================
--- head/sys/kern/vfs_syscalls.c
+++ head/sys/kern/vfs_syscalls.c
@@ -4814,3 +4814,122 @@
uap->advice);
return (kern_posix_error(td, error));
}
+
+/*
+ * Kernel side of copy_file_range(2): copy up to len bytes from the file
+ * open on infd to the file open on outfd.
+ *
+ * A NULL inoffp/outoffp means "use and advance the descriptor's own
+ * f_offset"; otherwise the pointed-to offset is used and updated.
+ * The byte count reported by vn_copy_file_range() is stored in
+ * td->td_retval[0] (0 if the copy was never started).
+ *
+ * Errors generated here:
+ *	EINVAL	flags is non-zero, or one of the descriptors has no vnode.
+ *	EBADF	outfd is not open for writing or is open for append, infd
+ *		is not open for reading, or both descriptors refer to the
+ *		same vnode.
+ * Errors from fget_read(), fget_write() and vn_copy_file_range() are
+ * returned unchanged.  If the error is EINTR or ERESTART the offsets are
+ * put back to the values they had on entry.
+ */
+int
+kern_copy_file_range(struct thread *td, int infd, off_t *inoffp, int outfd,
+    off_t *outoffp, size_t len, unsigned int flags)
+{
+	struct file *infp, *outfp;
+	struct vnode *invp, *outvp;
+	int error;
+	size_t retlen;
+	void *rl_rcookie, *rl_wcookie;
+	off_t savinoff, savoutoff;
+
+	infp = outfp = NULL;
+	rl_rcookie = rl_wcookie = NULL;
+	/* -1 marks "offsets not saved yet"; tested at the out label. */
+	savinoff = savoutoff = -1;
+	error = 0;
+	retlen = 0;
+
+	if (flags != 0) {
+		error = EINVAL;
+		goto out;
+	}
+	if (len > SSIZE_MAX) {
+		/*
+		 * Although the len argument is size_t, the return argument
+		 * is ssize_t (which is signed).  Therefore a size that won't
+		 * fit in ssize_t can't be returned.
+		 */
+		len = SSIZE_MAX;
+	}
+
+	/* Get the file structures for the file descriptors. */
+	error = fget_read(td, infd, &cap_read_rights, &infp);
+	if (error != 0)
+		goto out;
+	error = fget_write(td, outfd, &cap_write_rights, &outfp);
+	if (error != 0)
+		goto out;
+
+	/* Set the offset pointers to the correct place. */
+	if (inoffp == NULL)
+		inoffp = &infp->f_offset;
+	if (outoffp == NULL)
+		outoffp = &outfp->f_offset;
+	savinoff = *inoffp;
+	savoutoff = *outoffp;
+
+	invp = infp->f_vnode;
+	outvp = outfp->f_vnode;
+	/* Sanity check the f_flag bits. */
+	if ((outfp->f_flag & (FWRITE | FAPPEND)) != FWRITE ||
+	    (infp->f_flag & FREAD) == 0 || invp == outvp) {
+		error = EBADF;
+		goto out;
+	}
+
+	/*
+	 * A descriptor that is not backed by a vnode cannot take part in
+	 * the copy.  Reject it here, before the vnode pointers are
+	 * dereferenced by the range locking and by vn_copy_file_range().
+	 */
+	if (invp == NULL || outvp == NULL) {
+		error = EINVAL;
+		goto out;
+	}
+
+	/* If len == 0, just return 0. */
+	if (len == 0)
+		goto out;
+
+	/*
+	 * Range lock the byte ranges for both invp and outvp.
+	 * The write lock is taken first and the read lock is only tried.
+	 * If the try fails, the write lock is dropped, the read range is
+	 * waited for and released again, and the whole sequence is retried,
+	 * so this thread never sleeps on one range while holding the other.
+	 */
+	for (;;) {
+		rl_wcookie = vn_rangelock_wlock(outvp, *outoffp, *outoffp +
+		    len);
+		rl_rcookie = vn_rangelock_tryrlock(invp, *inoffp, *inoffp +
+		    len);
+		if (rl_rcookie != NULL)
+			break;
+		vn_rangelock_unlock(outvp, rl_wcookie);
+		rl_rcookie = vn_rangelock_rlock(invp, *inoffp, *inoffp + len);
+		vn_rangelock_unlock(invp, rl_rcookie);
+	}
+
+	retlen = len;
+	error = vn_copy_file_range(invp, inoffp, outvp, outoffp, &retlen,
+	    flags, infp->f_cred, outfp->f_cred, td);
+out:
+	if (rl_rcookie != NULL)
+		vn_rangelock_unlock(invp, rl_rcookie);
+	if (rl_wcookie != NULL)
+		vn_rangelock_unlock(outvp, rl_wcookie);
+	/* Undo the offset updates when the call is interrupted/restarted. */
+	if (savinoff != -1 && (error == EINTR || error == ERESTART)) {
+		*inoffp = savinoff;
+		*outoffp = savoutoff;
+	}
+	if (outfp != NULL)
+		fdrop(outfp, td);
+	if (infp != NULL)
+		fdrop(infp, td);
+	td->td_retval[0] = retlen;
+	return (error);
+}
+/*
+ * copy_file_range(2) system call entry point.
+ * Each user offset pointer that is not NULL is copied in to a local off_t
+ * and kern_copy_file_range() is handed a pointer to that local; a NULL
+ * user pointer is passed through as NULL.  After a successful copy the
+ * locals are copied back out through the user's pointers.
+ * Returns 0 or the first error from copyin(), kern_copy_file_range() or
+ * copyout().
+ */
+int
+sys_copy_file_range(struct thread *td, struct copy_file_range_args *uap)
+{
+ off_t inoff, outoff, *inoffp, *outoffp;
+ int error;
+
+ inoffp = outoffp = NULL; /* NULL is passed on when the user gave no offset */
+ if (uap->inoffp != NULL) {
+ error = copyin(uap->inoffp, &inoff, sizeof(off_t));
+ if (error != 0)
+ return (error);
+ inoffp = &inoff;
+ }
+ if (uap->outoffp != NULL) {
+ error = copyin(uap->outoffp, &outoff, sizeof(off_t));
+ if (error != 0)
+ return (error);
+ outoffp = &outoff;
+ }
+ error = kern_copy_file_range(td, uap->infd, inoffp, uap->outfd,
+ outoffp, uap->len, uap->flags);
+ if (error == 0 && uap->inoffp != NULL) /* offsets are written back only on success */
+ error = copyout(inoffp, uap->inoffp, sizeof(off_t));
+ if (error == 0 && uap->outoffp != NULL)
+ error = copyout(outoffp, uap->outoffp, sizeof(off_t));
+ return (error);
+}
Index: head/sys/kern/vfs_vnops.c
===================================================================
--- head/sys/kern/vfs_vnops.c
+++ head/sys/kern/vfs_vnops.c
@@ -2619,3 +2619,372 @@
return (error);
}
+
+/*
+ * Copies a byte range from invp to outvp. Validates the arguments and then
+ * calls VOP_COPY_FILE_RANGE() or vn_generic_copy_file_range() to do the
+ * actual copy.
+ * No range locks are taken in this function; kern_copy_file_range() range
+ * locks both byte ranges before calling it.
+ * vn_generic_copy_file_range() is factored out, so it can be called
+ * from a VOP_COPY_FILE_RANGE() call as well, but handles vnodes from
+ * different file systems.
+ *
+ * On entry *lenp is the number of bytes requested. It is set to 0 for the
+ * error returns generated by the checks below; otherwise it is left as
+ * updated by the copy routine that was called.
+ *
+ * Errors generated by the checks in this function:
+ * EISDIR - invp or outvp is a directory.
+ * EINVAL - an offset is negative, offset + len does not fit in an int64_t,
+ *          a vnode is not a regular file, or the input range extends past
+ *          the size VOP_GETATTR() reports for invp.
+ * EBADF  - invp and outvp are the same vnode.
+ * Failures of vn_lock() and VOP_GETATTR() are returned as is.
+ */
+int
+vn_copy_file_range(struct vnode *invp, off_t *inoffp, struct vnode *outvp,
+ off_t *outoffp, size_t *lenp, unsigned int flags, struct ucred *incred,
+ struct ucred *outcred, struct thread *fsize_td)
+{
+ struct vattr va;
+ int error;
+ size_t len;
+ uint64_t uvalin, uvalout; /* offset + len, computed in an unsigned type */
+
+ len = *lenp;
+ *lenp = 0; /* For error returns. */
+ error = 0;
+
+ /*
+ * Do some sanity checks on the arguments.
+ * The end offsets are summed as uint64_t so that a wrapped sum can be
+ * detected by comparing it against the starting offset.
+ */
+ uvalin = *inoffp;
+ uvalin += len;
+ uvalout = *outoffp;
+ uvalout += len;
+ if (invp->v_type == VDIR || outvp->v_type == VDIR)
+ error = EISDIR;
+ else if (*inoffp < 0 || uvalin > INT64_MAX || uvalin <
+ (uint64_t)*inoffp || *outoffp < 0 || uvalout > INT64_MAX ||
+ uvalout < (uint64_t)*outoffp || invp->v_type != VREG ||
+ outvp->v_type != VREG)
+ error = EINVAL;
+ else if (invp == outvp)
+ error = EBADF;
+ if (error != 0)
+ goto out;
+
+ error = vn_lock(invp, LK_SHARED);
+ if (error != 0)
+ goto out;
+ /* Check that the offset + len does not go past EOF of invp. */
+ error = VOP_GETATTR(invp, &va, incred);
+ if (error == 0 && va.va_size < *inoffp + len)
+ error = EINVAL;
+ VOP_UNLOCK(invp, 0);
+ if (error != 0)
+ goto out;
+
+ /*
+ * If the two vnodes are on the same mount (v_mount is equal), call
+ * VOP_COPY_FILE_RANGE(), otherwise call vn_generic_copy_file_range()
+ * which can handle copies across multiple file systems.
+ */
+ *lenp = len; /* restore the requested length for the copy routine */
+ if (invp->v_mount == outvp->v_mount)
+ error = VOP_COPY_FILE_RANGE(invp, inoffp, outvp, outoffp,
+ lenp, flags, incred, outcred, fsize_td);
+ else
+ error = vn_generic_copy_file_range(invp, inoffp, outvp,
+ outoffp, lenp, flags, incred, outcred, fsize_td);
+out:
+ return (error);
+}
+
+/*
+ * Test len bytes of data starting at dat for all bytes == 0.
+ * Return true if all bytes are zero, false otherwise.
+ * Expects dat to be well aligned.
+ * The buffer is examined one u_int at a time; if fewer than sizeof(u_int)
+ * bytes remain at the end, those are compared byte by byte.
+ * A len that is zero or negative returns true without reading dat.
+ */
+static bool
+mem_iszero(void *dat, int len)
+{
+ int i;
+ const u_int *p;
+ const char *cp;
+ /*
+ * NOTE(review): len is an int while sizeof(*p) is a size_t, so the
+ * comparison and the subtraction below are carried out in an unsigned
+ * type. After the trailing-bytes branch the loop presumably relies on
+ * the result converting back to a negative int to terminate - confirm.
+ */
+ for (p = dat; len > 0; len -= sizeof(*p), p++) {
+ if (len >= sizeof(*p)) {
+ if (*p != 0)
+ return (false);
+ } else {
+ cp = (const char *)p; /* fewer than sizeof(*p) bytes left */
+ for (i = 0; i < len; i++, cp++)
+ if (*cp != '\0')
+ return (false);
+ }
+ }
+ return (true);
+}
+
+/*
+ * Write an xfer sized chunk to outvp in blksize blocks from dat.
+ * dat is a maximum of blksize in length and can be written repeatedly in
+ * the chunk.
+ * If growfile == true, just grow the file via vn_truncate_locked() instead
+ * of doing actual writes.
+ *
+ * outvp   - vnode to write to; it is locked and unlocked inside this
+ *           function on every loop iteration.
+ * dat     - buffer holding the bytes to write; the same buffer is passed to
+ *           vn_rdwr() for every block of the chunk.
+ * outoff  - starting offset in outvp.
+ * xfer    - number of bytes to write, or to grow by when growfile is true.
+ * blksize - upper bound on the size of each individual vn_rdwr() call.
+ * cred    - credential passed to vn_truncate_locked() and, as the file
+ *           credential argument, to vn_rdwr().
+ *
+ * Returns 0 or the first error from vn_start_write(), vn_lock(),
+ * vn_truncate_locked() or vn_rdwr().
+ */
+static int
+vn_write_outvp(struct vnode *outvp, char *dat, off_t outoff, off_t xfer,
+ u_long blksize, bool growfile, struct ucred *cred)
+{
+ struct mount *mp;
+ off_t xfer2;
+ int error, lckf;
+
+ /*
+ * Loop around doing writes of blksize until write has been completed.
+ * Lock/unlock on each loop iteration so that a bwillwrite() can be
+ * done for each iteration, since the xfer argument can be very
+ * large if there is a large hole to punch in the output file.
+ * When growfile is true the loop body runs exactly once.
+ */
+ do {
+ bwillwrite();
+ mp = NULL;
+ error = vn_start_write(outvp, &mp, V_WAIT);
+ if (error == 0) {
+ if (MNT_SHARED_WRITES(mp))
+ lckf = LK_SHARED;
+ else
+ lckf = LK_EXCLUSIVE;
+ error = vn_lock(outvp, lckf);
+ }
+ if (error == 0) {
+ if (growfile)
+ error = vn_truncate_locked(outvp, outoff + xfer,
+ false, cred);
+ else {
+ xfer2 = MIN(xfer, blksize); /* never more than one block per call */
+ error = vn_rdwr(UIO_WRITE, outvp, dat, xfer2,
+ outoff, UIO_SYSSPACE, IO_NODELOCKED,
+ curthread->td_ucred, cred, NULL, curthread);
+ outoff += xfer2;
+ xfer -= xfer2;
+ }
+ VOP_UNLOCK(outvp, 0);
+ }
+ if (mp != NULL) /* set by vn_start_write(); pairs with it */
+ vn_finished_write(mp);
+ } while (!growfile && xfer > 0 && error == 0);
+ return (error);
+}
+
+/*
+ * Copy a byte range of one file to another. This function can handle the
+ * case where invp and outvp are on different file systems.
+ * It can also be called by a VOP_COPY_FILE_RANGE() to do the work, if there
+ * is no better file system specific way to do it.
+ *
+ * *inoffp and *outoffp are advanced as each piece is copied or skipped, and
+ * on return *lenp holds the number of bytes processed (the requested length
+ * minus whatever was still left when the copy stopped).
+ * If fsize_td is not NULL, vn_rlimit_fsize() is called with it and a failure
+ * is returned as EFBIG.
+ * The flags argument is not referenced by this implementation.
+ * Returns 0 or the first error encountered; a read from invp that comes up
+ * short is turned into EINVAL.
+ */
+int
+vn_generic_copy_file_range(struct vnode *invp, off_t *inoffp,
+ struct vnode *outvp, off_t *outoffp, size_t *lenp, unsigned int flags,
+ struct ucred *incred, struct ucred *outcred, struct thread *fsize_td)
+{
+ struct vattr va;
+ struct mount *mp;
+ struct uio io;
+ off_t startoff, endoff, xfer, xfer2;
+ u_long blksize;
+ int error;
+ bool cantseek, readzeros;
+ ssize_t aresid;
+ size_t copylen, len, savlen;
+ char *dat;
+ long holein, holeout;
+
+ holein = holeout = 0;
+ savlen = len = *lenp;
+ error = 0;
+ dat = NULL;
+
+ error = vn_lock(invp, LK_SHARED);
+ if (error != 0)
+ goto out;
+ if (VOP_PATHCONF(invp, _PC_MIN_HOLE_SIZE, &holein) != 0)
+ holein = 0;
+ VOP_UNLOCK(invp, 0);
+ if (error != 0) /* NOTE(review): error has not changed since the check above, so this cannot fire */
+ goto out;
+
+ mp = NULL;
+ error = vn_start_write(outvp, &mp, V_WAIT);
+ if (error == 0)
+ error = vn_lock(outvp, LK_EXCLUSIVE);
+ if (error == 0) {
+ /*
+ * If fsize_td != NULL, do a vn_rlimit_fsize() call,
+ * now that outvp is locked.
+ */
+ if (fsize_td != NULL) {
+ io.uio_offset = *outoffp;
+ io.uio_resid = len;
+ error = vn_rlimit_fsize(outvp, &io, fsize_td);
+ if (error != 0)
+ error = EFBIG;
+ }
+ if (VOP_PATHCONF(outvp, _PC_MIN_HOLE_SIZE, &holeout) != 0)
+ holeout = 0;
+ /*
+ * Holes that are past EOF do not need to be written as a block
+ * of zero bytes. So, truncate the output file as far as
+ * possible and then use va.va_size to decide if writing 0
+ * bytes is necessary in the loop below.
+ */
+ if (error == 0)
+ error = VOP_GETATTR(outvp, &va, outcred);
+ if (error == 0 && va.va_size > *outoffp && va.va_size <=
+ *outoffp + len) {
+#ifdef MAC
+ error = mac_vnode_check_write(curthread->td_ucred,
+ outcred, outvp);
+ if (error == 0)
+#endif
+ error = vn_truncate_locked(outvp, *outoffp,
+ false, outcred);
+ if (error == 0)
+ va.va_size = *outoffp;
+ }
+ VOP_UNLOCK(outvp, 0);
+ }
+ if (mp != NULL)
+ vn_finished_write(mp);
+ if (error != 0)
+ goto out;
+
+ /*
+ * Set the blksize to the larger of the hole sizes for invp and outvp.
+ * If hole sizes aren't available, set the blksize to the larger
+ * f_iosize of invp and outvp.
+ * This code expects the hole sizes and f_iosizes to be powers of 2.
+ * This value is clipped at 4Kbytes and 1Mbyte.
+ */
+ blksize = MAX(holein, holeout);
+ if (blksize == 0)
+ blksize = MAX(invp->v_mount->mnt_stat.f_iosize,
+ outvp->v_mount->mnt_stat.f_iosize);
+ if (blksize < 4096)
+ blksize = 4096;
+ else if (blksize > 1024 * 1024)
+ blksize = 1024 * 1024;
+ dat = malloc(blksize, M_TEMP, M_WAITOK); /* released at the out label */
+
+ /*
+ * If VOP_IOCTL(FIOSEEKHOLE) works for invp, use it and FIOSEEKDATA
+ * to find holes. Otherwise, just scan the read block for all 0s
+ * in the inner loop where the data copying is done.
+ * Note that some file systems such as NFSv3, NFSv4.0 and NFSv4.1 may
+ * support holes on the server, but do not support FIOSEEKHOLE.
+ */
+ while (len > 0 && error == 0) {
+ endoff = 0; /* To shut up compilers. */
+ cantseek = true;
+ startoff = *inoffp;
+ copylen = len;
+
+ /*
+ * Find the next data area. If there is just a hole to EOF,
+ * FIOSEEKDATA should fail and then we drop down into the
+ * inner loop and create the hole on the outvp file.
+ * (I do not know if any file system will report a hole to
+ * EOF via FIOSEEKHOLE, but I am pretty sure FIOSEEKDATA
+ * will fail for those file systems.)
+ *
+ * For input files that don't support FIOSEEKDATA/FIOSEEKHOLE,
+ * the code just falls through to the inner copy loop.
+ */
+ error = EINVAL; /* selects the "cannot seek" branch below when holein == 0 */
+ if (holein > 0)
+ error = VOP_IOCTL(invp, FIOSEEKDATA, &startoff, 0,
+ incred, curthread);
+ if (error == 0) {
+ endoff = startoff;
+ error = VOP_IOCTL(invp, FIOSEEKHOLE, &endoff, 0,
+ incred, curthread);
+ }
+ if (error == 0) {
+ if (startoff > *inoffp) {
+ /* Found hole before data block. */
+ xfer = MIN(startoff - *inoffp, len);
+ if (*outoffp < va.va_size) {
+ /* Must write 0s to punch hole. */
+ xfer2 = MIN(va.va_size - *outoffp,
+ xfer);
+ memset(dat, 0, MIN(xfer2, blksize));
+ error = vn_write_outvp(outvp, dat,
+ *outoffp, xfer2, blksize, false,
+ outcred);
+ }
+
+ if (error == 0 && *outoffp + xfer >
+ va.va_size && xfer == len)
+ /* Grow last block. */
+ error = vn_write_outvp(outvp, dat,
+ *outoffp, xfer, blksize, true,
+ outcred);
+ if (error == 0) {
+ *inoffp += xfer;
+ *outoffp += xfer;
+ len -= xfer;
+ }
+ }
+ copylen = MIN(len, endoff - startoff);
+ cantseek = false;
+ } else {
+ cantseek = true;
+ startoff = *inoffp;
+ copylen = len;
+ error = 0; /* a failed seek ioctl is not an error for the copy */
+ }
+
+ xfer = blksize;
+ if (cantseek) {
+ /*
+ * Set first xfer to end at a block boundary, so that
+ * holes are more likely detected in the loop below via
+ * the for all bytes 0 method.
+ */
+ xfer -= (*inoffp % blksize);
+ }
+ /* Loop copying the data block. */
+ while (copylen > 0 && error == 0) {
+ if (copylen < xfer)
+ xfer = copylen;
+ error = vn_lock(invp, LK_SHARED);
+ if (error != 0)
+ goto out;
+ error = vn_rdwr(UIO_READ, invp, dat, xfer,
+ startoff, UIO_SYSSPACE, IO_NODELOCKED,
+ curthread->td_ucred, incred, &aresid,
+ curthread);
+ VOP_UNLOCK(invp, 0);
+ /*
+ * Linux considers a range that exceeds EOF to
+ * be an error, so we will too.
+ */
+ if (error == 0 && aresid > 0)
+ error = EINVAL;
+ if (error == 0) {
+ /*
+ * Skip the write for holes past the initial EOF
+ * of the output file, unless this is the last
+ * write of the output file at EOF.
+ * The last vn_write_outvp() argument (growfile)
+ * is true only for an all-zero final block at or
+ * past the output file's size.
+ */
+ readzeros = cantseek ? mem_iszero(dat, xfer) :
+ false;
+ if (!cantseek || *outoffp < va.va_size ||
+ xfer == len || !readzeros)
+ error = vn_write_outvp(outvp, dat,
+ *outoffp, xfer, blksize,
+ readzeros && xfer == len &&
+ *outoffp >= va.va_size, outcred);
+ if (error == 0) {
+ *inoffp += xfer;
+ startoff += xfer;
+ *outoffp += xfer;
+ copylen -= xfer;
+ len -= xfer;
+ }
+ }
+ xfer = blksize; /* only the first transfer is shortened for alignment */
+ }
+ }
+out:
+ *lenp = savlen - len; /* bytes processed before stopping */
+ free(dat, M_TEMP);
+ return (error);
+}
Index: head/sys/kern/vnode_if.src
===================================================================
--- head/sys/kern/vnode_if.src
+++ head/sys/kern/vnode_if.src
@@ -718,6 +718,22 @@
};
+%% copy_file_range invp U U U
+%% copy_file_range outvp U U U
+
+vop_copy_file_range {
+ IN struct vnode *invp;
+ INOUT off_t *inoffp;
+ IN struct vnode *outvp;
+ INOUT off_t *outoffp;
+ INOUT size_t *lenp;
+ IN unsigned int flags;
+ IN struct ucred *incred;
+ IN struct ucred *outcred;
+ IN struct thread *fsizetd;
+};
+
+
# The VOPs below are spares at the end of the table to allow new VOPs to be
# added in stable branches without breaking the KBI. New VOPs in HEAD should
# be added above these spares. When merging a new VOP to a stable branch,
Index: head/sys/sys/syscallsubr.h
===================================================================
--- head/sys/sys/syscallsubr.h
+++ head/sys/sys/syscallsubr.h
@@ -94,6 +94,8 @@
int kern_close(struct thread *td, int fd);
int kern_connectat(struct thread *td, int dirfd, int fd,
struct sockaddr *sa);
+int kern_copy_file_range(struct thread *td, int infd, off_t *inoffp,
+ int outfd, off_t *outoffp, size_t len, unsigned int flags);
int kern_cpuset_getaffinity(struct thread *td, cpulevel_t level,
cpuwhich_t which, id_t id, size_t cpusetsize, cpuset_t *maskp);
int kern_cpuset_setaffinity(struct thread *td, cpulevel_t level,
Index: head/sys/sys/vnode.h
===================================================================
--- head/sys/sys/vnode.h
+++ head/sys/sys/vnode.h
@@ -667,9 +667,17 @@
struct ucred *cred);
int vn_close(struct vnode *vp,
int flags, struct ucred *file_cred, struct thread *td);
+int vn_copy_file_range(struct vnode *invp, off_t *inoffp,
+ struct vnode *outvp, off_t *outoffp, size_t *lenp,
+ unsigned int flags, struct ucred *incred, struct ucred *outcred,
+ struct thread *fsize_td);
void vn_finished_write(struct mount *mp);
void vn_finished_secondary_write(struct mount *mp);
int vn_fsync_buf(struct vnode *vp, int waitfor);
+int vn_generic_copy_file_range(struct vnode *invp, off_t *inoffp,
+ struct vnode *outvp, off_t *outoffp, size_t *lenp,
+ unsigned int flags, struct ucred *incred, struct ucred *outcred,
+ struct thread *fsize_td);
int vn_isdisk(struct vnode *vp, int *errp);
int _vn_lock(struct vnode *vp, int flags, char *file, int line);
#define vn_lock(vp, flags) _vn_lock(vp, flags, __FILE__, __LINE__)
File Metadata
Details
Attached
Mime Type
text/plain
Expires
Sun, Oct 26, 2:09 AM (6 h, 5 m)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
24191492
Default Alt Text
D20584.diff (17 KB)
Attached To
Mode
D20584: add a linux compatible copy_file_range(2) syscall
Attached
Detach File
Event Timeline
Log In to Comment