Index: sys/fs/fuse/fuse_internal.h =================================================================== --- sys/fs/fuse/fuse_internal.h +++ sys/fs/fuse/fuse_internal.h @@ -274,6 +274,10 @@ int fuse_internal_setattr(struct vnode *vp, struct vattr *va, struct thread *td, struct ucred *cred); +/* write */ +void fuse_internal_clear_suid_on_write(struct vnode *vp, struct ucred *cred, + struct thread *td); + /* strategy */ /* entity creation */ Index: sys/fs/fuse/fuse_internal.c =================================================================== --- sys/fs/fuse/fuse_internal.c +++ sys/fs/fuse/fuse_internal.c @@ -1054,6 +1054,9 @@ if (!fuse_libabi_geq(data, 7, 24)) fsess_set_notimpl(data->mp, FUSE_LSEEK); + if (!fuse_libabi_geq(data, 7, 28)) + fsess_set_notimpl(data->mp, FUSE_COPY_FILE_RANGE); + out: if (err) { fdata_set_dead(data); @@ -1098,6 +1101,12 @@ * FUSE_READDIRPLUS_AUTO: not yet implemented * FUSE_ASYNC_DIO: not yet implemented * FUSE_NO_OPEN_SUPPORT: not yet implemented + * FUSE_PARALLEL_DIROPS: not yet implemented + * FUSE_HANDLE_KILLPRIV: not yet implemented + * FUSE_POSIX_ACL: not yet implemented + * FUSE_ABORT_ERROR: not yet implemented + * FUSE_CACHE_SYMLINKS: not yet implemented + * FUSE_MAX_PAGES: not yet implemented */ fiii->flags = FUSE_ASYNC_READ | FUSE_POSIX_LOCKS | FUSE_EXPORT_SUPPORT | FUSE_BIG_WRITES | FUSE_WRITEBACK_CACHE; @@ -1228,6 +1237,43 @@ return err; } +/* + * FreeBSD clears the SUID and SGID bits on any write by a non-root user. + */ +void +fuse_internal_clear_suid_on_write(struct vnode *vp, struct ucred *cred, + struct thread *td) +{ + struct fuse_data *data; + struct mount *mp; + struct vattr va; + int dataflags; + + mp = vnode_mount(vp); + data = fuse_get_mpdata(mp); + dataflags = data->dataflags; + + if (dataflags & FSESS_DEFAULT_PERMISSIONS) { + if (priv_check_cred(cred, PRIV_VFS_RETAINSUGID)) { + fuse_internal_getattr(vp, &va, cred, td); + if (va.va_mode & (S_ISUID | S_ISGID)) { + mode_t mode = va.va_mode & ~(S_ISUID | S_ISGID); + /* Clear all vattr fields except mode */ + vattr_null(&va); + va.va_mode = mode; + + /* + * Ignore fuse_internal_setattr's return value, + * because at this point the write operation has + * already succeeded and we don't want to return + * failing status for that. + */ + (void)fuse_internal_setattr(vp, &va, td, NULL); + } + } + } +} + #ifdef ZERO_PAD_INCOMPLETE_BUFS static int isbzero(void *buf, size_t len) Index: sys/fs/fuse/fuse_io.c =================================================================== --- sys/fs/fuse/fuse_io.c +++ sys/fs/fuse/fuse_io.c @@ -121,9 +121,6 @@ static int fuse_inval_buf_range(struct vnode *vp, off_t filesize, off_t start, off_t end); -static void -fuse_io_clear_suid_on_write(struct vnode *vp, struct ucred *cred, - struct thread *td); static int fuse_read_directbackend(struct vnode *vp, struct uio *uio, struct ucred *cred, struct fuse_filehandle *fufh); @@ -190,43 +187,6 @@ return (0); } -/* - * FreeBSD clears the SUID and SGID bits on any write by a non-root user. - */ -static void -fuse_io_clear_suid_on_write(struct vnode *vp, struct ucred *cred, - struct thread *td) -{ - struct fuse_data *data; - struct mount *mp; - struct vattr va; - int dataflags; - - mp = vnode_mount(vp); - data = fuse_get_mpdata(mp); - dataflags = data->dataflags; - - if (dataflags & FSESS_DEFAULT_PERMISSIONS) { - if (priv_check_cred(cred, PRIV_VFS_RETAINSUGID)) { - fuse_internal_getattr(vp, &va, cred, td); - if (va.va_mode & (S_ISUID | S_ISGID)) { - mode_t mode = va.va_mode & ~(S_ISUID | S_ISGID); - /* Clear all vattr fields except mode */ - vattr_null(&va); - va.va_mode = mode; - - /* - * Ignore fuse_internal_setattr's return value, - * because at this point the write operation has - * already succeeded and we don't want to return - * failing status for that. - */ - (void)fuse_internal_setattr(vp, &va, td, NULL); - } - } - } -} - SDT_PROBE_DEFINE5(fusefs, , io, io_dispatch, "struct vnode*", "struct uio*", "int", "struct ucred*", "struct fuse_filehandle*"); SDT_PROBE_DEFINE4(fusefs, , io, io_dispatch_filehandles_closed, "struct vnode*", @@ -318,7 +278,7 @@ err = fuse_write_biobackend(vp, uio, cred, fufh, ioflag, pid); } - fuse_io_clear_suid_on_write(vp, cred, uio->uio_td); + fuse_internal_clear_suid_on_write(vp, cred, uio->uio_td); break; default: panic("uninterpreted mode passed to fuse_io_dispatch"); Index: sys/fs/fuse/fuse_ipc.c =================================================================== --- sys/fs/fuse/fuse_ipc.c +++ sys/fs/fuse/fuse_ipc.c @@ -855,6 +855,10 @@ err = (blen == sizeof(struct fuse_lseek_out)) ? 0 : EINVAL; break; + case FUSE_COPY_FILE_RANGE: + err = (blen == sizeof(struct fuse_write_out)) ? 0 : EINVAL; + break; + default: panic("FUSE: opcodes out of sync (%d)\n", opcode); } Index: sys/fs/fuse/fuse_kernel.h =================================================================== --- sys/fs/fuse/fuse_kernel.h +++ sys/fs/fuse/fuse_kernel.h @@ -1,4 +1,6 @@ -/*-- +/*- + * SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-2-Clause) + * * This file defines the kernel interface of FUSE * Copyright (C) 2001-2008 Miklos Szeredi * @@ -105,6 +107,22 @@ * * 7.24 * - add FUSE_LSEEK for SEEK_HOLE and SEEK_DATA support + * + * 7.25 + * - add FUSE_PARALLEL_DIROPS + * + * 7.26 + * - add FUSE_HANDLE_KILLPRIV + * - add FUSE_POSIX_ACL + * + * 7.27 + * - add FUSE_ABORT_ERROR + * + * 7.28 + * - add FUSE_COPY_FILE_RANGE + * - add FOPEN_CACHE_DIR + * - add FUSE_MAX_PAGES, add max_pages to init_out + * - add FUSE_CACHE_SYMLINKS */ #ifndef _FUSE_FUSE_KERNEL_H @@ -120,7 +138,7 @@ #define FUSE_KERNEL_VERSION 7 /** Minor version number of this interface */ -#define FUSE_KERNEL_MINOR_VERSION 24 +#define FUSE_KERNEL_MINOR_VERSION 28 /** The node ID of the root inode */ #define FUSE_ROOT_ID 1 @@ -188,10 +206,12 @@ * FOPEN_DIRECT_IO: bypass page cache for this open file * FOPEN_KEEP_CACHE: don't invalidate the data cache on open * FOPEN_NONSEEKABLE: the file is not seekable + * FOPEN_CACHE_DIR: allow caching this directory */ #define FOPEN_DIRECT_IO (1 << 0) #define FOPEN_KEEP_CACHE (1 << 1) #define FOPEN_NONSEEKABLE (1 << 2) +#define FOPEN_CACHE_DIR (1 << 3) /** * INIT request/reply flags @@ -214,6 +234,12 @@ * FUSE_ASYNC_DIO: asynchronous direct I/O submission * FUSE_WRITEBACK_CACHE: use writeback cache for buffered writes * FUSE_NO_OPEN_SUPPORT: kernel supports zero-message opens + * FUSE_PARALLEL_DIROPS: allow parallel lookups and readdir + * FUSE_HANDLE_KILLPRIV: fs handles killing suid/sgid/cap on write/chown/trunc + * FUSE_POSIX_ACL: filesystem supports posix acls + * FUSE_ABORT_ERROR: reading the device after abort returns ECONNABORTED + * FUSE_MAX_PAGES: init_out.max_pages contains the max number of req pages + * FUSE_CACHE_SYMLINKS: cache READLINK responses */ #define FUSE_ASYNC_READ (1 << 0) #define FUSE_POSIX_LOCKS (1 << 1) @@ -233,6 +259,12 @@ #define FUSE_ASYNC_DIO (1 << 15) #define FUSE_WRITEBACK_CACHE (1 << 16) #define FUSE_NO_OPEN_SUPPORT (1 << 17) +#define FUSE_PARALLEL_DIROPS (1 << 18) +#define FUSE_HANDLE_KILLPRIV (1 << 19) +#define FUSE_POSIX_ACL (1 << 20) +#define FUSE_ABORT_ERROR (1 << 21) +#define FUSE_MAX_PAGES (1 << 22) +#define FUSE_CACHE_SYMLINKS (1 << 23) #ifdef linux /** @@ -300,54 +332,55 @@ #define FUSE_POLL_SCHEDULE_NOTIFY (1 << 0) enum fuse_opcode { - FUSE_LOOKUP = 1, - FUSE_FORGET = 2, /* no reply */ - FUSE_GETATTR = 3, - FUSE_SETATTR = 4, - FUSE_READLINK = 5, - FUSE_SYMLINK = 6, - FUSE_MKNOD = 8, - FUSE_MKDIR = 9, - FUSE_UNLINK = 10, - FUSE_RMDIR = 11, - FUSE_RENAME = 12, - FUSE_LINK = 13, - FUSE_OPEN = 14, - FUSE_READ = 15, - FUSE_WRITE = 16, - FUSE_STATFS = 17, - FUSE_RELEASE = 18, - FUSE_FSYNC = 20, - FUSE_SETXATTR = 21, - FUSE_GETXATTR = 22, - FUSE_LISTXATTR = 23, - FUSE_REMOVEXATTR = 24, - FUSE_FLUSH = 25, - FUSE_INIT = 26, - FUSE_OPENDIR = 27, - FUSE_READDIR = 28, - FUSE_RELEASEDIR = 29, - FUSE_FSYNCDIR = 30, - FUSE_GETLK = 31, - FUSE_SETLK = 32, - FUSE_SETLKW = 33, - FUSE_ACCESS = 34, - FUSE_CREATE = 35, - FUSE_INTERRUPT = 36, - FUSE_BMAP = 37, - FUSE_DESTROY = 38, - FUSE_IOCTL = 39, - FUSE_POLL = 40, - FUSE_NOTIFY_REPLY = 41, - FUSE_BATCH_FORGET = 42, - FUSE_FALLOCATE = 43, - FUSE_READDIRPLUS = 44, - FUSE_RENAME2 = 45, - FUSE_LSEEK = 46, + FUSE_LOOKUP = 1, + FUSE_FORGET = 2, /* no reply */ + FUSE_GETATTR = 3, + FUSE_SETATTR = 4, + FUSE_READLINK = 5, + FUSE_SYMLINK = 6, + FUSE_MKNOD = 8, + FUSE_MKDIR = 9, + FUSE_UNLINK = 10, + FUSE_RMDIR = 11, + FUSE_RENAME = 12, + FUSE_LINK = 13, + FUSE_OPEN = 14, + FUSE_READ = 15, + FUSE_WRITE = 16, + FUSE_STATFS = 17, + FUSE_RELEASE = 18, + FUSE_FSYNC = 20, + FUSE_SETXATTR = 21, + FUSE_GETXATTR = 22, + FUSE_LISTXATTR = 23, + FUSE_REMOVEXATTR = 24, + FUSE_FLUSH = 25, + FUSE_INIT = 26, + FUSE_OPENDIR = 27, + FUSE_READDIR = 28, + FUSE_RELEASEDIR = 29, + FUSE_FSYNCDIR = 30, + FUSE_GETLK = 31, + FUSE_SETLK = 32, + FUSE_SETLKW = 33, + FUSE_ACCESS = 34, + FUSE_CREATE = 35, + FUSE_INTERRUPT = 36, + FUSE_BMAP = 37, + FUSE_DESTROY = 38, + FUSE_IOCTL = 39, + FUSE_POLL = 40, + FUSE_NOTIFY_REPLY = 41, + FUSE_BATCH_FORGET = 42, + FUSE_FALLOCATE = 43, + FUSE_READDIRPLUS = 44, + FUSE_RENAME2 = 45, + FUSE_LSEEK = 46, + FUSE_COPY_FILE_RANGE = 47, #ifdef linux /* CUSE specific operations */ - CUSE_INIT = 4096, + CUSE_INIT = 4096, #endif /* linux */ }; @@ -585,7 +618,9 @@ uint16_t congestion_threshold; uint32_t max_write; uint32_t time_gran; - uint32_t unused[9]; + uint16_t max_pages; + uint16_t padding; + uint32_t unused[8]; }; #ifdef linux @@ -766,4 +801,14 @@ uint64_t offset; }; +struct fuse_copy_file_range_in { + uint64_t fh_in; + uint64_t off_in; + uint64_t nodeid_out; + uint64_t fh_out; + uint64_t off_out; + uint64_t len; + uint64_t flags; +}; + #endif /* _FUSE_FUSE_KERNEL_H */ Index: sys/fs/fuse/fuse_vnops.c =================================================================== --- sys/fs/fuse/fuse_vnops.c +++ sys/fs/fuse/fuse_vnops.c @@ -130,6 +130,7 @@ static vop_bmap_t fuse_vnop_bmap; static vop_close_t fuse_fifo_close; static vop_close_t fuse_vnop_close; +static vop_copy_file_range_t fuse_vnop_copy_file_range; static vop_create_t fuse_vnop_create; static vop_deleteextattr_t fuse_vnop_deleteextattr; static vop_fdatasync_t fuse_vnop_fdatasync; @@ -185,6 +186,7 @@ .vop_advlock = fuse_vnop_advlock, .vop_bmap = fuse_vnop_bmap, .vop_close = fuse_vnop_close, + .vop_copy_file_range = fuse_vnop_copy_file_range, .vop_create = fuse_vnop_create, .vop_deleteextattr = fuse_vnop_deleteextattr, .vop_fsync = fuse_vnop_fsync, @@ -608,6 +610,126 @@ return err; } +/* + struct vop_copy_file_range_args { + struct vop_generic_args a_gen; + struct vnode *a_invp; + off_t *a_inoffp; + struct vnode *a_outvp; + off_t *a_outoffp; + size_t *a_lenp; + unsigned int a_flags; + struct ucred *a_incred; + struct ucred *a_outcred; + struct thread *a_fsizetd; +} + */ +static int +fuse_vnop_copy_file_range(struct vop_copy_file_range_args *ap) +{ + struct vnode *invp = ap->a_invp; + struct vnode *outvp = ap->a_outvp; + struct mount *mp = vnode_mount(invp); + struct fuse_dispatcher fdi; + struct fuse_filehandle *infufh, *outfufh; + struct fuse_copy_file_range_in *fcfri; + struct ucred *incred = ap->a_incred; + struct ucred *outcred = ap->a_outcred; + struct fuse_write_out *fwo; + struct thread *td; + struct uio io; + pid_t pid; + int err; + + if (mp != vnode_mount(outvp)) + goto fallback; + + if (incred->cr_uid != outcred->cr_uid) + goto fallback; + + if (incred->cr_groups[0] != outcred->cr_groups[0]) + goto fallback; + + if (!fsess_maybe_impl(mp, FUSE_COPY_FILE_RANGE)) + goto fallback; + + if (ap->a_fsizetd == NULL) + td = curthread; + else + td = ap->a_fsizetd; + pid = td->td_proc->p_pid; + + err = fuse_filehandle_getrw(invp, FREAD, &infufh, incred, pid); + if (err) + return (err); + + err = fuse_filehandle_getrw(outvp, FWRITE, &outfufh, outcred, pid); + if (err) + return (err); + + /* Lock both vnodes, avoiding risk of deadlock. */ + do { + err = vn_lock(outvp, LK_EXCLUSIVE); + if (invp == outvp) + break; + if (err == 0) { + err = vn_lock(invp, LK_SHARED | LK_NOWAIT); + if (err == 0) + break; + VOP_UNLOCK(outvp); + err = vn_lock(invp, LK_SHARED); + if (err == 0) + VOP_UNLOCK(invp); + } + } while (err == 0); + if (err != 0) + return (err); + + io.uio_offset = *ap->a_outoffp; + io.uio_resid = *ap->a_lenp; + if (ap->a_fsizetd) { + err = vn_rlimit_fsize(outvp, &io, ap->a_fsizetd); + if (err) + goto unlock; + } + + fdisp_init(&fdi, sizeof(*fcfri)); + fdisp_make_vp(&fdi, FUSE_COPY_FILE_RANGE, invp, td, incred); + fcfri = fdi.indata; + fcfri->fh_in = infufh->fh_id; + fcfri->off_in = *ap->a_inoffp; + fcfri->nodeid_out = VTOI(outvp); + fcfri->fh_out = outfufh->fh_id; + fcfri->off_out = *ap->a_outoffp; + fcfri->len = *ap->a_lenp; + fcfri->flags = 0; + + err = fdisp_wait_answ(&fdi); + if (err == 0) { + fwo = fdi.answ; + *ap->a_lenp = fwo->size; + *ap->a_inoffp += fwo->size; + *ap->a_outoffp += fwo->size; + fuse_internal_clear_suid_on_write(outvp, outcred, td); + } + fdisp_destroy(&fdi); + +unlock: + if (invp != outvp) + VOP_UNLOCK(invp); + VOP_UNLOCK(outvp); + + if (err == ENOSYS) { + fsess_set_notimpl(mp, FUSE_COPY_FILE_RANGE); +fallback: + err = vn_generic_copy_file_range(ap->a_invp, ap->a_inoffp, + ap->a_outvp, ap->a_outoffp, ap->a_lenp, ap->a_flags, + ap->a_incred, ap->a_outcred, ap->a_fsizetd); + } + + return (err); +} + static void fdisp_make_mknod_for_fallback( struct fuse_dispatcher *fdip, Index: tests/sys/fs/fusefs/Makefile =================================================================== --- tests/sys/fs/fusefs/Makefile +++ tests/sys/fs/fusefs/Makefile @@ -13,6 +13,7 @@ GTESTS+= allow_other GTESTS+= bmap GTESTS+= cache +GTESTS+= copy_file_range GTESTS+= create GTESTS+= default_permissions GTESTS+= default_permissions_privileged Index: tests/sys/fs/fusefs/copy_file_range.cc =================================================================== --- /dev/null +++ tests/sys/fs/fusefs/copy_file_range.cc @@ -0,0 +1,401 @@ +/*- + * SPDX-License-Identifier: BSD-2-Clause-FreeBSD + * + * Copyright (c) 2020 Alan Somers + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $FreeBSD$ + */ + +extern "C" { +#include +#include +#include + +#include +#include +#include +} + +#include "mockfs.hh" +#include "utils.hh" + +using namespace testing; + +class CopyFileRange: public FuseTest { +public: +static sig_atomic_t s_sigxfsz; + +void SetUp() { + s_sigxfsz = 0; + FuseTest::SetUp(); +} + +void TearDown() { + struct sigaction sa; + + bzero(&sa, sizeof(sa)); + sa.sa_handler = SIG_DFL; + sigaction(SIGXFSZ, &sa, NULL); + + FuseTest::TearDown(); +} + +void expect_maybe_lseek(uint64_t ino) +{ + EXPECT_CALL(*m_mock, process( + ResultOf([=](auto in) { + return (in.header.opcode == FUSE_LSEEK && + in.header.nodeid == ino); + }, Eq(true)), + _) + ).Times(AtMost(1)) + .WillRepeatedly(Invoke(ReturnErrno(ENOSYS))); +} + +void expect_open(uint64_t ino, uint32_t flags, int times, uint64_t fh) +{ + EXPECT_CALL(*m_mock, process( + ResultOf([=](auto in) { + return (in.header.opcode == FUSE_OPEN && + in.header.nodeid == ino); + }, Eq(true)), + _) + ).Times(times) + .WillRepeatedly(Invoke( + ReturnImmediate([=](auto in __unused, auto& out) { + out.header.len = sizeof(out.header); + SET_OUT_HEADER_LEN(out, open); + out.body.open.fh = fh; + out.body.open.open_flags = flags; + }))); +} + +void expect_write(uint64_t ino, uint64_t offset, uint64_t isize, + uint64_t osize, const void *contents) +{ + EXPECT_CALL(*m_mock, process( + ResultOf([=](auto in) { + const char *buf = (const char*)in.body.bytes + + sizeof(struct fuse_write_in); + + return (in.header.opcode == FUSE_WRITE && + in.header.nodeid == ino && + in.body.write.offset == offset && + in.body.write.size == isize && + 0 == bcmp(buf, contents, isize)); + }, Eq(true)), + _) + ).WillOnce(Invoke(ReturnImmediate([=](auto in __unused, auto& out) { + SET_OUT_HEADER_LEN(out, write); + out.body.write.size = osize; + }))); +} + +}; + +sig_atomic_t CopyFileRange::s_sigxfsz = 0; + +void sigxfsz_handler(int __unused sig) { + CopyFileRange::s_sigxfsz = 1; +} + + +class CopyFileRange_7_27: public CopyFileRange { +public: +virtual void SetUp() { + m_kernel_minor_version = 27; + CopyFileRange::SetUp(); +} +}; + +TEST_F(CopyFileRange, eio) +{ + const char FULLPATH1[] = "mountpoint/src.txt"; + const char RELPATH1[] = "src.txt"; + const char FULLPATH2[] = "mountpoint/dst.txt"; + const char RELPATH2[] = "dst.txt"; + const uint64_t ino1 = 42; + const uint64_t ino2 = 43; + const uint64_t fh1 = 0xdeadbeef1a7ebabe; + const uint64_t fh2 = 0xdeadc0de88c0ffee; + off_t fsize1 = 1 << 20; /* 1 MiB */ + off_t fsize2 = 1 << 19; /* 512 KiB */ + off_t start1 = 1 << 18; + off_t start2 = 3 << 17; + ssize_t len = 65536; + int fd1, fd2; + + expect_lookup(RELPATH1, ino1, S_IFREG | 0644, fsize1, 1); + expect_lookup(RELPATH2, ino2, S_IFREG | 0644, fsize2, 1); + expect_open(ino1, 0, 1, fh1); + expect_open(ino2, 0, 1, fh2); + EXPECT_CALL(*m_mock, process( + ResultOf([=](auto in) { + return (in.header.opcode == FUSE_COPY_FILE_RANGE && + in.header.nodeid == ino1 && + in.body.copy_file_range.fh_in == fh1 && + (off_t)in.body.copy_file_range.off_in == start1 && + in.body.copy_file_range.nodeid_out == ino2 && + in.body.copy_file_range.fh_out == fh2 && + (off_t)in.body.copy_file_range.off_out == start2 && + in.body.copy_file_range.len == (size_t)len && + in.body.copy_file_range.flags == 0); + }, Eq(true)), + _) + ).WillOnce(Invoke(ReturnErrno(EIO))); + + fd1 = open(FULLPATH1, O_RDONLY); + fd2 = open(FULLPATH2, O_WRONLY); + ASSERT_EQ(-1, copy_file_range(fd1, &start1, fd2, &start2, len, 0)); + EXPECT_EQ(EIO, errno); +} + +/* + * If the server doesn't support FUSE_COPY_FILE_RANGE, the kernel should + * fallback to a read/write based implementation. + */ +TEST_F(CopyFileRange, fallback) +{ + const char FULLPATH1[] = "mountpoint/src.txt"; + const char RELPATH1[] = "src.txt"; + const char FULLPATH2[] = "mountpoint/dst.txt"; + const char RELPATH2[] = "dst.txt"; + const uint64_t ino1 = 42; + const uint64_t ino2 = 43; + const uint64_t fh1 = 0xdeadbeef1a7ebabe; + const uint64_t fh2 = 0xdeadc0de88c0ffee; + off_t fsize2 = 0; + off_t start1 = 0; + off_t start2 = 0; + const char *contents = "Hello, world!"; + ssize_t len; + int fd1, fd2; + + len = strlen(contents); + + /* + * Ensure that we read to EOF, just so the buffer cache's read size is + * predictable. + */ + expect_lookup(RELPATH1, ino1, S_IFREG | 0644, start1 + len, 1); + expect_lookup(RELPATH2, ino2, S_IFREG | 0644, fsize2, 1); + expect_open(ino1, 0, 1, fh1); + expect_open(ino2, 0, 1, fh2); + EXPECT_CALL(*m_mock, process( + ResultOf([=](auto in) { + return (in.header.opcode == FUSE_COPY_FILE_RANGE && + in.header.nodeid == ino1 && + in.body.copy_file_range.fh_in == fh1 && + (off_t)in.body.copy_file_range.off_in == start1 && + in.body.copy_file_range.nodeid_out == ino2 && + in.body.copy_file_range.fh_out == fh2 && + (off_t)in.body.copy_file_range.off_out == start2 && + in.body.copy_file_range.len == (size_t)len && + in.body.copy_file_range.flags == 0); + }, Eq(true)), + _) + ).WillOnce(Invoke(ReturnErrno(ENOSYS))); + expect_maybe_lseek(ino1); + expect_read(ino1, start1, len, len, contents, 0); + expect_write(ino2, start2, len, len, contents); + + fd1 = open(FULLPATH1, O_RDONLY); + ASSERT_GE(fd1, 0); + fd2 = open(FULLPATH2, O_WRONLY); + ASSERT_GE(fd2, 0); + ASSERT_EQ(len, copy_file_range(fd1, &start1, fd2, &start2, len, 0)); +} + +/* fusefs should respect RLIMIT_FSIZE */ +TEST_F(CopyFileRange, rlimit_fsize) +{ + const char FULLPATH1[] = "mountpoint/src.txt"; + const char RELPATH1[] = "src.txt"; + const char FULLPATH2[] = "mountpoint/dst.txt"; + const char RELPATH2[] = "dst.txt"; + struct rlimit rl; + const uint64_t ino1 = 42; + const uint64_t ino2 = 43; + const uint64_t fh1 = 0xdeadbeef1a7ebabe; + const uint64_t fh2 = 0xdeadc0de88c0ffee; + off_t fsize1 = 1 << 20; /* 1 MiB */ + off_t fsize2 = 1 << 19; /* 512 KiB */ + off_t start1 = 1 << 18; + off_t start2 = fsize2; + ssize_t len = 65536; + int fd1, fd2; + + expect_lookup(RELPATH1, ino1, S_IFREG | 0644, fsize1, 1); + expect_lookup(RELPATH2, ino2, S_IFREG | 0644, fsize2, 1); + expect_open(ino1, 0, 1, fh1); + expect_open(ino2, 0, 1, fh2); + EXPECT_CALL(*m_mock, process( + ResultOf([=](auto in) { + return (in.header.opcode == FUSE_COPY_FILE_RANGE); + }, Eq(true)), + _) + ).Times(0); + + rl.rlim_cur = fsize2; + rl.rlim_max = 10 * fsize2; + ASSERT_EQ(0, setrlimit(RLIMIT_FSIZE, &rl)) << strerror(errno); + ASSERT_NE(SIG_ERR, signal(SIGXFSZ, sigxfsz_handler)) << strerror(errno); + + fd1 = open(FULLPATH1, O_RDONLY); + fd2 = open(FULLPATH2, O_WRONLY); + ASSERT_EQ(-1, copy_file_range(fd1, &start1, fd2, &start2, len, 0)); + EXPECT_EQ(EFBIG, errno); + EXPECT_EQ(1, s_sigxfsz); +} + +TEST_F(CopyFileRange, ok) +{ + const char FULLPATH1[] = "mountpoint/src.txt"; + const char RELPATH1[] = "src.txt"; + const char FULLPATH2[] = "mountpoint/dst.txt"; + const char RELPATH2[] = "dst.txt"; + const uint64_t ino1 = 42; + const uint64_t ino2 = 43; + const uint64_t fh1 = 0xdeadbeef1a7ebabe; + const uint64_t fh2 = 0xdeadc0de88c0ffee; + off_t fsize1 = 1 << 20; /* 1 MiB */ + off_t fsize2 = 1 << 19; /* 512 KiB */ + off_t start1 = 1 << 18; + off_t start2 = 3 << 17; + ssize_t len = 65536; + int fd1, fd2; + + expect_lookup(RELPATH1, ino1, S_IFREG | 0644, fsize1, 1); + expect_lookup(RELPATH2, ino2, S_IFREG | 0644, fsize2, 1); + expect_open(ino1, 0, 1, fh1); + expect_open(ino2, 0, 1, fh2); + EXPECT_CALL(*m_mock, process( + ResultOf([=](auto in) { + return (in.header.opcode == FUSE_COPY_FILE_RANGE && + in.header.nodeid == ino1 && + in.body.copy_file_range.fh_in == fh1 && + (off_t)in.body.copy_file_range.off_in == start1 && + in.body.copy_file_range.nodeid_out == ino2 && + in.body.copy_file_range.fh_out == fh2 && + (off_t)in.body.copy_file_range.off_out == start2 && + in.body.copy_file_range.len == (size_t)len && + in.body.copy_file_range.flags == 0); + }, Eq(true)), + _) + ).WillOnce(Invoke(ReturnImmediate([=](auto in __unused, auto& out) { + SET_OUT_HEADER_LEN(out, write); + out.body.write.size = len; + }))); + + fd1 = open(FULLPATH1, O_RDONLY); + fd2 = open(FULLPATH2, O_WRONLY); + ASSERT_EQ(len, copy_file_range(fd1, &start1, fd2, &start2, len, 0)); +} + +/* + * copy_file_range can make copies within a single file, as long as the ranges + * don't overlap. + * */ +TEST_F(CopyFileRange, same_file) +{ + const char FULLPATH[] = "mountpoint/src.txt"; + const char RELPATH[] = "src.txt"; + const uint64_t ino = 4; + const uint64_t fh = 0xdeadbeefa7ebabe; + off_t fsize = 1 << 20; /* 1 MiB */ + off_t off_in = 1 << 18; + off_t off_out = 3 << 17; + ssize_t len = 65536; + int fd; + + expect_lookup(RELPATH, ino, S_IFREG | 0644, fsize, 1); + expect_open(ino, 0, 1, fh); + EXPECT_CALL(*m_mock, process( + ResultOf([=](auto in) { + return (in.header.opcode == FUSE_COPY_FILE_RANGE && + in.header.nodeid == ino && + in.body.copy_file_range.fh_in == fh && + (off_t)in.body.copy_file_range.off_in == off_in && + in.body.copy_file_range.nodeid_out == ino && + in.body.copy_file_range.fh_out == fh && + (off_t)in.body.copy_file_range.off_out == off_out && + in.body.copy_file_range.len == (size_t)len && + in.body.copy_file_range.flags == 0); + }, Eq(true)), + _) + ).WillOnce(Invoke(ReturnImmediate([=](auto in __unused, auto& out) { + SET_OUT_HEADER_LEN(out, write); + out.body.write.size = len; + }))); + + fd = open(FULLPATH, O_RDWR); + ASSERT_EQ(len, copy_file_range(fd, &off_in, fd, &off_out, len, 0)); +} + +/* With older protocol versions, no FUSE_COPY_FILE_RANGE should be attempted */ +TEST_F(CopyFileRange_7_27, fallback) +{ + const char FULLPATH1[] = "mountpoint/src.txt"; + const char RELPATH1[] = "src.txt"; + const char FULLPATH2[] = "mountpoint/dst.txt"; + const char RELPATH2[] = "dst.txt"; + const uint64_t ino1 = 42; + const uint64_t ino2 = 43; + const uint64_t fh1 = 0xdeadbeef1a7ebabe; + const uint64_t fh2 = 0xdeadc0de88c0ffee; + off_t fsize2 = 0; + off_t start1 = 0; + off_t start2 = 0; + const char *contents = "Hello, world!"; + ssize_t len; + int fd1, fd2; + + len = strlen(contents); + + /* + * Ensure that we read to EOF, just so the buffer cache's read size is + * predictable. + */ + expect_lookup(RELPATH1, ino1, S_IFREG | 0644, start1 + len, 1); + expect_lookup(RELPATH2, ino2, S_IFREG | 0644, fsize2, 1); + expect_open(ino1, 0, 1, fh1); + expect_open(ino2, 0, 1, fh2); + EXPECT_CALL(*m_mock, process( + ResultOf([=](auto in) { + return (in.header.opcode == FUSE_COPY_FILE_RANGE); + }, Eq(true)), + _) + ).Times(0); + expect_maybe_lseek(ino1); + expect_read(ino1, start1, len, len, contents, 0); + expect_write(ino2, start2, len, len, contents); + + fd1 = open(FULLPATH1, O_RDONLY); + ASSERT_GE(fd1, 0); + fd2 = open(FULLPATH2, O_WRONLY); + ASSERT_GE(fd2, 0); + ASSERT_EQ(len, copy_file_range(fd1, &start1, fd2, &start2, len, 0)); +} + + Index: tests/sys/fs/fusefs/default_permissions.cc =================================================================== --- tests/sys/fs/fusefs/default_permissions.cc +++ tests/sys/fs/fusefs/default_permissions.cc @@ -109,6 +109,25 @@ }))); } +void expect_copy_file_range(uint64_t ino_in, uint64_t off_in, uint64_t ino_out, + uint64_t off_out, uint64_t len) +{ + EXPECT_CALL(*m_mock, process( + ResultOf([=](auto in) { + return (in.header.opcode == FUSE_COPY_FILE_RANGE && + in.header.nodeid == ino_in && + in.body.copy_file_range.off_in == off_in && + in.body.copy_file_range.nodeid_out == ino_out && + in.body.copy_file_range.off_out == off_out && + in.body.copy_file_range.len == len); + }, Eq(true)), + _) + ).WillOnce(Invoke(ReturnImmediate([=](auto in __unused, auto& out) { + SET_OUT_HEADER_LEN(out, write); + out.body.write.size = len; + }))); +} + void expect_getattr(uint64_t ino, mode_t mode, uint64_t attr_valid, int times, uid_t uid = 0, gid_t gid = 0) { @@ -141,6 +160,7 @@ class Access: public DefaultPermissions {}; class Chown: public DefaultPermissions {}; class Chgrp: public DefaultPermissions {}; +class CopyFileRange: public DefaultPermissions {}; class Lookup: public DefaultPermissions {}; class Open: public DefaultPermissions {}; class Setattr: public DefaultPermissions {}; @@ -477,6 +497,94 @@ EXPECT_EQ(0, chown(FULLPATH, -1, newgid)) << strerror(errno); } +/* A write by a non-owner should clear a file's SGID bit */ +TEST_F(CopyFileRange, clear_guid) +{ + const char FULLPATH_IN[] = "mountpoint/in.txt"; + const char RELPATH_IN[] = "in.txt"; + const char FULLPATH_OUT[] = "mountpoint/out.txt"; + const char RELPATH_OUT[] = "out.txt"; + struct stat sb; + uint64_t ino_in = 42; + uint64_t ino_out = 43; + mode_t oldmode = 02777; + mode_t newmode = 0777; + off_t fsize = 16; + off_t off_in = 0; + off_t off_out = 8; + off_t len = 8; + int fd_in, fd_out; + + expect_getattr(FUSE_ROOT_ID, S_IFDIR | 0755, UINT64_MAX, 1); + FuseTest::expect_lookup(RELPATH_IN, ino_in, S_IFREG | oldmode, fsize, 1, + UINT64_MAX, 0, 0); + expect_open(ino_in, 0, 1); + FuseTest::expect_lookup(RELPATH_OUT, ino_out, S_IFREG | oldmode, fsize, + 1, UINT64_MAX, 0, 0); + expect_open(ino_out, 0, 1); + expect_copy_file_range(ino_in, off_in, ino_out, off_out, len); + expect_chmod(ino_out, newmode, fsize); + + fd_in = open(FULLPATH_IN, O_RDONLY); + ASSERT_LE(0, fd_in) << strerror(errno); + fd_out = open(FULLPATH_OUT, O_WRONLY); + ASSERT_LE(0, fd_out) << strerror(errno); + ASSERT_EQ(len, + copy_file_range(fd_in, &off_in, fd_out, &off_out, len, 0)) + << strerror(errno); + ASSERT_EQ(0, fstat(fd_out, &sb)) << strerror(errno); + EXPECT_EQ(S_IFREG | newmode, sb.st_mode); + ASSERT_EQ(0, fstat(fd_in, &sb)) << strerror(errno); + EXPECT_EQ(S_IFREG | oldmode, sb.st_mode); + + leak(fd_in); + leak(fd_out); +} + +/* A write by a non-owner should clear a file's SUID bit */ +TEST_F(CopyFileRange, clear_suid) +{ + const char FULLPATH_IN[] = "mountpoint/in.txt"; + const char RELPATH_IN[] = "in.txt"; + const char FULLPATH_OUT[] = "mountpoint/out.txt"; + const char RELPATH_OUT[] = "out.txt"; + struct stat sb; + uint64_t ino_in = 42; + uint64_t ino_out = 43; + mode_t oldmode = 04777; + mode_t newmode = 0777; + off_t fsize = 16; + off_t off_in = 0; + off_t off_out = 8; + off_t len = 8; + int fd_in, fd_out; + + expect_getattr(FUSE_ROOT_ID, S_IFDIR | 0755, UINT64_MAX, 1); + FuseTest::expect_lookup(RELPATH_IN, ino_in, S_IFREG | oldmode, fsize, 1, + UINT64_MAX, 0, 0); + expect_open(ino_in, 0, 1); + FuseTest::expect_lookup(RELPATH_OUT, ino_out, S_IFREG | oldmode, fsize, + 1, UINT64_MAX, 0, 0); + expect_open(ino_out, 0, 1); + expect_copy_file_range(ino_in, off_in, ino_out, off_out, len); + expect_chmod(ino_out, newmode, fsize); + + fd_in = open(FULLPATH_IN, O_RDONLY); + ASSERT_LE(0, fd_in) << strerror(errno); + fd_out = open(FULLPATH_OUT, O_WRONLY); + ASSERT_LE(0, fd_out) << strerror(errno); + ASSERT_EQ(len, + copy_file_range(fd_in, &off_in, fd_out, &off_out, len, 0)) + << strerror(errno); + ASSERT_EQ(0, fstat(fd_out, &sb)) << strerror(errno); + EXPECT_EQ(S_IFREG | newmode, sb.st_mode); + ASSERT_EQ(0, fstat(fd_in, &sb)) << strerror(errno); + EXPECT_EQ(S_IFREG | oldmode, sb.st_mode); + + leak(fd_in); + leak(fd_out); +} + TEST_F(Create, ok) { const char FULLPATH[] = "mountpoint/some_file.txt"; @@ -1311,5 +1419,3 @@ ASSERT_EQ(1, write(fd, wbuf, sizeof(wbuf))) << strerror(errno); leak(fd); } - - Index: tests/sys/fs/fusefs/mockfs.hh =================================================================== --- tests/sys/fs/fusefs/mockfs.hh +++ tests/sys/fs/fusefs/mockfs.hh @@ -157,6 +157,7 @@ uint8_t bytes[ max_max_write + 0x1000 - sizeof(struct fuse_in_header) ]; + fuse_copy_file_range_in copy_file_range; fuse_create_in create; fuse_flush_in flush; fuse_fsync_in fsync; Index: tests/sys/fs/fusefs/mockfs.cc =================================================================== --- tests/sys/fs/fusefs/mockfs.cc +++ tests/sys/fs/fusefs/mockfs.cc @@ -62,7 +62,7 @@ const char* opcode2opname(uint32_t opcode) { - const int NUM_OPS = 47; + const int NUM_OPS = 48; const char* table[NUM_OPS] = { "Unknown (opcode 0)", "LOOKUP", @@ -111,6 +111,7 @@ "READDIRPLUS", "RENAME2", "LSEEK", + "COPY_FILE_RANGE", }; if (opcode >= NUM_OPS) return ("Unknown (opcode > max)"); @@ -183,6 +184,20 @@ printf(" block=%" PRIx64 " blocksize=%#x", in.body.bmap.block, in.body.bmap.blocksize); break; + case FUSE_COPY_FILE_RANGE: + printf(" off_in=%" PRIu64 " ino_out=%" PRIu64 + " off_out=%" PRIu64 " size=%" PRIu64, + in.body.copy_file_range.off_in, + in.body.copy_file_range.nodeid_out, + in.body.copy_file_range.off_out, + in.body.copy_file_range.len); + if (verbosity > 1) + printf(" fh_in=%" PRIu64 " fh_out=%" PRIu64 + " flags=%" PRIx64, + in.body.copy_file_range.fh_in, + in.body.copy_file_range.fh_out, + in.body.copy_file_range.flags); + break; case FUSE_CREATE: if (m_kernel_minor_version >= 12) name = (const char*)in.body.bytes + @@ -664,6 +679,11 @@ EXPECT_EQ(inlen, fih + sizeof(in.body.lseek)); EXPECT_EQ((size_t)buflen, inlen); break; + case FUSE_COPY_FILE_RANGE: + EXPECT_EQ(inlen, fih + sizeof(in.body.copy_file_range)); + EXPECT_EQ(0ul, in.body.copy_file_range.flags); + EXPECT_EQ((size_t)buflen, inlen); + break; case FUSE_NOTIFY_REPLY: case FUSE_BATCH_FORGET: case FUSE_FALLOCATE: Index: tests/sys/fs/fusefs/utils.cc =================================================================== --- tests/sys/fs/fusefs/utils.cc +++ tests/sys/fs/fusefs/utils.cc @@ -376,10 +376,10 @@ in.body.read.fh == FH && in.body.read.offset == offset && in.body.read.size == isize && - flags == -1 ? + (flags == -1 ? (in.body.read.flags == O_RDONLY || in.body.read.flags == O_RDWR) - : in.body.read.flags == (uint32_t)flags); + : in.body.read.flags == (uint32_t)flags)); }, Eq(true)), _) ).WillOnce(Invoke(ReturnImmediate([=](auto in __unused, auto& out) { Index: tests/sys/fs/fusefs/write.cc =================================================================== --- tests/sys/fs/fusefs/write.cc +++ tests/sys/fs/fusefs/write.cc @@ -508,17 +508,14 @@ const char *INITIAL = "XXXXXXXXXX"; uint64_t ino = 42; uint64_t offset = 1; - ssize_t bufsize = strlen(CONTENTS); + ssize_t bufsize = strlen(CONTENTS) + 1; off_t orig_fsize = 10; off_t truncated_fsize = 5; - off_t final_fsize = bufsize; int fd; FuseTest::expect_lookup(RELPATH, ino, S_IFREG | 0644, orig_fsize, 1); expect_open(ino, 0, 1); expect_read(ino, 0, orig_fsize, truncated_fsize, INITIAL, O_RDWR); - expect_getattr(ino, truncated_fsize); - expect_read(ino, 0, final_fsize, final_fsize, INITIAL, O_RDWR); maybe_expect_write(ino, offset, bufsize, CONTENTS); fd = open(FULLPATH, O_RDWR);