diff --git a/sys/fs/fuse/fuse_internal.c b/sys/fs/fuse/fuse_internal.c index 2faad7cd8651..60f9a7319e00 100644 --- a/sys/fs/fuse/fuse_internal.c +++ b/sys/fs/fuse/fuse_internal.c @@ -1,1259 +1,1307 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 2007-2009 Google Inc. and Amit Singh * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are * met: * * * Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following disclaimer * in the documentation and/or other materials provided with the * distribution. * * Neither the name of Google Inc. nor the names of its * contributors may be used to endorse or promote products derived from * this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * Copyright (C) 2005 Csaba Henk. * All rights reserved. * * Copyright (c) 2019 The FreeBSD Foundation * * Portions of this software were developed by BFF Storage Systems, LLC under * sponsorship from the FreeBSD Foundation. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "fuse.h" #include "fuse_file.h" #include "fuse_internal.h" #include "fuse_io.h" #include "fuse_ipc.h" #include "fuse_node.h" #include "fuse_file.h" SDT_PROVIDER_DECLARE(fusefs); /* * Fuse trace probe: * arg0: verbosity. Higher numbers give more verbose messages * arg1: Textual message */ SDT_PROBE_DEFINE2(fusefs, , internal, trace, "int", "char*"); #ifdef ZERO_PAD_INCOMPLETE_BUFS static int isbzero(void *buf, size_t len); #endif counter_u64_t fuse_lookup_cache_hits; counter_u64_t fuse_lookup_cache_misses; SYSCTL_COUNTER_U64(_vfs_fusefs_stats, OID_AUTO, lookup_cache_hits, CTLFLAG_RD, &fuse_lookup_cache_hits, "number of positive cache hits in lookup"); SYSCTL_COUNTER_U64(_vfs_fusefs_stats, OID_AUTO, lookup_cache_misses, CTLFLAG_RD, &fuse_lookup_cache_misses, "number of cache misses in lookup"); int fuse_internal_get_cached_vnode(struct mount* mp, ino_t ino, int flags, struct vnode **vpp) { struct bintime now; struct thread *td = curthread; uint64_t nodeid = ino; int error; *vpp = NULL; error = vfs_hash_get(mp, fuse_vnode_hash(nodeid), flags, td, vpp, fuse_vnode_cmp, &nodeid); if (error) return error; /* * Check the entry cache timeout. We have to do this within fusefs * instead of by using cache_enter_time/cache_lookup because those * routines are only intended to work with pathnames, not inodes */ if (*vpp != NULL) { getbinuptime(&now); if (bintime_cmp(&(VTOFUD(*vpp)->entry_cache_timeout), &now, >)){ counter_u64_add(fuse_lookup_cache_hits, 1); return 0; } else { /* Entry cache timeout */ counter_u64_add(fuse_lookup_cache_misses, 1); cache_purge(*vpp); vput(*vpp); *vpp = NULL; } } return 0; } SDT_PROBE_DEFINE0(fusefs, , internal, access_vadmin); /* Synchronously send a FUSE_ACCESS operation */ int fuse_internal_access(struct vnode *vp, accmode_t mode, struct thread *td, struct ucred *cred) { int err = 0; uint32_t mask = F_OK; int dataflags; int vtype; struct mount *mp; struct fuse_dispatcher fdi; struct fuse_access_in *fai; struct fuse_data *data; mp = vnode_mount(vp); vtype = vnode_vtype(vp); data = fuse_get_mpdata(mp); dataflags = data->dataflags; if (mode == 0) return 0; if (mode & VMODIFY_PERMS && vfs_isrdonly(mp)) { switch (vp->v_type) { case VDIR: /* FALLTHROUGH */ case VLNK: /* FALLTHROUGH */ case VREG: return EROFS; default: break; } } /* Unless explicitly permitted, deny everyone except the fs owner. */ if (!(dataflags & FSESS_DAEMON_CAN_SPY)) { if (fuse_match_cred(data->daemoncred, cred)) return EPERM; } if (dataflags & FSESS_DEFAULT_PERMISSIONS) { struct vattr va; fuse_internal_getattr(vp, &va, cred, td); return vaccess(vp->v_type, va.va_mode, va.va_uid, va.va_gid, mode, cred); } if (mode & VADMIN) { /* * The FUSE protocol doesn't have an equivalent of VADMIN, so * it's a bug if we ever reach this point with that bit set. */ SDT_PROBE0(fusefs, , internal, access_vadmin); } if (fsess_not_impl(mp, FUSE_ACCESS)) return 0; if ((mode & (VWRITE | VAPPEND)) != 0) mask |= W_OK; if ((mode & VREAD) != 0) mask |= R_OK; if ((mode & VEXEC) != 0) mask |= X_OK; fdisp_init(&fdi, sizeof(*fai)); fdisp_make_vp(&fdi, FUSE_ACCESS, vp, td, cred); fai = fdi.indata; fai->mask = mask; err = fdisp_wait_answ(&fdi); fdisp_destroy(&fdi); if (err == ENOSYS) { fsess_set_notimpl(mp, FUSE_ACCESS); err = 0; } return err; } /* * Cache FUSE attributes from attr, in attribute cache associated with vnode * 'vp'. Optionally, if argument 'vap' is not NULL, store a copy of the * converted attributes there as well. * * If the nominal attribute cache TTL is zero, do not cache on the 'vp' (but do * return the result to the caller). */ void fuse_internal_cache_attrs(struct vnode *vp, struct fuse_attr *attr, uint64_t attr_valid, uint32_t attr_valid_nsec, struct vattr *vap) { struct mount *mp; struct fuse_vnode_data *fvdat; struct fuse_data *data; struct vattr *vp_cache_at; mp = vnode_mount(vp); fvdat = VTOFUD(vp); data = fuse_get_mpdata(mp); ASSERT_VOP_ELOCKED(vp, "fuse_internal_cache_attrs"); fuse_validity_2_bintime(attr_valid, attr_valid_nsec, &fvdat->attr_cache_timeout); /* Fix our buffers if the filesize changed without us knowing */ if (vnode_isreg(vp) && attr->size != fvdat->cached_attrs.va_size) { (void)fuse_vnode_setsize(vp, attr->size); fvdat->cached_attrs.va_size = attr->size; } if (attr_valid > 0 || attr_valid_nsec > 0) vp_cache_at = &(fvdat->cached_attrs); else if (vap != NULL) vp_cache_at = vap; else return; vattr_null(vp_cache_at); vp_cache_at->va_fsid = mp->mnt_stat.f_fsid.val[0]; vp_cache_at->va_fileid = attr->ino; vp_cache_at->va_mode = attr->mode & ~S_IFMT; vp_cache_at->va_nlink = attr->nlink; vp_cache_at->va_uid = attr->uid; vp_cache_at->va_gid = attr->gid; vp_cache_at->va_rdev = attr->rdev; vp_cache_at->va_size = attr->size; /* XXX on i386, seconds are truncated to 32 bits */ vp_cache_at->va_atime.tv_sec = attr->atime; vp_cache_at->va_atime.tv_nsec = attr->atimensec; vp_cache_at->va_mtime.tv_sec = attr->mtime; vp_cache_at->va_mtime.tv_nsec = attr->mtimensec; vp_cache_at->va_ctime.tv_sec = attr->ctime; vp_cache_at->va_ctime.tv_nsec = attr->ctimensec; if (fuse_libabi_geq(data, 7, 9) && attr->blksize > 0) vp_cache_at->va_blocksize = attr->blksize; else vp_cache_at->va_blocksize = PAGE_SIZE; vp_cache_at->va_type = IFTOVT(attr->mode); vp_cache_at->va_bytes = attr->blocks * S_BLKSIZE; vp_cache_at->va_flags = 0; if (vap != vp_cache_at && vap != NULL) memcpy(vap, vp_cache_at, sizeof(*vap)); } /* fsync */ int fuse_internal_fsync_callback(struct fuse_ticket *tick, struct uio *uio) { if (tick->tk_aw_ohead.error == ENOSYS) { fsess_set_notimpl(tick->tk_data->mp, fticket_opcode(tick)); } return 0; } int fuse_internal_fsync(struct vnode *vp, struct thread *td, int waitfor, bool datasync) { struct fuse_fsync_in *ffsi = NULL; struct fuse_dispatcher fdi; struct fuse_filehandle *fufh; struct fuse_vnode_data *fvdat = VTOFUD(vp); struct mount *mp = vnode_mount(vp); int op = FUSE_FSYNC; int err = 0; if (fsess_not_impl(vnode_mount(vp), (vnode_vtype(vp) == VDIR ? FUSE_FSYNCDIR : FUSE_FSYNC))) { return 0; } if (vnode_isdir(vp)) op = FUSE_FSYNCDIR; if (fsess_not_impl(mp, op)) return 0; fdisp_init(&fdi, sizeof(*ffsi)); /* * fsync every open file handle for this file, because we can't be sure * which file handle the caller is really referring to. */ LIST_FOREACH(fufh, &fvdat->handles, next) { fdi.iosize = sizeof(*ffsi); if (ffsi == NULL) fdisp_make_vp(&fdi, op, vp, td, NULL); else fdisp_refresh_vp(&fdi, op, vp, td, NULL); ffsi = fdi.indata; ffsi->fh = fufh->fh_id; ffsi->fsync_flags = 0; if (datasync) ffsi->fsync_flags = 1; if (waitfor == MNT_WAIT) { err = fdisp_wait_answ(&fdi); } else { fuse_insert_callback(fdi.tick, fuse_internal_fsync_callback); fuse_insert_message(fdi.tick, false); } if (err == ENOSYS) { /* ENOSYS means "success, and don't call again" */ fsess_set_notimpl(mp, op); err = 0; break; } } fdisp_destroy(&fdi); return err; } /* Asynchronous invalidation */ SDT_PROBE_DEFINE3(fusefs, , internal, invalidate_entry, "struct vnode*", "struct fuse_notify_inval_entry_out*", "char*"); int fuse_internal_invalidate_entry(struct mount *mp, struct uio *uio) { struct fuse_notify_inval_entry_out fnieo; struct componentname cn; struct vnode *dvp, *vp; char name[PATH_MAX]; int err; if ((err = uiomove(&fnieo, sizeof(fnieo), uio)) != 0) return (err); if (fnieo.namelen >= sizeof(name)) return (EINVAL); if ((err = uiomove(name, fnieo.namelen, uio)) != 0) return (err); name[fnieo.namelen] = '\0'; /* fusefs does not cache "." or ".." entries */ if (strncmp(name, ".", sizeof(".")) == 0 || strncmp(name, "..", sizeof("..")) == 0) return (0); if (fnieo.parent == FUSE_ROOT_ID) err = VFS_ROOT(mp, LK_SHARED, &dvp); else err = fuse_internal_get_cached_vnode( mp, fnieo.parent, LK_SHARED, &dvp); SDT_PROBE3(fusefs, , internal, invalidate_entry, dvp, &fnieo, name); /* * If dvp is not in the cache, then it must've been reclaimed. And * since fuse_vnop_reclaim does a cache_purge, name's entry must've * been invalidated already. So we can safely return if dvp == NULL */ if (err != 0 || dvp == NULL) return (err); /* * XXX we can't check dvp's generation because the FUSE invalidate * entry message doesn't include it. Worse case is that we invalidate * an entry that didn't need to be invalidated. */ cn.cn_nameiop = LOOKUP; cn.cn_flags = 0; /* !MAKEENTRY means free cached entry */ cn.cn_thread = curthread; cn.cn_cred = curthread->td_ucred; cn.cn_lkflags = LK_SHARED; cn.cn_pnbuf = NULL; cn.cn_nameptr = name; cn.cn_namelen = fnieo.namelen; err = cache_lookup(dvp, &vp, &cn, NULL, NULL); MPASS(err == 0); fuse_vnode_clear_attr_cache(dvp); vput(dvp); return (0); } SDT_PROBE_DEFINE2(fusefs, , internal, invalidate_inode, "struct vnode*", "struct fuse_notify_inval_inode_out *"); int fuse_internal_invalidate_inode(struct mount *mp, struct uio *uio) { struct fuse_notify_inval_inode_out fniio; struct vnode *vp; int err; if ((err = uiomove(&fniio, sizeof(fniio), uio)) != 0) return (err); if (fniio.ino == FUSE_ROOT_ID) err = VFS_ROOT(mp, LK_EXCLUSIVE, &vp); else err = fuse_internal_get_cached_vnode(mp, fniio.ino, LK_SHARED, &vp); SDT_PROBE2(fusefs, , internal, invalidate_inode, vp, &fniio); if (err != 0 || vp == NULL) return (err); /* * XXX we can't check vp's generation because the FUSE invalidate * entry message doesn't include it. Worse case is that we invalidate * an inode that didn't need to be invalidated. */ /* * Flush and invalidate buffers if off >= 0. Technically we only need * to flush and invalidate the range of offsets [off, off + len), but * for simplicity's sake we do everything. */ if (fniio.off >= 0) fuse_io_invalbuf(vp, curthread); fuse_vnode_clear_attr_cache(vp); vput(vp); return (0); } /* mknod */ int fuse_internal_mknod(struct vnode *dvp, struct vnode **vpp, struct componentname *cnp, struct vattr *vap) { struct fuse_data *data; struct fuse_mknod_in fmni; size_t insize; data = fuse_get_mpdata(dvp->v_mount); fmni.mode = MAKEIMODE(vap->va_type, vap->va_mode); fmni.rdev = vap->va_rdev; if (fuse_libabi_geq(data, 7, 12)) { insize = sizeof(fmni); fmni.umask = curthread->td_proc->p_pd->pd_cmask; } else { insize = FUSE_COMPAT_MKNOD_IN_SIZE; } return (fuse_internal_newentry(dvp, vpp, cnp, FUSE_MKNOD, &fmni, insize, vap->va_type)); } /* readdir */ int fuse_internal_readdir(struct vnode *vp, struct uio *uio, off_t startoff, struct fuse_filehandle *fufh, struct fuse_iov *cookediov, int *ncookies, u_long *cookies) { int err = 0; struct fuse_dispatcher fdi; struct fuse_read_in *fri = NULL; int fnd_start; if (uio_resid(uio) == 0) return 0; fdisp_init(&fdi, 0); /* * Note that we DO NOT have a UIO_SYSSPACE here (so no need for p2p * I/O). */ /* * fnd_start is set non-zero once the offset in the directory gets * to the startoff. This is done because directories must be read * from the beginning (offset == 0) when fuse_vnop_readdir() needs * to do an open of the directory. * If it is not set non-zero here, it will be set non-zero in * fuse_internal_readdir_processdata() when uio_offset == startoff. */ fnd_start = 0; if (uio->uio_offset == startoff) fnd_start = 1; while (uio_resid(uio) > 0) { fdi.iosize = sizeof(*fri); if (fri == NULL) fdisp_make_vp(&fdi, FUSE_READDIR, vp, NULL, NULL); else fdisp_refresh_vp(&fdi, FUSE_READDIR, vp, NULL, NULL); fri = fdi.indata; fri->fh = fufh->fh_id; fri->offset = uio_offset(uio); fri->size = MIN(uio->uio_resid, fuse_get_mpdata(vp->v_mount)->max_read); if ((err = fdisp_wait_answ(&fdi))) break; if ((err = fuse_internal_readdir_processdata(uio, startoff, &fnd_start, fri->size, fdi.answ, fdi.iosize, cookediov, ncookies, &cookies))) break; } fdisp_destroy(&fdi); return ((err == -1) ? 0 : err); } /* * Return -1 to indicate that this readdir is finished, 0 if it copied * all the directory data read in and it may be possible to read more * and greater than 0 for a failure. */ int fuse_internal_readdir_processdata(struct uio *uio, off_t startoff, int *fnd_start, size_t reqsize, void *buf, size_t bufsize, struct fuse_iov *cookediov, int *ncookies, u_long **cookiesp) { int err = 0; int bytesavail; size_t freclen; struct dirent *de; struct fuse_dirent *fudge; u_long *cookies; cookies = *cookiesp; if (bufsize < FUSE_NAME_OFFSET) return -1; for (;;) { if (bufsize < FUSE_NAME_OFFSET) { err = -1; break; } fudge = (struct fuse_dirent *)buf; freclen = FUSE_DIRENT_SIZE(fudge); if (bufsize < freclen) { /* * This indicates a partial directory entry at the * end of the directory data. */ err = -1; break; } #ifdef ZERO_PAD_INCOMPLETE_BUFS if (isbzero(buf, FUSE_NAME_OFFSET)) { err = -1; break; } #endif if (!fudge->namelen || fudge->namelen > MAXNAMLEN) { err = EINVAL; break; } bytesavail = GENERIC_DIRSIZ((struct pseudo_dirent *) &fudge->namelen); if (bytesavail > uio_resid(uio)) { /* Out of space for the dir so we are done. */ err = -1; break; } /* * Don't start to copy the directory entries out until * the requested offset in the directory is found. */ if (*fnd_start != 0) { fiov_adjust(cookediov, bytesavail); bzero(cookediov->base, bytesavail); de = (struct dirent *)cookediov->base; de->d_fileno = fudge->ino; de->d_reclen = bytesavail; de->d_type = fudge->type; de->d_namlen = fudge->namelen; memcpy((char *)cookediov->base + sizeof(struct dirent) - MAXNAMLEN - 1, (char *)buf + FUSE_NAME_OFFSET, fudge->namelen); dirent_terminate(de); err = uiomove(cookediov->base, cookediov->len, uio); if (err) break; if (cookies != NULL) { if (*ncookies == 0) { err = -1; break; } *cookies = fudge->off; cookies++; (*ncookies)--; } } else if (startoff == fudge->off) *fnd_start = 1; buf = (char *)buf + freclen; bufsize -= freclen; uio_setoffset(uio, fudge->off); } *cookiesp = cookies; return err; } /* remove */ int fuse_internal_remove(struct vnode *dvp, struct vnode *vp, struct componentname *cnp, enum fuse_opcode op) { struct fuse_dispatcher fdi; nlink_t nlink; int err = 0; fdisp_init(&fdi, cnp->cn_namelen + 1); fdisp_make_vp(&fdi, op, dvp, cnp->cn_thread, cnp->cn_cred); memcpy(fdi.indata, cnp->cn_nameptr, cnp->cn_namelen); ((char *)fdi.indata)[cnp->cn_namelen] = '\0'; err = fdisp_wait_answ(&fdi); fdisp_destroy(&fdi); if (err) return (err); /* * Access the cached nlink even if the attr cached has expired. If * it's inaccurate, the worst that will happen is: * 1) We'll recycle the vnode even though the file has another link we * don't know about, costing a bit of cpu time, or * 2) We won't recycle the vnode even though all of its links are gone. * It will linger around until vnlru reclaims it, costing a bit of * temporary memory. */ nlink = VTOFUD(vp)->cached_attrs.va_nlink--; /* * Purge the parent's attribute cache because the daemon * should've updated its mtime and ctime. */ fuse_vnode_clear_attr_cache(dvp); /* NB: nlink could be zero if it was never cached */ if (nlink <= 1 || vnode_vtype(vp) == VDIR) { fuse_internal_vnode_disappear(vp); } else { cache_purge(vp); fuse_vnode_update(vp, FN_CTIMECHANGE); } return err; } /* rename */ int fuse_internal_rename(struct vnode *fdvp, struct componentname *fcnp, struct vnode *tdvp, struct componentname *tcnp) { struct fuse_dispatcher fdi; struct fuse_rename_in *fri; int err = 0; fdisp_init(&fdi, sizeof(*fri) + fcnp->cn_namelen + tcnp->cn_namelen + 2); fdisp_make_vp(&fdi, FUSE_RENAME, fdvp, tcnp->cn_thread, tcnp->cn_cred); fri = fdi.indata; fri->newdir = VTOI(tdvp); memcpy((char *)fdi.indata + sizeof(*fri), fcnp->cn_nameptr, fcnp->cn_namelen); ((char *)fdi.indata)[sizeof(*fri) + fcnp->cn_namelen] = '\0'; memcpy((char *)fdi.indata + sizeof(*fri) + fcnp->cn_namelen + 1, tcnp->cn_nameptr, tcnp->cn_namelen); ((char *)fdi.indata)[sizeof(*fri) + fcnp->cn_namelen + tcnp->cn_namelen + 1] = '\0'; err = fdisp_wait_answ(&fdi); fdisp_destroy(&fdi); return err; } /* strategy */ /* entity creation */ void fuse_internal_newentry_makerequest(struct mount *mp, uint64_t dnid, struct componentname *cnp, enum fuse_opcode op, void *buf, size_t bufsize, struct fuse_dispatcher *fdip) { fdip->iosize = bufsize + cnp->cn_namelen + 1; fdisp_make(fdip, op, mp, dnid, cnp->cn_thread, cnp->cn_cred); memcpy(fdip->indata, buf, bufsize); memcpy((char *)fdip->indata + bufsize, cnp->cn_nameptr, cnp->cn_namelen); ((char *)fdip->indata)[bufsize + cnp->cn_namelen] = '\0'; } int fuse_internal_newentry_core(struct vnode *dvp, struct vnode **vpp, struct componentname *cnp, enum vtype vtyp, struct fuse_dispatcher *fdip) { int err = 0; struct fuse_entry_out *feo; struct mount *mp = vnode_mount(dvp); if ((err = fdisp_wait_answ(fdip))) { return err; } feo = fdip->answ; if ((err = fuse_internal_checkentry(feo, vtyp))) { return err; } err = fuse_vnode_get(mp, feo, feo->nodeid, dvp, vpp, cnp, vtyp); if (err) { fuse_internal_forget_send(mp, cnp->cn_thread, cnp->cn_cred, feo->nodeid, 1); return err; } /* * Purge the parent's attribute cache because the daemon should've * updated its mtime and ctime */ fuse_vnode_clear_attr_cache(dvp); fuse_internal_cache_attrs(*vpp, &feo->attr, feo->attr_valid, feo->attr_valid_nsec, NULL); return err; } int fuse_internal_newentry(struct vnode *dvp, struct vnode **vpp, struct componentname *cnp, enum fuse_opcode op, void *buf, size_t bufsize, enum vtype vtype) { int err; struct fuse_dispatcher fdi; struct mount *mp = vnode_mount(dvp); fdisp_init(&fdi, 0); fuse_internal_newentry_makerequest(mp, VTOI(dvp), cnp, op, buf, bufsize, &fdi); err = fuse_internal_newentry_core(dvp, vpp, cnp, vtype, &fdi); fdisp_destroy(&fdi); return err; } /* entity destruction */ int fuse_internal_forget_callback(struct fuse_ticket *ftick, struct uio *uio) { fuse_internal_forget_send(ftick->tk_data->mp, curthread, NULL, ((struct fuse_in_header *)ftick->tk_ms_fiov.base)->nodeid, 1); return 0; } void fuse_internal_forget_send(struct mount *mp, struct thread *td, struct ucred *cred, uint64_t nodeid, uint64_t nlookup) { struct fuse_dispatcher fdi; struct fuse_forget_in *ffi; /* * KASSERT(nlookup > 0, ("zero-times forget for vp #%llu", * (long long unsigned) nodeid)); */ fdisp_init(&fdi, sizeof(*ffi)); fdisp_make(&fdi, FUSE_FORGET, mp, nodeid, td, cred); ffi = fdi.indata; ffi->nlookup = nlookup; fuse_insert_message(fdi.tick, false); fdisp_destroy(&fdi); } SDT_PROBE_DEFINE2(fusefs, , internal, getattr_cache_incoherent, "struct vnode*", "struct fuse_attr_out*"); /* Fetch the vnode's attributes from the daemon*/ int fuse_internal_do_getattr(struct vnode *vp, struct vattr *vap, struct ucred *cred, struct thread *td) { struct fuse_dispatcher fdi; struct fuse_vnode_data *fvdat = VTOFUD(vp); struct fuse_getattr_in *fgai; struct fuse_attr_out *fao; off_t old_filesize = fvdat->cached_attrs.va_size; struct timespec old_ctime = fvdat->cached_attrs.va_ctime; struct timespec old_mtime = fvdat->cached_attrs.va_mtime; enum vtype vtyp; int err; fdisp_init(&fdi, sizeof(*fgai)); fdisp_make_vp(&fdi, FUSE_GETATTR, vp, td, cred); fgai = fdi.indata; /* * We could look up a file handle and set it in fgai->fh, but that * involves extra runtime work and I'm unaware of any file systems that * care. */ fgai->getattr_flags = 0; if ((err = fdisp_wait_answ(&fdi))) { if (err == ENOENT) fuse_internal_vnode_disappear(vp); goto out; } fao = (struct fuse_attr_out *)fdi.answ; vtyp = IFTOVT(fao->attr.mode); if (fvdat->flag & FN_SIZECHANGE) fao->attr.size = old_filesize; if (fvdat->flag & FN_CTIMECHANGE) { fao->attr.ctime = old_ctime.tv_sec; fao->attr.ctimensec = old_ctime.tv_nsec; } if (fvdat->flag & FN_MTIMECHANGE) { fao->attr.mtime = old_mtime.tv_sec; fao->attr.mtimensec = old_mtime.tv_nsec; } if (vnode_isreg(vp) && fvdat->cached_attrs.va_size != VNOVAL && fao->attr.size != fvdat->cached_attrs.va_size) { /* * The server changed the file's size even though we had it * cached! That's a server bug. */ SDT_PROBE2(fusefs, , internal, getattr_cache_incoherent, vp, fao); printf("%s: cache incoherent on %s! " "Buggy FUSE server detected. To prevent data corruption, " "disable the data cache by mounting with -o direct_io, or " "as directed otherwise by your FUSE server's " "documentation\n", __func__, vnode_mount(vp)->mnt_stat.f_mntonname); int iosize = fuse_iosize(vp); v_inval_buf_range(vp, 0, INT64_MAX, iosize); } fuse_internal_cache_attrs(vp, &fao->attr, fao->attr_valid, fao->attr_valid_nsec, vap); if (vtyp != vnode_vtype(vp)) { fuse_internal_vnode_disappear(vp); err = ENOENT; } out: fdisp_destroy(&fdi); return err; } /* Read a vnode's attributes from cache or fetch them from the fuse daemon */ int fuse_internal_getattr(struct vnode *vp, struct vattr *vap, struct ucred *cred, struct thread *td) { struct vattr *attrs; if ((attrs = VTOVA(vp)) != NULL) { *vap = *attrs; /* struct copy */ return 0; } return fuse_internal_do_getattr(vp, vap, cred, td); } void fuse_internal_vnode_disappear(struct vnode *vp) { struct fuse_vnode_data *fvdat = VTOFUD(vp); ASSERT_VOP_ELOCKED(vp, "fuse_internal_vnode_disappear"); fvdat->flag |= FN_REVOKED; cache_purge(vp); } /* fuse start/stop */ SDT_PROBE_DEFINE2(fusefs, , internal, init_done, "struct fuse_data*", "struct fuse_init_out*"); int fuse_internal_init_callback(struct fuse_ticket *tick, struct uio *uio) { int err = 0; struct fuse_data *data = tick->tk_data; struct fuse_init_out *fiio; if ((err = tick->tk_aw_ohead.error)) { goto out; } if ((err = fticket_pull(tick, uio))) { goto out; } fiio = fticket_resp(tick)->base; data->fuse_libabi_major = fiio->major; data->fuse_libabi_minor = fiio->minor; if (!fuse_libabi_geq(data, 7, 4)) { /* * With a little work we could support servers as old as 7.1. * But there would be little payoff. */ SDT_PROBE2(fusefs, , internal, trace, 1, "userpace version too low"); err = EPROTONOSUPPORT; goto out; } if (fuse_libabi_geq(data, 7, 5)) { if (fticket_resp(tick)->len == sizeof(struct fuse_init_out) || fticket_resp(tick)->len == FUSE_COMPAT_22_INIT_OUT_SIZE) { data->max_write = fiio->max_write; if (fiio->flags & FUSE_ASYNC_READ) data->dataflags |= FSESS_ASYNC_READ; if (fiio->flags & FUSE_POSIX_LOCKS) data->dataflags |= FSESS_POSIX_LOCKS; if (fiio->flags & FUSE_EXPORT_SUPPORT) data->dataflags |= FSESS_EXPORT_SUPPORT; /* * Don't bother to check FUSE_BIG_WRITES, because it's * redundant with max_write */ /* * max_background and congestion_threshold are not * implemented */ } else { err = EINVAL; } } else { /* Old fixed values */ data->max_write = 4096; } if (fuse_libabi_geq(data, 7, 6)) data->max_readahead_blocks = fiio->max_readahead / maxbcachebuf; if (!fuse_libabi_geq(data, 7, 7)) fsess_set_notimpl(data->mp, FUSE_INTERRUPT); if (!fuse_libabi_geq(data, 7, 8)) { fsess_set_notimpl(data->mp, FUSE_BMAP); fsess_set_notimpl(data->mp, FUSE_DESTROY); } if (fuse_libabi_geq(data, 7, 23) && fiio->time_gran >= 1 && fiio->time_gran <= 1000000000) data->time_gran = fiio->time_gran; else data->time_gran = 1; if (!fuse_libabi_geq(data, 7, 23)) data->cache_mode = fuse_data_cache_mode; else if (fiio->flags & FUSE_WRITEBACK_CACHE) data->cache_mode = FUSE_CACHE_WB; else data->cache_mode = FUSE_CACHE_WT; if (!fuse_libabi_geq(data, 7, 24)) fsess_set_notimpl(data->mp, FUSE_LSEEK); + if (!fuse_libabi_geq(data, 7, 28)) + fsess_set_notimpl(data->mp, FUSE_COPY_FILE_RANGE); + out: if (err) { fdata_set_dead(data); } FUSE_LOCK(); data->dataflags |= FSESS_INITED; SDT_PROBE2(fusefs, , internal, init_done, data, fiio); wakeup(&data->ticketer); FUSE_UNLOCK(); return 0; } void fuse_internal_send_init(struct fuse_data *data, struct thread *td) { struct fuse_init_in *fiii; struct fuse_dispatcher fdi; fdisp_init(&fdi, sizeof(*fiii)); fdisp_make(&fdi, FUSE_INIT, data->mp, 0, td, NULL); fiii = fdi.indata; fiii->major = FUSE_KERNEL_VERSION; fiii->minor = FUSE_KERNEL_MINOR_VERSION; /* * fusefs currently reads ahead no more than one cache block at a time. * See fuse_read_biobackend */ fiii->max_readahead = maxbcachebuf; /* * Unsupported features: * FUSE_FILE_OPS: No known FUSE server or client supports it * FUSE_ATOMIC_O_TRUNC: our VFS cannot support it * FUSE_DONT_MASK: unlike Linux, FreeBSD always applies the umask, even * when default ACLs are in use. * FUSE_SPLICE_WRITE, FUSE_SPLICE_MOVE, FUSE_SPLICE_READ: FreeBSD * doesn't have splice(2). * FUSE_FLOCK_LOCKS: not yet implemented * FUSE_HAS_IOCTL_DIR: not yet implemented * FUSE_AUTO_INVAL_DATA: not yet implemented * FUSE_DO_READDIRPLUS: not yet implemented * FUSE_READDIRPLUS_AUTO: not yet implemented * FUSE_ASYNC_DIO: not yet implemented * FUSE_NO_OPEN_SUPPORT: not yet implemented + * FUSE_PARALLEL_DIROPS: not yet implemented + * FUSE_HANDLE_KILLPRIV: not yet implemented + * FUSE_POSIX_ACL: not yet implemented + * FUSE_ABORT_ERROR: not yet implemented + * FUSE_CACHE_SYMLINKS: not yet implemented + * FUSE_MAX_PAGES: not yet implemented */ fiii->flags = FUSE_ASYNC_READ | FUSE_POSIX_LOCKS | FUSE_EXPORT_SUPPORT | FUSE_BIG_WRITES | FUSE_WRITEBACK_CACHE; fuse_insert_callback(fdi.tick, fuse_internal_init_callback); fuse_insert_message(fdi.tick, false); fdisp_destroy(&fdi); } /* * Send a FUSE_SETATTR operation with no permissions checks. If cred is NULL, * send the request with root credentials */ int fuse_internal_setattr(struct vnode *vp, struct vattr *vap, struct thread *td, struct ucred *cred) { struct fuse_vnode_data *fvdat; struct fuse_dispatcher fdi; struct fuse_setattr_in *fsai; struct mount *mp; pid_t pid = td->td_proc->p_pid; struct fuse_data *data; int dataflags; int err = 0; enum vtype vtyp; int sizechanged = -1; uint64_t newsize = 0; mp = vnode_mount(vp); fvdat = VTOFUD(vp); data = fuse_get_mpdata(mp); dataflags = data->dataflags; fdisp_init(&fdi, sizeof(*fsai)); fdisp_make_vp(&fdi, FUSE_SETATTR, vp, td, cred); if (!cred) { fdi.finh->uid = 0; fdi.finh->gid = 0; } fsai = fdi.indata; fsai->valid = 0; if (vap->va_uid != (uid_t)VNOVAL) { fsai->uid = vap->va_uid; fsai->valid |= FATTR_UID; } if (vap->va_gid != (gid_t)VNOVAL) { fsai->gid = vap->va_gid; fsai->valid |= FATTR_GID; } if (vap->va_size != VNOVAL) { struct fuse_filehandle *fufh = NULL; /*Truncate to a new value. */ fsai->size = vap->va_size; sizechanged = 1; newsize = vap->va_size; fsai->valid |= FATTR_SIZE; fuse_filehandle_getrw(vp, FWRITE, &fufh, cred, pid); if (fufh) { fsai->fh = fufh->fh_id; fsai->valid |= FATTR_FH; } VTOFUD(vp)->flag &= ~FN_SIZECHANGE; } if (vap->va_atime.tv_sec != VNOVAL) { fsai->atime = vap->va_atime.tv_sec; fsai->atimensec = vap->va_atime.tv_nsec; fsai->valid |= FATTR_ATIME; if (vap->va_vaflags & VA_UTIMES_NULL) fsai->valid |= FATTR_ATIME_NOW; } if (vap->va_mtime.tv_sec != VNOVAL) { fsai->mtime = vap->va_mtime.tv_sec; fsai->mtimensec = vap->va_mtime.tv_nsec; fsai->valid |= FATTR_MTIME; if (vap->va_vaflags & VA_UTIMES_NULL) fsai->valid |= FATTR_MTIME_NOW; } else if (fvdat->flag & FN_MTIMECHANGE) { fsai->mtime = fvdat->cached_attrs.va_mtime.tv_sec; fsai->mtimensec = fvdat->cached_attrs.va_mtime.tv_nsec; fsai->valid |= FATTR_MTIME; } if (fuse_libabi_geq(data, 7, 23) && fvdat->flag & FN_CTIMECHANGE) { fsai->ctime = fvdat->cached_attrs.va_ctime.tv_sec; fsai->ctimensec = fvdat->cached_attrs.va_ctime.tv_nsec; fsai->valid |= FATTR_CTIME; } if (vap->va_mode != (mode_t)VNOVAL) { fsai->mode = vap->va_mode & ALLPERMS; fsai->valid |= FATTR_MODE; } if (!fsai->valid) { goto out; } if ((err = fdisp_wait_answ(&fdi))) goto out; vtyp = IFTOVT(((struct fuse_attr_out *)fdi.answ)->attr.mode); if (vnode_vtype(vp) != vtyp) { if (vnode_vtype(vp) == VNON && vtyp != VNON) { SDT_PROBE2(fusefs, , internal, trace, 1, "FUSE: Dang! " "vnode_vtype is VNON and vtype isn't."); } else { /* * STALE vnode, ditch * * The vnode has changed its type "behind our back". * There's nothing really we can do, so let us just * force an internal revocation and tell the caller to * try again, if interested. */ fuse_internal_vnode_disappear(vp); err = EAGAIN; } } if (err == 0) { struct fuse_attr_out *fao = (struct fuse_attr_out*)fdi.answ; fuse_vnode_undirty_cached_timestamps(vp); fuse_internal_cache_attrs(vp, &fao->attr, fao->attr_valid, fao->attr_valid_nsec, NULL); } out: fdisp_destroy(&fdi); return err; } +/* + * FreeBSD clears the SUID and SGID bits on any write by a non-root user. + */ +void +fuse_internal_clear_suid_on_write(struct vnode *vp, struct ucred *cred, + struct thread *td) +{ + struct fuse_data *data; + struct mount *mp; + struct vattr va; + int dataflags; + + mp = vnode_mount(vp); + data = fuse_get_mpdata(mp); + dataflags = data->dataflags; + + ASSERT_VOP_LOCKED(vp, __func__); + + if (dataflags & FSESS_DEFAULT_PERMISSIONS) { + if (priv_check_cred(cred, PRIV_VFS_RETAINSUGID)) { + fuse_internal_getattr(vp, &va, cred, td); + if (va.va_mode & (S_ISUID | S_ISGID)) { + mode_t mode = va.va_mode & ~(S_ISUID | S_ISGID); + /* Clear all vattr fields except mode */ + vattr_null(&va); + va.va_mode = mode; + + /* + * Ignore fuse_internal_setattr's return value, + * because at this point the write operation has + * already succeeded and we don't want to return + * failing status for that. + */ + (void)fuse_internal_setattr(vp, &va, td, NULL); + } + } + } +} + #ifdef ZERO_PAD_INCOMPLETE_BUFS static int isbzero(void *buf, size_t len) { int i; for (i = 0; i < len; i++) { if (((char *)buf)[i]) return (0); } return (1); } #endif void fuse_internal_init(void) { fuse_lookup_cache_misses = counter_u64_alloc(M_WAITOK); fuse_lookup_cache_hits = counter_u64_alloc(M_WAITOK); } void fuse_internal_destroy(void) { counter_u64_free(fuse_lookup_cache_hits); counter_u64_free(fuse_lookup_cache_misses); } diff --git a/sys/fs/fuse/fuse_internal.h b/sys/fs/fuse/fuse_internal.h index de68861beae2..20a10d7dfda0 100644 --- a/sys/fs/fuse/fuse_internal.h +++ b/sys/fs/fuse/fuse_internal.h @@ -1,325 +1,329 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 2007-2009 Google Inc. and Amit Singh * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are * met: * * * Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following disclaimer * in the documentation and/or other materials provided with the * distribution. * * Neither the name of Google Inc. nor the names of its * contributors may be used to endorse or promote products derived from * this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * Copyright (C) 2005 Csaba Henk. * All rights reserved. * * Copyright (c) 2019 The FreeBSD Foundation * * Portions of this software were developed by BFF Storage Systems, LLC under * sponsorship from the FreeBSD Foundation. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #ifndef _FUSE_INTERNAL_H_ #define _FUSE_INTERNAL_H_ #include #include #include #include #include #include #include "fuse_ipc.h" #include "fuse_node.h" extern counter_u64_t fuse_lookup_cache_hits; extern counter_u64_t fuse_lookup_cache_misses; static inline bool vfs_isrdonly(struct mount *mp) { return ((mp->mnt_flag & MNT_RDONLY) != 0); } static inline struct mount * vnode_mount(struct vnode *vp) { return (vp->v_mount); } static inline enum vtype vnode_vtype(struct vnode *vp) { return (vp->v_type); } static inline bool vnode_isvroot(struct vnode *vp) { return ((vp->v_vflag & VV_ROOT) != 0); } static inline bool vnode_isreg(struct vnode *vp) { return (vp->v_type == VREG); } static inline bool vnode_isdir(struct vnode *vp) { return (vp->v_type == VDIR); } static inline bool vnode_islnk(struct vnode *vp) { return (vp->v_type == VLNK); } static inline ssize_t uio_resid(struct uio *uio) { return (uio->uio_resid); } static inline off_t uio_offset(struct uio *uio) { return (uio->uio_offset); } static inline void uio_setoffset(struct uio *uio, off_t offset) { uio->uio_offset = offset; } /* miscellaneous */ static inline bool fuse_isdeadfs(struct vnode *vp) { struct fuse_data *data = fuse_get_mpdata(vnode_mount(vp)); return (data->dataflags & FSESS_DEAD); } static inline uint64_t fuse_iosize(struct vnode *vp) { return (vp->v_mount->mnt_stat.f_iosize); } /* * Make a cacheable timeout in bintime format value based on a fuse_attr_out * response */ static inline void fuse_validity_2_bintime(uint64_t attr_valid, uint32_t attr_valid_nsec, struct bintime *timeout) { struct timespec now, duration, timeout_ts; getnanouptime(&now); /* "+ 2" is the bound of attr_valid_nsec + now.tv_nsec */ /* Why oh why isn't there a TIME_MAX defined? */ if (attr_valid >= INT_MAX || attr_valid + now.tv_sec + 2 >= INT_MAX) { timeout->sec = INT_MAX; } else { duration.tv_sec = attr_valid; duration.tv_nsec = attr_valid_nsec; timespecadd(&duration, &now, &timeout_ts); timespec2bintime(&timeout_ts, timeout); } } /* * Make a cacheable timeout value in timespec format based on the fuse_entry_out * response */ static inline void fuse_validity_2_timespec(const struct fuse_entry_out *feo, struct timespec *timeout) { struct timespec duration, now; getnanouptime(&now); /* "+ 2" is the bound of entry_valid_nsec + now.tv_nsec */ if (feo->entry_valid >= INT_MAX || feo->entry_valid + now.tv_sec + 2 >= INT_MAX) { timeout->tv_sec = INT_MAX; } else { duration.tv_sec = feo->entry_valid; duration.tv_nsec = feo->entry_valid_nsec; timespecadd(&duration, &now, timeout); } } /* VFS ops */ int fuse_internal_get_cached_vnode(struct mount*, ino_t, int, struct vnode**); /* access */ static inline int fuse_match_cred(struct ucred *basecred, struct ucred *usercred) { if (basecred->cr_uid == usercred->cr_uid && basecred->cr_uid == usercred->cr_ruid && basecred->cr_uid == usercred->cr_svuid && basecred->cr_groups[0] == usercred->cr_groups[0] && basecred->cr_groups[0] == usercred->cr_rgid && basecred->cr_groups[0] == usercred->cr_svgid) return (0); return (EPERM); } int fuse_internal_access(struct vnode *vp, accmode_t mode, struct thread *td, struct ucred *cred); /* attributes */ void fuse_internal_cache_attrs(struct vnode *vp, struct fuse_attr *attr, uint64_t attr_valid, uint32_t attr_valid_nsec, struct vattr *vap); /* fsync */ int fuse_internal_fsync(struct vnode *vp, struct thread *td, int waitfor, bool datasync); int fuse_internal_fsync_callback(struct fuse_ticket *tick, struct uio *uio); /* getattr */ int fuse_internal_do_getattr(struct vnode *vp, struct vattr *vap, struct ucred *cred, struct thread *td); int fuse_internal_getattr(struct vnode *vp, struct vattr *vap, struct ucred *cred, struct thread *td); /* asynchronous invalidation */ int fuse_internal_invalidate_entry(struct mount *mp, struct uio *uio); int fuse_internal_invalidate_inode(struct mount *mp, struct uio *uio); /* mknod */ int fuse_internal_mknod(struct vnode *dvp, struct vnode **vpp, struct componentname *cnp, struct vattr *vap); /* readdir */ struct pseudo_dirent { uint32_t d_namlen; }; int fuse_internal_readdir(struct vnode *vp, struct uio *uio, off_t startoff, struct fuse_filehandle *fufh, struct fuse_iov *cookediov, int *ncookies, u_long *cookies); int fuse_internal_readdir_processdata(struct uio *uio, off_t startoff, int *fnd_start, size_t reqsize, void *buf, size_t bufsize, struct fuse_iov *cookediov, int *ncookies, u_long **cookiesp); /* remove */ int fuse_internal_remove(struct vnode *dvp, struct vnode *vp, struct componentname *cnp, enum fuse_opcode op); /* rename */ int fuse_internal_rename(struct vnode *fdvp, struct componentname *fcnp, struct vnode *tdvp, struct componentname *tcnp); /* revoke */ void fuse_internal_vnode_disappear(struct vnode *vp); /* setattr */ int fuse_internal_setattr(struct vnode *vp, struct vattr *va, struct thread *td, struct ucred *cred); +/* write */ +void fuse_internal_clear_suid_on_write(struct vnode *vp, struct ucred *cred, + struct thread *td); + /* strategy */ /* entity creation */ static inline int fuse_internal_checkentry(struct fuse_entry_out *feo, enum vtype vtyp) { if (vtyp != IFTOVT(feo->attr.mode)) { return (EINVAL); } if (feo->nodeid == FUSE_NULL_ID) { return (EINVAL); } if (feo->nodeid == FUSE_ROOT_ID) { return (EINVAL); } return (0); } int fuse_internal_newentry(struct vnode *dvp, struct vnode **vpp, struct componentname *cnp, enum fuse_opcode op, void *buf, size_t bufsize, enum vtype vtyp); void fuse_internal_newentry_makerequest(struct mount *mp, uint64_t dnid, struct componentname *cnp, enum fuse_opcode op, void *buf, size_t bufsize, struct fuse_dispatcher *fdip); int fuse_internal_newentry_core(struct vnode *dvp, struct vnode **vpp, struct componentname *cnp, enum vtype vtyp, struct fuse_dispatcher *fdip); /* entity destruction */ int fuse_internal_forget_callback(struct fuse_ticket *tick, struct uio *uio); void fuse_internal_forget_send(struct mount *mp, struct thread *td, struct ucred *cred, uint64_t nodeid, uint64_t nlookup); /* fuse start/stop */ int fuse_internal_init_callback(struct fuse_ticket *tick, struct uio *uio); void fuse_internal_send_init(struct fuse_data *data, struct thread *td); /* module load/unload */ void fuse_internal_init(void); void fuse_internal_destroy(void); #endif /* _FUSE_INTERNAL_H_ */ diff --git a/sys/fs/fuse/fuse_io.c b/sys/fs/fuse/fuse_io.c index 4e178bb5340a..3f23a35a8626 100644 --- a/sys/fs/fuse/fuse_io.c +++ b/sys/fs/fuse/fuse_io.c @@ -1,1166 +1,1126 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 2007-2009 Google Inc. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are * met: * * * Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following disclaimer * in the documentation and/or other materials provided with the * distribution. * * Neither the name of Google Inc. nor the names of its * contributors may be used to endorse or promote products derived from * this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * Copyright (C) 2005 Csaba Henk. * All rights reserved. * * Copyright (c) 2019 The FreeBSD Foundation * * Portions of this software were developed by BFF Storage Systems, LLC under * sponsorship from the FreeBSD Foundation. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "fuse.h" #include "fuse_file.h" #include "fuse_node.h" #include "fuse_internal.h" #include "fuse_ipc.h" #include "fuse_io.h" /* * Set in a struct buf to indicate that the write came from the buffer cache * and the originating cred and pid are no longer known. */ #define B_FUSEFS_WRITE_CACHE B_FS_FLAG1 SDT_PROVIDER_DECLARE(fusefs); /* * Fuse trace probe: * arg0: verbosity. Higher numbers give more verbose messages * arg1: Textual message */ SDT_PROBE_DEFINE2(fusefs, , io, trace, "int", "char*"); static int fuse_inval_buf_range(struct vnode *vp, off_t filesize, off_t start, off_t end); -static void -fuse_io_clear_suid_on_write(struct vnode *vp, struct ucred *cred, - struct thread *td); static int fuse_read_directbackend(struct vnode *vp, struct uio *uio, struct ucred *cred, struct fuse_filehandle *fufh); static int fuse_read_biobackend(struct vnode *vp, struct uio *uio, int ioflag, struct ucred *cred, struct fuse_filehandle *fufh, pid_t pid); static int fuse_write_directbackend(struct vnode *vp, struct uio *uio, struct ucred *cred, struct fuse_filehandle *fufh, off_t filesize, int ioflag, bool pages); static int fuse_write_biobackend(struct vnode *vp, struct uio *uio, struct ucred *cred, struct fuse_filehandle *fufh, int ioflag, pid_t pid); /* Invalidate a range of cached data, whether dirty of not */ static int fuse_inval_buf_range(struct vnode *vp, off_t filesize, off_t start, off_t end) { struct buf *bp; daddr_t left_lbn, end_lbn, right_lbn; off_t new_filesize; int iosize, left_on, right_on, right_blksize; iosize = fuse_iosize(vp); left_lbn = start / iosize; end_lbn = howmany(end, iosize); left_on = start & (iosize - 1); if (left_on != 0) { bp = getblk(vp, left_lbn, iosize, PCATCH, 0, 0); if ((bp->b_flags & B_CACHE) != 0 && bp->b_dirtyend >= left_on) { /* * Flush the dirty buffer, because we don't have a * byte-granular way to record which parts of the * buffer are valid. */ bwrite(bp); if (bp->b_error) return (bp->b_error); } else { brelse(bp); } } right_on = end & (iosize - 1); if (right_on != 0) { right_lbn = end / iosize; new_filesize = MAX(filesize, end); right_blksize = MIN(iosize, new_filesize - iosize * right_lbn); bp = getblk(vp, right_lbn, right_blksize, PCATCH, 0, 0); if ((bp->b_flags & B_CACHE) != 0 && bp->b_dirtyoff < right_on) { /* * Flush the dirty buffer, because we don't have a * byte-granular way to record which parts of the * buffer are valid. */ bwrite(bp); if (bp->b_error) return (bp->b_error); } else { brelse(bp); } } v_inval_buf_range(vp, left_lbn, end_lbn, iosize); return (0); } -/* - * FreeBSD clears the SUID and SGID bits on any write by a non-root user. - */ -static void -fuse_io_clear_suid_on_write(struct vnode *vp, struct ucred *cred, - struct thread *td) -{ - struct fuse_data *data; - struct mount *mp; - struct vattr va; - int dataflags; - - mp = vnode_mount(vp); - data = fuse_get_mpdata(mp); - dataflags = data->dataflags; - - if (dataflags & FSESS_DEFAULT_PERMISSIONS) { - if (priv_check_cred(cred, PRIV_VFS_RETAINSUGID)) { - fuse_internal_getattr(vp, &va, cred, td); - if (va.va_mode & (S_ISUID | S_ISGID)) { - mode_t mode = va.va_mode & ~(S_ISUID | S_ISGID); - /* Clear all vattr fields except mode */ - vattr_null(&va); - va.va_mode = mode; - - /* - * Ignore fuse_internal_setattr's return value, - * because at this point the write operation has - * already succeeded and we don't want to return - * failing status for that. - */ - (void)fuse_internal_setattr(vp, &va, td, NULL); - } - } - } -} - SDT_PROBE_DEFINE5(fusefs, , io, io_dispatch, "struct vnode*", "struct uio*", "int", "struct ucred*", "struct fuse_filehandle*"); SDT_PROBE_DEFINE4(fusefs, , io, io_dispatch_filehandles_closed, "struct vnode*", "struct uio*", "int", "struct ucred*"); int fuse_io_dispatch(struct vnode *vp, struct uio *uio, int ioflag, struct ucred *cred, pid_t pid) { struct fuse_filehandle *fufh; int err, directio; int fflag; bool closefufh = false; MPASS(vp->v_type == VREG || vp->v_type == VDIR); fflag = (uio->uio_rw == UIO_READ) ? FREAD : FWRITE; err = fuse_filehandle_getrw(vp, fflag, &fufh, cred, pid); if (err == EBADF && vnode_mount(vp)->mnt_flag & MNT_EXPORTED) { /* * nfsd will do I/O without first doing VOP_OPEN. We * must implicitly open the file here */ err = fuse_filehandle_open(vp, fflag, &fufh, curthread, cred); closefufh = true; } else if (err) { SDT_PROBE4(fusefs, , io, io_dispatch_filehandles_closed, vp, uio, ioflag, cred); printf("FUSE: io dispatch: filehandles are closed\n"); return err; } if (err) goto out; SDT_PROBE5(fusefs, , io, io_dispatch, vp, uio, ioflag, cred, fufh); /* * Ideally, when the daemon asks for direct io at open time, the * standard file flag should be set according to this, so that would * just change the default mode, which later on could be changed via * fcntl(2). * But this doesn't work, the O_DIRECT flag gets cleared at some point * (don't know where). So to make any use of the Fuse direct_io option, * we hardwire it into the file's private data (similarly to Linux, * btw.). */ directio = (ioflag & IO_DIRECT) || !fsess_opt_datacache(vnode_mount(vp)); switch (uio->uio_rw) { case UIO_READ: if (directio) { SDT_PROBE2(fusefs, , io, trace, 1, "direct read of vnode"); err = fuse_read_directbackend(vp, uio, cred, fufh); } else { SDT_PROBE2(fusefs, , io, trace, 1, "buffered read of vnode"); err = fuse_read_biobackend(vp, uio, ioflag, cred, fufh, pid); } break; case UIO_WRITE: fuse_vnode_update(vp, FN_MTIMECHANGE | FN_CTIMECHANGE); if (directio) { off_t start, end, filesize; bool pages = (ioflag & IO_VMIO) != 0; SDT_PROBE2(fusefs, , io, trace, 1, "direct write of vnode"); err = fuse_vnode_size(vp, &filesize, cred, curthread); if (err) goto out; start = uio->uio_offset; end = start + uio->uio_resid; if (!pages) { err = fuse_inval_buf_range(vp, filesize, start, end); if (err) return (err); } err = fuse_write_directbackend(vp, uio, cred, fufh, filesize, ioflag, pages); } else { SDT_PROBE2(fusefs, , io, trace, 1, "buffered write of vnode"); if (!fsess_opt_writeback(vnode_mount(vp))) ioflag |= IO_SYNC; err = fuse_write_biobackend(vp, uio, cred, fufh, ioflag, pid); } - fuse_io_clear_suid_on_write(vp, cred, uio->uio_td); + fuse_internal_clear_suid_on_write(vp, cred, uio->uio_td); break; default: panic("uninterpreted mode passed to fuse_io_dispatch"); } out: if (closefufh) fuse_filehandle_close(vp, fufh, curthread, cred); return (err); } SDT_PROBE_DEFINE4(fusefs, , io, read_bio_backend_start, "int", "int", "int", "int"); SDT_PROBE_DEFINE2(fusefs, , io, read_bio_backend_feed, "int", "struct buf*"); SDT_PROBE_DEFINE4(fusefs, , io, read_bio_backend_end, "int", "ssize_t", "int", "struct buf*"); static int fuse_read_biobackend(struct vnode *vp, struct uio *uio, int ioflag, struct ucred *cred, struct fuse_filehandle *fufh, pid_t pid) { struct buf *bp; struct mount *mp; struct fuse_data *data; daddr_t lbn, nextlbn; int bcount, nextsize; int err, n = 0, on = 0, seqcount; off_t filesize; const int biosize = fuse_iosize(vp); mp = vnode_mount(vp); data = fuse_get_mpdata(mp); if (uio->uio_offset < 0) return (EINVAL); seqcount = ioflag >> IO_SEQSHIFT; err = fuse_vnode_size(vp, &filesize, cred, curthread); if (err) return err; for (err = 0, bp = NULL; uio->uio_resid > 0; bp = NULL) { if (fuse_isdeadfs(vp)) { err = ENXIO; break; } if (filesize - uio->uio_offset <= 0) break; lbn = uio->uio_offset / biosize; on = uio->uio_offset & (biosize - 1); if ((off_t)lbn * biosize >= filesize) { bcount = 0; } else if ((off_t)(lbn + 1) * biosize > filesize) { bcount = filesize - (off_t)lbn *biosize; } else { bcount = biosize; } nextlbn = lbn + 1; nextsize = MIN(biosize, filesize - nextlbn * biosize); SDT_PROBE4(fusefs, , io, read_bio_backend_start, biosize, (int)lbn, on, bcount); if (bcount < biosize) { /* If near EOF, don't do readahead */ err = bread(vp, lbn, bcount, NOCRED, &bp); } else if ((vp->v_mount->mnt_flag & MNT_NOCLUSTERR) == 0) { /* Try clustered read */ long totread = uio->uio_resid + on; seqcount = MIN(seqcount, data->max_readahead_blocks + 1); err = cluster_read(vp, filesize, lbn, bcount, NOCRED, totread, seqcount, 0, &bp); } else if (seqcount > 1 && data->max_readahead_blocks >= 1) { /* Try non-clustered readahead */ err = breadn(vp, lbn, bcount, &nextlbn, &nextsize, 1, NOCRED, &bp); } else { /* Just read what was requested */ err = bread(vp, lbn, bcount, NOCRED, &bp); } if (err) { brelse(bp); bp = NULL; break; } /* * on is the offset into the current bp. Figure out how many * bytes we can copy out of the bp. Note that bcount is * NOT DEV_BSIZE aligned. * * Then figure out how many bytes we can copy into the uio. */ n = 0; if (on < bcount - bp->b_resid) n = MIN((unsigned)(bcount - bp->b_resid - on), uio->uio_resid); if (n > 0) { SDT_PROBE2(fusefs, , io, read_bio_backend_feed, n, bp); err = uiomove(bp->b_data + on, n, uio); } vfs_bio_brelse(bp, ioflag); SDT_PROBE4(fusefs, , io, read_bio_backend_end, err, uio->uio_resid, n, bp); if (bp->b_resid > 0) { /* Short read indicates EOF */ break; } } return (err); } SDT_PROBE_DEFINE1(fusefs, , io, read_directbackend_start, "struct fuse_read_in*"); SDT_PROBE_DEFINE3(fusefs, , io, read_directbackend_complete, "struct fuse_dispatcher*", "struct fuse_read_in*", "struct uio*"); static int fuse_read_directbackend(struct vnode *vp, struct uio *uio, struct ucred *cred, struct fuse_filehandle *fufh) { struct fuse_data *data; struct fuse_dispatcher fdi; struct fuse_read_in *fri; int err = 0; data = fuse_get_mpdata(vp->v_mount); if (uio->uio_resid == 0) return (0); fdisp_init(&fdi, 0); /* * XXX In "normal" case we use an intermediate kernel buffer for * transmitting data from daemon's context to ours. Eventually, we should * get rid of this. Anyway, if the target uio lives in sysspace (we are * called from pageops), and the input data doesn't need kernel-side * processing (we are not called from readdir) we can already invoke * an optimized, "peer-to-peer" I/O routine. */ while (uio->uio_resid > 0) { fdi.iosize = sizeof(*fri); fdisp_make_vp(&fdi, FUSE_READ, vp, uio->uio_td, cred); fri = fdi.indata; fri->fh = fufh->fh_id; fri->offset = uio->uio_offset; fri->size = MIN(uio->uio_resid, fuse_get_mpdata(vp->v_mount)->max_read); if (fuse_libabi_geq(data, 7, 9)) { /* See comment regarding FUSE_WRITE_LOCKOWNER */ fri->read_flags = 0; fri->flags = fufh_type_2_fflags(fufh->fufh_type); } SDT_PROBE1(fusefs, , io, read_directbackend_start, fri); if ((err = fdisp_wait_answ(&fdi))) goto out; SDT_PROBE3(fusefs, , io, read_directbackend_complete, &fdi, fri, uio); if ((err = uiomove(fdi.answ, MIN(fri->size, fdi.iosize), uio))) break; if (fdi.iosize < fri->size) { /* * Short read. Should only happen at EOF or with * direct io. */ break; } } out: fdisp_destroy(&fdi); return (err); } static int fuse_write_directbackend(struct vnode *vp, struct uio *uio, struct ucred *cred, struct fuse_filehandle *fufh, off_t filesize, int ioflag, bool pages) { struct fuse_vnode_data *fvdat = VTOFUD(vp); struct fuse_data *data; struct fuse_write_in *fwi; struct fuse_write_out *fwo; struct fuse_dispatcher fdi; size_t chunksize; void *fwi_data; off_t as_written_offset; int diff; int err = 0; bool direct_io = fufh->fuse_open_flags & FOPEN_DIRECT_IO; bool wrote_anything = false; uint32_t write_flags; data = fuse_get_mpdata(vp->v_mount); /* * Don't set FUSE_WRITE_LOCKOWNER in write_flags. It can't be set * accurately when using POSIX AIO, libfuse doesn't use it, and I'm not * aware of any file systems that do. It was an attempt to add * Linux-style mandatory locking to the FUSE protocol, but mandatory * locking is deprecated even on Linux. See Linux commit * f33321141b273d60cbb3a8f56a5489baad82ba5e . */ /* * Set FUSE_WRITE_CACHE whenever we don't know the uid, gid, and/or pid * that originated a write. For example when writing from the * writeback cache. I don't know of a single file system that cares, * but the protocol says we're supposed to do this. */ write_flags = !pages && ( (ioflag & IO_DIRECT) || !fsess_opt_datacache(vnode_mount(vp)) || !fsess_opt_writeback(vnode_mount(vp))) ? 0 : FUSE_WRITE_CACHE; if (uio->uio_resid == 0) return (0); if (ioflag & IO_APPEND) uio_setoffset(uio, filesize); if (vn_rlimit_fsize(vp, uio, uio->uio_td)) return (EFBIG); fdisp_init(&fdi, 0); while (uio->uio_resid > 0) { size_t sizeof_fwi; if (fuse_libabi_geq(data, 7, 9)) { sizeof_fwi = sizeof(*fwi); } else { sizeof_fwi = FUSE_COMPAT_WRITE_IN_SIZE; } chunksize = MIN(uio->uio_resid, data->max_write); fdi.iosize = sizeof_fwi + chunksize; fdisp_make_vp(&fdi, FUSE_WRITE, vp, uio->uio_td, cred); fwi = fdi.indata; fwi->fh = fufh->fh_id; fwi->offset = uio->uio_offset; fwi->size = chunksize; fwi->write_flags = write_flags; if (fuse_libabi_geq(data, 7, 9)) { fwi->flags = fufh_type_2_fflags(fufh->fufh_type); } fwi_data = (char *)fdi.indata + sizeof_fwi; if ((err = uiomove(fwi_data, chunksize, uio))) break; retry: err = fdisp_wait_answ(&fdi); if (err == ERESTART || err == EINTR || err == EWOULDBLOCK) { /* * Rewind the uio so dofilewrite will know it's * incomplete */ uio->uio_resid += fwi->size; uio->uio_offset -= fwi->size; /* * Change ERESTART into EINTR because we can't rewind * uio->uio_iov. Basically, once uiomove(9) has been * called, it's impossible to restart a syscall. */ if (err == ERESTART) err = EINTR; break; } else if (err) { break; } else { wrote_anything = true; } fwo = ((struct fuse_write_out *)fdi.answ); /* Adjust the uio in the case of short writes */ diff = fwi->size - fwo->size; as_written_offset = uio->uio_offset - diff; if (as_written_offset - diff > filesize) fuse_vnode_setsize(vp, as_written_offset); if (as_written_offset - diff >= filesize) fvdat->flag &= ~FN_SIZECHANGE; if (diff < 0) { printf("WARNING: misbehaving FUSE filesystem " "wrote more data than we provided it\n"); err = EINVAL; break; } else if (diff > 0) { /* Short write */ if (!direct_io) { printf("WARNING: misbehaving FUSE filesystem: " "short writes are only allowed with " "direct_io\n"); } if (ioflag & IO_DIRECT) { /* Return early */ uio->uio_resid += diff; uio->uio_offset -= diff; break; } else { /* Resend the unwritten portion of data */ fdi.iosize = sizeof_fwi + diff; /* Refresh fdi without clearing data buffer */ fdisp_refresh_vp(&fdi, FUSE_WRITE, vp, uio->uio_td, cred); fwi = fdi.indata; MPASS2(fwi == fdi.indata, "FUSE dispatcher " "reallocated despite no increase in " "size?"); void *src = (char*)fwi_data + fwo->size; memmove(fwi_data, src, diff); fwi->fh = fufh->fh_id; fwi->offset = as_written_offset; fwi->size = diff; fwi->write_flags = write_flags; goto retry; } } } fdisp_destroy(&fdi); if (wrote_anything) fuse_vnode_undirty_cached_timestamps(vp); return (err); } SDT_PROBE_DEFINE6(fusefs, , io, write_biobackend_start, "int64_t", "int", "int", "struct uio*", "int", "bool"); SDT_PROBE_DEFINE2(fusefs, , io, write_biobackend_append_race, "long", "int"); SDT_PROBE_DEFINE2(fusefs, , io, write_biobackend_issue, "int", "struct buf*"); static int fuse_write_biobackend(struct vnode *vp, struct uio *uio, struct ucred *cred, struct fuse_filehandle *fufh, int ioflag, pid_t pid) { struct fuse_vnode_data *fvdat = VTOFUD(vp); struct buf *bp; daddr_t lbn; off_t filesize; int bcount; int n, on, seqcount, err = 0; bool last_page; const int biosize = fuse_iosize(vp); seqcount = ioflag >> IO_SEQSHIFT; KASSERT(uio->uio_rw == UIO_WRITE, ("fuse_write_biobackend mode")); if (vp->v_type != VREG) return (EIO); if (uio->uio_offset < 0) return (EINVAL); if (uio->uio_resid == 0) return (0); err = fuse_vnode_size(vp, &filesize, cred, curthread); if (err) return err; if (ioflag & IO_APPEND) uio_setoffset(uio, filesize); if (vn_rlimit_fsize(vp, uio, uio->uio_td)) return (EFBIG); do { bool direct_append, extending; if (fuse_isdeadfs(vp)) { err = ENXIO; break; } lbn = uio->uio_offset / biosize; on = uio->uio_offset & (biosize - 1); n = MIN((unsigned)(biosize - on), uio->uio_resid); again: /* Get or create a buffer for the write */ direct_append = uio->uio_offset == filesize && n; if (uio->uio_offset + n < filesize) { extending = false; if ((off_t)(lbn + 1) * biosize < filesize) { /* Not the file's last block */ bcount = biosize; } else { /* The file's last block */ bcount = filesize - (off_t)lbn * biosize; } } else { extending = true; bcount = on + n; } if (howmany(((off_t)lbn * biosize + on + n - 1), PAGE_SIZE) >= howmany(filesize, PAGE_SIZE)) last_page = true; else last_page = false; if (direct_append) { /* * Take care to preserve the buffer's B_CACHE state so * as not to cause an unnecessary read. */ bp = getblk(vp, lbn, on, PCATCH, 0, 0); if (bp != NULL) { uint32_t save = bp->b_flags & B_CACHE; allocbuf(bp, bcount); bp->b_flags |= save; } } else { bp = getblk(vp, lbn, bcount, PCATCH, 0, 0); } if (!bp) { err = EINTR; break; } if (extending) { /* * Extend file _after_ locking buffer so we won't race * with other readers */ err = fuse_vnode_setsize(vp, uio->uio_offset + n); filesize = uio->uio_offset + n; fvdat->flag |= FN_SIZECHANGE; if (err) { brelse(bp); break; } } SDT_PROBE6(fusefs, , io, write_biobackend_start, lbn, on, n, uio, bcount, direct_append); /* * Issue a READ if B_CACHE is not set. In special-append * mode, B_CACHE is based on the buffer prior to the write * op and is typically set, avoiding the read. If a read * is required in special append mode, the server will * probably send us a short-read since we extended the file * on our end, resulting in b_resid == 0 and, thusly, * B_CACHE getting set. * * We can also avoid issuing the read if the write covers * the entire buffer. We have to make sure the buffer state * is reasonable in this case since we will not be initiating * I/O. See the comments in kern/vfs_bio.c's getblk() for * more information. * * B_CACHE may also be set due to the buffer being cached * normally. */ if (on == 0 && n == bcount) { bp->b_flags |= B_CACHE; bp->b_flags &= ~B_INVAL; bp->b_ioflags &= ~BIO_ERROR; } if ((bp->b_flags & B_CACHE) == 0) { bp->b_iocmd = BIO_READ; vfs_busy_pages(bp, 0); fuse_io_strategy(vp, bp); if ((err = bp->b_error)) { brelse(bp); break; } if (bp->b_resid > 0) { /* * Short read indicates EOF. Update file size * from the server and try again. */ SDT_PROBE2(fusefs, , io, trace, 1, "Short read during a RMW"); brelse(bp); err = fuse_vnode_size(vp, &filesize, cred, curthread); if (err) break; else goto again; } } if (bp->b_wcred == NOCRED) bp->b_wcred = crhold(cred); /* * If dirtyend exceeds file size, chop it down. This should * not normally occur but there is an append race where it * might occur XXX, so we log it. * * If the chopping creates a reverse-indexed or degenerate * situation with dirtyoff/end, we 0 both of them. */ if (bp->b_dirtyend > bcount) { SDT_PROBE2(fusefs, , io, write_biobackend_append_race, (long)bp->b_blkno * biosize, bp->b_dirtyend - bcount); bp->b_dirtyend = bcount; } if (bp->b_dirtyoff >= bp->b_dirtyend) bp->b_dirtyoff = bp->b_dirtyend = 0; /* * If the new write will leave a contiguous dirty * area, just update the b_dirtyoff and b_dirtyend, * otherwise force a write rpc of the old dirty area. * * While it is possible to merge discontiguous writes due to * our having a B_CACHE buffer ( and thus valid read data * for the hole), we don't because it could lead to * significant cache coherency problems with multiple clients, * especially if locking is implemented later on. * * as an optimization we could theoretically maintain * a linked list of discontinuous areas, but we would still * have to commit them separately so there isn't much * advantage to it except perhaps a bit of asynchronization. */ if (bp->b_dirtyend > 0 && (on > bp->b_dirtyend || (on + n) < bp->b_dirtyoff)) { /* * Yes, we mean it. Write out everything to "storage" * immediately, without hesitation. (Apart from other * reasons: the only way to know if a write is valid * if its actually written out.) */ SDT_PROBE2(fusefs, , io, write_biobackend_issue, 0, bp); bwrite(bp); if (bp->b_error == EINTR) { err = EINTR; break; } goto again; } err = uiomove((char *)bp->b_data + on, n, uio); if (err) { bp->b_ioflags |= BIO_ERROR; bp->b_error = err; brelse(bp); break; /* TODO: vfs_bio_clrbuf like ffs_write does? */ } /* * Only update dirtyoff/dirtyend if not a degenerate * condition. */ if (n) { if (bp->b_dirtyend > 0) { bp->b_dirtyoff = MIN(on, bp->b_dirtyoff); bp->b_dirtyend = MAX((on + n), bp->b_dirtyend); } else { bp->b_dirtyoff = on; bp->b_dirtyend = on + n; } vfs_bio_set_valid(bp, on, n); } vfs_bio_set_flags(bp, ioflag); bp->b_flags |= B_FUSEFS_WRITE_CACHE; if (ioflag & IO_SYNC) { SDT_PROBE2(fusefs, , io, write_biobackend_issue, 2, bp); if (!(ioflag & IO_VMIO)) bp->b_flags &= ~B_FUSEFS_WRITE_CACHE; err = bwrite(bp); } else if (vm_page_count_severe() || buf_dirty_count_severe() || (ioflag & IO_ASYNC)) { bp->b_flags |= B_CLUSTEROK; SDT_PROBE2(fusefs, , io, write_biobackend_issue, 3, bp); bawrite(bp); } else if (on == 0 && n == bcount) { if ((vp->v_mount->mnt_flag & MNT_NOCLUSTERW) == 0) { bp->b_flags |= B_CLUSTEROK; SDT_PROBE2(fusefs, , io, write_biobackend_issue, 4, bp); cluster_write(vp, bp, filesize, seqcount, 0); } else { SDT_PROBE2(fusefs, , io, write_biobackend_issue, 5, bp); bawrite(bp); } } else if (ioflag & IO_DIRECT) { bp->b_flags |= B_CLUSTEROK; SDT_PROBE2(fusefs, , io, write_biobackend_issue, 6, bp); bawrite(bp); } else { bp->b_flags &= ~B_CLUSTEROK; SDT_PROBE2(fusefs, , io, write_biobackend_issue, 7, bp); bdwrite(bp); } if (err) break; } while (uio->uio_resid > 0 && n > 0); return (err); } int fuse_io_strategy(struct vnode *vp, struct buf *bp) { struct fuse_vnode_data *fvdat = VTOFUD(vp); struct fuse_filehandle *fufh; struct ucred *cred; struct uio *uiop; struct uio uio; struct iovec io; off_t filesize; int error = 0; int fflag; /* We don't know the true pid when we're dealing with the cache */ pid_t pid = 0; const int biosize = fuse_iosize(vp); MPASS(vp->v_type == VREG || vp->v_type == VDIR); MPASS(bp->b_iocmd == BIO_READ || bp->b_iocmd == BIO_WRITE); fflag = bp->b_iocmd == BIO_READ ? FREAD : FWRITE; cred = bp->b_iocmd == BIO_READ ? bp->b_rcred : bp->b_wcred; error = fuse_filehandle_getrw(vp, fflag, &fufh, cred, pid); if (bp->b_iocmd == BIO_READ && error == EBADF) { /* * This may be a read-modify-write operation on a cached file * opened O_WRONLY. The FUSE protocol allows this. */ error = fuse_filehandle_get(vp, FWRITE, &fufh, cred, pid); } if (error) { printf("FUSE: strategy: filehandles are closed\n"); bp->b_ioflags |= BIO_ERROR; bp->b_error = error; bufdone(bp); return (error); } uiop = &uio; uiop->uio_iov = &io; uiop->uio_iovcnt = 1; uiop->uio_segflg = UIO_SYSSPACE; uiop->uio_td = curthread; /* * clear BIO_ERROR and B_INVAL state prior to initiating the I/O. We * do this here so we do not have to do it in all the code that * calls us. */ bp->b_flags &= ~B_INVAL; bp->b_ioflags &= ~BIO_ERROR; KASSERT(!(bp->b_flags & B_DONE), ("fuse_io_strategy: bp %p already marked done", bp)); if (bp->b_iocmd == BIO_READ) { ssize_t left; io.iov_len = uiop->uio_resid = bp->b_bcount; io.iov_base = bp->b_data; uiop->uio_rw = UIO_READ; uiop->uio_offset = ((off_t)bp->b_lblkno) * biosize; error = fuse_read_directbackend(vp, uiop, cred, fufh); /* * Store the amount we failed to read in the buffer's private * field, so callers can truncate the file if necessary' */ if (!error && uiop->uio_resid) { int nread = bp->b_bcount - uiop->uio_resid; left = uiop->uio_resid; bzero((char *)bp->b_data + nread, left); if ((fvdat->flag & FN_SIZECHANGE) == 0) { /* * A short read with no error, when not using * direct io, and when no writes are cached, * indicates EOF caused by a server-side * truncation. Clear the attr cache so we'll * pick up the new file size and timestamps. * * We must still bzero the remaining buffer so * uninitialized data doesn't get exposed by a * future truncate that extends the file. * * To prevent lock order problems, we must * truncate the file upstack, not here. */ SDT_PROBE2(fusefs, , io, trace, 1, "Short read of a clean file"); fuse_vnode_clear_attr_cache(vp); } else { /* * If dirty writes _are_ cached beyond EOF, * that indicates a newly created hole that the * server doesn't know about. Those don't pose * any problem. * XXX: we don't currently track whether dirty * writes are cached beyond EOF, before EOF, or * both. */ SDT_PROBE2(fusefs, , io, trace, 1, "Short read of a dirty file"); uiop->uio_resid = 0; } } if (error) { bp->b_ioflags |= BIO_ERROR; bp->b_error = error; } } else { /* * Setup for actual write */ error = fuse_vnode_size(vp, &filesize, cred, curthread); if (error) { bp->b_ioflags |= BIO_ERROR; bp->b_error = error; bufdone(bp); return (error); } if ((off_t)bp->b_lblkno * biosize + bp->b_dirtyend > filesize) bp->b_dirtyend = filesize - (off_t)bp->b_lblkno * biosize; if (bp->b_dirtyend > bp->b_dirtyoff) { io.iov_len = uiop->uio_resid = bp->b_dirtyend - bp->b_dirtyoff; uiop->uio_offset = (off_t)bp->b_lblkno * biosize + bp->b_dirtyoff; io.iov_base = (char *)bp->b_data + bp->b_dirtyoff; uiop->uio_rw = UIO_WRITE; bool pages = bp->b_flags & B_FUSEFS_WRITE_CACHE; error = fuse_write_directbackend(vp, uiop, cred, fufh, filesize, 0, pages); if (error == EINTR || error == ETIMEDOUT) { bp->b_flags &= ~(B_INVAL | B_NOCACHE); if ((bp->b_flags & B_PAGING) == 0) { bdirty(bp); bp->b_flags &= ~B_DONE; } if ((error == EINTR || error == ETIMEDOUT) && (bp->b_flags & B_ASYNC) == 0) bp->b_flags |= B_EINTR; } else { if (error) { bp->b_ioflags |= BIO_ERROR; bp->b_flags |= B_INVAL; bp->b_error = error; } bp->b_dirtyoff = bp->b_dirtyend = 0; } } else { bp->b_resid = 0; bufdone(bp); return (0); } } bp->b_resid = uiop->uio_resid; bufdone(bp); return (error); } int fuse_io_flushbuf(struct vnode *vp, int waitfor, struct thread *td) { return (vn_fsync_buf(vp, waitfor)); } /* * Flush and invalidate all dirty buffers. If another process is already * doing the flush, just wait for completion. */ int fuse_io_invalbuf(struct vnode *vp, struct thread *td) { struct fuse_vnode_data *fvdat = VTOFUD(vp); int error = 0; if (VN_IS_DOOMED(vp)) return 0; ASSERT_VOP_ELOCKED(vp, "fuse_io_invalbuf"); while (fvdat->flag & FN_FLUSHINPROG) { struct proc *p = td->td_proc; if (vp->v_mount->mnt_kern_flag & MNTK_UNMOUNTF) return EIO; fvdat->flag |= FN_FLUSHWANT; tsleep(&fvdat->flag, PRIBIO + 2, "fusevinv", 2 * hz); error = 0; if (p != NULL) { PROC_LOCK(p); if (SIGNOTEMPTY(p->p_siglist) || SIGNOTEMPTY(td->td_siglist)) error = EINTR; PROC_UNLOCK(p); } if (error == EINTR) return EINTR; } fvdat->flag |= FN_FLUSHINPROG; if (vp->v_bufobj.bo_object != NULL) { VM_OBJECT_WLOCK(vp->v_bufobj.bo_object); vm_object_page_clean(vp->v_bufobj.bo_object, 0, 0, OBJPC_SYNC); VM_OBJECT_WUNLOCK(vp->v_bufobj.bo_object); } error = vinvalbuf(vp, V_SAVE, PCATCH, 0); while (error) { if (error == ERESTART || error == EINTR) { fvdat->flag &= ~FN_FLUSHINPROG; if (fvdat->flag & FN_FLUSHWANT) { fvdat->flag &= ~FN_FLUSHWANT; wakeup(&fvdat->flag); } return EINTR; } error = vinvalbuf(vp, V_SAVE, PCATCH, 0); } fvdat->flag &= ~FN_FLUSHINPROG; if (fvdat->flag & FN_FLUSHWANT) { fvdat->flag &= ~FN_FLUSHWANT; wakeup(&fvdat->flag); } return (error); } diff --git a/sys/fs/fuse/fuse_ipc.c b/sys/fs/fuse/fuse_ipc.c index d3738da26b34..791ee9f38444 100644 --- a/sys/fs/fuse/fuse_ipc.c +++ b/sys/fs/fuse/fuse_ipc.c @@ -1,1071 +1,1075 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 2007-2009 Google Inc. and Amit Singh * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are * met: * * * Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following disclaimer * in the documentation and/or other materials provided with the * distribution. * * Neither the name of Google Inc. nor the names of its * contributors may be used to endorse or promote products derived from * this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * Copyright (C) 2005 Csaba Henk. * All rights reserved. * * Copyright (c) 2019 The FreeBSD Foundation * * Portions of this software were developed by BFF Storage Systems, LLC under * sponsorship from the FreeBSD Foundation. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "fuse.h" #include "fuse_node.h" #include "fuse_ipc.h" #include "fuse_internal.h" SDT_PROVIDER_DECLARE(fusefs); /* * Fuse trace probe: * arg0: verbosity. Higher numbers give more verbose messages * arg1: Textual message */ SDT_PROBE_DEFINE2(fusefs, , ipc, trace, "int", "char*"); static void fdisp_make_pid(struct fuse_dispatcher *fdip, enum fuse_opcode op, struct fuse_data *data, uint64_t nid, pid_t pid, struct ucred *cred); static void fuse_interrupt_send(struct fuse_ticket *otick, int err); static struct fuse_ticket *fticket_alloc(struct fuse_data *data); static void fticket_refresh(struct fuse_ticket *ftick); static void fticket_destroy(struct fuse_ticket *ftick); static int fticket_wait_answer(struct fuse_ticket *ftick); static inline int fticket_aw_pull_uio(struct fuse_ticket *ftick, struct uio *uio); static int fuse_body_audit(struct fuse_ticket *ftick, size_t blen); static fuse_handler_t fuse_standard_handler; static counter_u64_t fuse_ticket_count; SYSCTL_COUNTER_U64(_vfs_fusefs_stats, OID_AUTO, ticket_count, CTLFLAG_RD, &fuse_ticket_count, "Number of allocated tickets"); static long fuse_iov_permanent_bufsize = 1 << 19; SYSCTL_LONG(_vfs_fusefs, OID_AUTO, iov_permanent_bufsize, CTLFLAG_RW, &fuse_iov_permanent_bufsize, 0, "limit for permanently stored buffer size for fuse_iovs"); static int fuse_iov_credit = 16; SYSCTL_INT(_vfs_fusefs, OID_AUTO, iov_credit, CTLFLAG_RW, &fuse_iov_credit, 0, "how many times is an oversized fuse_iov tolerated"); MALLOC_DEFINE(M_FUSEMSG, "fuse_msgbuf", "fuse message buffer"); static uma_zone_t ticket_zone; /* * TODO: figure out how to timeout INTERRUPT requests, because the daemon may * leagally never respond */ static int fuse_interrupt_callback(struct fuse_ticket *tick, struct uio *uio) { struct fuse_ticket *otick, *x_tick; struct fuse_interrupt_in *fii; struct fuse_data *data = tick->tk_data; bool found = false; fii = (struct fuse_interrupt_in*)((char*)tick->tk_ms_fiov.base + sizeof(struct fuse_in_header)); fuse_lck_mtx_lock(data->aw_mtx); TAILQ_FOREACH_SAFE(otick, &data->aw_head, tk_aw_link, x_tick) { if (otick->tk_unique == fii->unique) { found = true; break; } } fuse_lck_mtx_unlock(data->aw_mtx); if (!found) { /* Original is already complete. Just return */ return 0; } /* Clear the original ticket's interrupt association */ otick->irq_unique = 0; if (tick->tk_aw_ohead.error == ENOSYS) { fsess_set_notimpl(data->mp, FUSE_INTERRUPT); return 0; } else if (tick->tk_aw_ohead.error == EAGAIN) { /* * There are two reasons we might get this: * 1) the daemon received the INTERRUPT request before the * original, or * 2) the daemon received the INTERRUPT request after it * completed the original request. * In the first case we should re-send the INTERRUPT. In the * second, we should ignore it. */ /* Resend */ fuse_interrupt_send(otick, EINTR); return 0; } else { /* Illegal FUSE_INTERRUPT response */ return EINVAL; } } /* Interrupt the operation otick. Return err as its error code */ void fuse_interrupt_send(struct fuse_ticket *otick, int err) { struct fuse_dispatcher fdi; struct fuse_interrupt_in *fii; struct fuse_in_header *ftick_hdr; struct fuse_data *data = otick->tk_data; struct fuse_ticket *tick, *xtick; struct ucred reused_creds; gid_t reused_groups[1]; if (otick->irq_unique == 0) { /* * If the daemon hasn't yet received otick, then we can answer * it ourselves and return. */ fuse_lck_mtx_lock(data->ms_mtx); STAILQ_FOREACH_SAFE(tick, &otick->tk_data->ms_head, tk_ms_link, xtick) { if (tick == otick) { STAILQ_REMOVE(&otick->tk_data->ms_head, tick, fuse_ticket, tk_ms_link); otick->tk_data->ms_count--; otick->tk_ms_link.stqe_next = NULL; fuse_lck_mtx_unlock(data->ms_mtx); fuse_lck_mtx_lock(otick->tk_aw_mtx); if (!fticket_answered(otick)) { fticket_set_answered(otick); otick->tk_aw_errno = err; wakeup(otick); } fuse_lck_mtx_unlock(otick->tk_aw_mtx); fuse_ticket_drop(tick); return; } } fuse_lck_mtx_unlock(data->ms_mtx); /* * If the fuse daemon doesn't support interrupts, then there's * nothing more that we can do */ if (fsess_not_impl(data->mp, FUSE_INTERRUPT)) return; /* * If the fuse daemon has already received otick, then we must * send FUSE_INTERRUPT. */ ftick_hdr = fticket_in_header(otick); reused_creds.cr_uid = ftick_hdr->uid; reused_groups[0] = ftick_hdr->gid; reused_creds.cr_groups = reused_groups; fdisp_init(&fdi, sizeof(*fii)); fdisp_make_pid(&fdi, FUSE_INTERRUPT, data, ftick_hdr->nodeid, ftick_hdr->pid, &reused_creds); fii = fdi.indata; fii->unique = otick->tk_unique; fuse_insert_callback(fdi.tick, fuse_interrupt_callback); otick->irq_unique = fdi.tick->tk_unique; /* Interrupt ops should be delivered ASAP */ fuse_insert_message(fdi.tick, true); fdisp_destroy(&fdi); } else { /* This ticket has already been interrupted */ } } void fiov_init(struct fuse_iov *fiov, size_t size) { uint32_t msize = FU_AT_LEAST(size); fiov->len = 0; fiov->base = malloc(msize, M_FUSEMSG, M_WAITOK | M_ZERO); fiov->allocated_size = msize; fiov->credit = fuse_iov_credit; } void fiov_teardown(struct fuse_iov *fiov) { MPASS(fiov->base != NULL); free(fiov->base, M_FUSEMSG); } void fiov_adjust(struct fuse_iov *fiov, size_t size) { if (fiov->allocated_size < size || (fuse_iov_permanent_bufsize >= 0 && fiov->allocated_size - size > fuse_iov_permanent_bufsize && --fiov->credit < 0)) { fiov->base = realloc(fiov->base, FU_AT_LEAST(size), M_FUSEMSG, M_WAITOK | M_ZERO); if (!fiov->base) { panic("FUSE: realloc failed"); } fiov->allocated_size = FU_AT_LEAST(size); fiov->credit = fuse_iov_credit; /* Clear data buffer after reallocation */ bzero(fiov->base, size); } else if (size > fiov->len) { /* Clear newly extended portion of data buffer */ bzero((char*)fiov->base + fiov->len, size - fiov->len); } fiov->len = size; } /* Resize the fiov if needed, and clear it's buffer */ void fiov_refresh(struct fuse_iov *fiov) { fiov_adjust(fiov, 0); } static int fticket_ctor(void *mem, int size, void *arg, int flags) { struct fuse_ticket *ftick = mem; struct fuse_data *data = arg; FUSE_ASSERT_MS_DONE(ftick); FUSE_ASSERT_AW_DONE(ftick); ftick->tk_data = data; if (ftick->tk_unique != 0) fticket_refresh(ftick); /* May be truncated to 32 bits */ ftick->tk_unique = atomic_fetchadd_long(&data->ticketer, 1); if (ftick->tk_unique == 0) ftick->tk_unique = atomic_fetchadd_long(&data->ticketer, 1); ftick->irq_unique = 0; refcount_init(&ftick->tk_refcount, 1); counter_u64_add(fuse_ticket_count, 1); return 0; } static void fticket_dtor(void *mem, int size, void *arg) { #ifdef INVARIANTS struct fuse_ticket *ftick = mem; #endif FUSE_ASSERT_MS_DONE(ftick); FUSE_ASSERT_AW_DONE(ftick); counter_u64_add(fuse_ticket_count, -1); } static int fticket_init(void *mem, int size, int flags) { struct fuse_ticket *ftick = mem; bzero(ftick, sizeof(struct fuse_ticket)); fiov_init(&ftick->tk_ms_fiov, sizeof(struct fuse_in_header)); mtx_init(&ftick->tk_aw_mtx, "fuse answer delivery mutex", NULL, MTX_DEF); fiov_init(&ftick->tk_aw_fiov, 0); return 0; } static void fticket_fini(void *mem, int size) { struct fuse_ticket *ftick = mem; fiov_teardown(&ftick->tk_ms_fiov); fiov_teardown(&ftick->tk_aw_fiov); mtx_destroy(&ftick->tk_aw_mtx); } static inline struct fuse_ticket * fticket_alloc(struct fuse_data *data) { return uma_zalloc_arg(ticket_zone, data, M_WAITOK); } static inline void fticket_destroy(struct fuse_ticket *ftick) { return uma_zfree(ticket_zone, ftick); } static inline void fticket_refresh(struct fuse_ticket *ftick) { FUSE_ASSERT_MS_DONE(ftick); FUSE_ASSERT_AW_DONE(ftick); fiov_refresh(&ftick->tk_ms_fiov); bzero(&ftick->tk_aw_ohead, sizeof(struct fuse_out_header)); fiov_refresh(&ftick->tk_aw_fiov); ftick->tk_aw_errno = 0; ftick->tk_flag = 0; } /* Prepar the ticket to be reused, but don't clear its data buffers */ static inline void fticket_reset(struct fuse_ticket *ftick) { FUSE_ASSERT_MS_DONE(ftick); FUSE_ASSERT_AW_DONE(ftick); bzero(&ftick->tk_aw_ohead, sizeof(struct fuse_out_header)); ftick->tk_aw_errno = 0; ftick->tk_flag = 0; } static int fticket_wait_answer(struct fuse_ticket *ftick) { struct thread *td = curthread; sigset_t blockedset, oldset; int err = 0, stops_deferred; struct fuse_data *data = ftick->tk_data; bool interrupted = false; if (fsess_maybe_impl(ftick->tk_data->mp, FUSE_INTERRUPT) && data->dataflags & FSESS_INTR) { SIGEMPTYSET(blockedset); } else { /* Block all signals except (implicitly) SIGKILL */ SIGFILLSET(blockedset); } stops_deferred = sigdeferstop(SIGDEFERSTOP_SILENT); kern_sigprocmask(td, SIG_BLOCK, NULL, &oldset, 0); fuse_lck_mtx_lock(ftick->tk_aw_mtx); retry: if (fticket_answered(ftick)) { goto out; } if (fdata_get_dead(data)) { err = ENOTCONN; fticket_set_answered(ftick); goto out; } kern_sigprocmask(td, SIG_BLOCK, &blockedset, NULL, 0); err = msleep(ftick, &ftick->tk_aw_mtx, PCATCH, "fu_ans", data->daemon_timeout * hz); kern_sigprocmask(td, SIG_SETMASK, &oldset, NULL, 0); if (err == EWOULDBLOCK) { SDT_PROBE2(fusefs, , ipc, trace, 3, "fticket_wait_answer: EWOULDBLOCK"); #ifdef XXXIP /* die conditionally */ if (!fdata_get_dead(data)) { fdata_set_dead(data); } #endif err = ETIMEDOUT; fticket_set_answered(ftick); } else if ((err == EINTR || err == ERESTART)) { /* * Whether we get EINTR or ERESTART depends on whether * SA_RESTART was set by sigaction(2). * * Try to interrupt the operation and wait for an EINTR response * to the original operation. If the file system does not * support FUSE_INTERRUPT, then we'll just wait for it to * complete like normal. If it does support FUSE_INTERRUPT, * then it will either respond EINTR to the original operation, * or EAGAIN to the interrupt. */ sigset_t tmpset; SDT_PROBE2(fusefs, , ipc, trace, 4, "fticket_wait_answer: interrupt"); fuse_lck_mtx_unlock(ftick->tk_aw_mtx); fuse_interrupt_send(ftick, err); PROC_LOCK(td->td_proc); mtx_lock(&td->td_proc->p_sigacts->ps_mtx); tmpset = td->td_proc->p_siglist; SIGSETOR(tmpset, td->td_siglist); mtx_unlock(&td->td_proc->p_sigacts->ps_mtx); PROC_UNLOCK(td->td_proc); fuse_lck_mtx_lock(ftick->tk_aw_mtx); if (!interrupted && !SIGISMEMBER(tmpset, SIGKILL)) { /* * Block all signals while we wait for an interrupt * response. The protocol doesn't discriminate between * different signals. */ SIGFILLSET(blockedset); interrupted = true; goto retry; } else { /* * Return immediately for fatal signals, or if this is * the second interruption. We should only be * interrupted twice if the thread is stopped, for * example during sigexit. */ } } else if (err) { SDT_PROBE2(fusefs, , ipc, trace, 6, "fticket_wait_answer: other error"); } else { SDT_PROBE2(fusefs, , ipc, trace, 7, "fticket_wait_answer: OK"); } out: if (!(err || fticket_answered(ftick))) { SDT_PROBE2(fusefs, , ipc, trace, 1, "FUSE: requester was woken up but still no answer"); err = ENXIO; } fuse_lck_mtx_unlock(ftick->tk_aw_mtx); sigallowstop(stops_deferred); return err; } static inline int fticket_aw_pull_uio(struct fuse_ticket *ftick, struct uio *uio) { int err = 0; size_t len = uio_resid(uio); if (len) { fiov_adjust(fticket_resp(ftick), len); err = uiomove(fticket_resp(ftick)->base, len, uio); } return err; } int fticket_pull(struct fuse_ticket *ftick, struct uio *uio) { int err = 0; if (ftick->tk_aw_ohead.error) { return 0; } err = fuse_body_audit(ftick, uio_resid(uio)); if (!err) { err = fticket_aw_pull_uio(ftick, uio); } return err; } struct fuse_data * fdata_alloc(struct cdev *fdev, struct ucred *cred) { struct fuse_data *data; data = malloc(sizeof(struct fuse_data), M_FUSEMSG, M_WAITOK | M_ZERO); data->fdev = fdev; mtx_init(&data->ms_mtx, "fuse message list mutex", NULL, MTX_DEF); STAILQ_INIT(&data->ms_head); data->ms_count = 0; knlist_init_mtx(&data->ks_rsel.si_note, &data->ms_mtx); mtx_init(&data->aw_mtx, "fuse answer list mutex", NULL, MTX_DEF); TAILQ_INIT(&data->aw_head); data->daemoncred = crhold(cred); data->daemon_timeout = FUSE_DEFAULT_DAEMON_TIMEOUT; sx_init(&data->rename_lock, "fuse rename lock"); data->ref = 1; return data; } void fdata_trydestroy(struct fuse_data *data) { data->ref--; MPASS(data->ref >= 0); if (data->ref != 0) return; /* Driving off stage all that stuff thrown at device... */ sx_destroy(&data->rename_lock); crfree(data->daemoncred); mtx_destroy(&data->aw_mtx); knlist_delete(&data->ks_rsel.si_note, curthread, 0); knlist_destroy(&data->ks_rsel.si_note); mtx_destroy(&data->ms_mtx); free(data, M_FUSEMSG); } void fdata_set_dead(struct fuse_data *data) { FUSE_LOCK(); if (fdata_get_dead(data)) { FUSE_UNLOCK(); return; } fuse_lck_mtx_lock(data->ms_mtx); data->dataflags |= FSESS_DEAD; wakeup_one(data); selwakeuppri(&data->ks_rsel, PZERO + 1); wakeup(&data->ticketer); fuse_lck_mtx_unlock(data->ms_mtx); FUSE_UNLOCK(); } struct fuse_ticket * fuse_ticket_fetch(struct fuse_data *data) { int err = 0; struct fuse_ticket *ftick; ftick = fticket_alloc(data); if (!(data->dataflags & FSESS_INITED)) { /* Sleep until get answer for INIT messsage */ FUSE_LOCK(); if (!(data->dataflags & FSESS_INITED) && data->ticketer > 2) { err = msleep(&data->ticketer, &fuse_mtx, PCATCH | PDROP, "fu_ini", 0); if (err) fdata_set_dead(data); } else FUSE_UNLOCK(); } return ftick; } int fuse_ticket_drop(struct fuse_ticket *ftick) { int die; die = refcount_release(&ftick->tk_refcount); if (die) fticket_destroy(ftick); return die; } void fuse_insert_callback(struct fuse_ticket *ftick, fuse_handler_t * handler) { if (fdata_get_dead(ftick->tk_data)) { return; } ftick->tk_aw_handler = handler; fuse_lck_mtx_lock(ftick->tk_data->aw_mtx); fuse_aw_push(ftick); fuse_lck_mtx_unlock(ftick->tk_data->aw_mtx); } /* * Insert a new upgoing ticket into the message queue * * If urgent is true, insert at the front of the queue. Otherwise, insert in * FIFO order. */ void fuse_insert_message(struct fuse_ticket *ftick, bool urgent) { if (ftick->tk_flag & FT_DIRTY) { panic("FUSE: ticket reused without being refreshed"); } ftick->tk_flag |= FT_DIRTY; if (fdata_get_dead(ftick->tk_data)) { return; } fuse_lck_mtx_lock(ftick->tk_data->ms_mtx); if (urgent) fuse_ms_push_head(ftick); else fuse_ms_push(ftick); wakeup_one(ftick->tk_data); selwakeuppri(&ftick->tk_data->ks_rsel, PZERO + 1); KNOTE_LOCKED(&ftick->tk_data->ks_rsel.si_note, 0); fuse_lck_mtx_unlock(ftick->tk_data->ms_mtx); } static int fuse_body_audit(struct fuse_ticket *ftick, size_t blen) { int err = 0; enum fuse_opcode opcode; opcode = fticket_opcode(ftick); switch (opcode) { case FUSE_BMAP: err = (blen == sizeof(struct fuse_bmap_out)) ? 0 : EINVAL; break; case FUSE_LINK: case FUSE_LOOKUP: case FUSE_MKDIR: case FUSE_MKNOD: case FUSE_SYMLINK: if (fuse_libabi_geq(ftick->tk_data, 7, 9)) { err = (blen == sizeof(struct fuse_entry_out)) ? 0 : EINVAL; } else { err = (blen == FUSE_COMPAT_ENTRY_OUT_SIZE) ? 0 : EINVAL; } break; case FUSE_FORGET: panic("FUSE: a handler has been intalled for FUSE_FORGET"); break; case FUSE_GETATTR: case FUSE_SETATTR: if (fuse_libabi_geq(ftick->tk_data, 7, 9)) { err = (blen == sizeof(struct fuse_attr_out)) ? 0 : EINVAL; } else { err = (blen == FUSE_COMPAT_ATTR_OUT_SIZE) ? 0 : EINVAL; } break; case FUSE_READLINK: err = (PAGE_SIZE >= blen) ? 0 : EINVAL; break; case FUSE_UNLINK: err = (blen == 0) ? 0 : EINVAL; break; case FUSE_RMDIR: err = (blen == 0) ? 0 : EINVAL; break; case FUSE_RENAME: err = (blen == 0) ? 0 : EINVAL; break; case FUSE_OPEN: err = (blen == sizeof(struct fuse_open_out)) ? 0 : EINVAL; break; case FUSE_READ: err = (((struct fuse_read_in *)( (char *)ftick->tk_ms_fiov.base + sizeof(struct fuse_in_header) ))->size >= blen) ? 0 : EINVAL; break; case FUSE_WRITE: err = (blen == sizeof(struct fuse_write_out)) ? 0 : EINVAL; break; case FUSE_STATFS: if (fuse_libabi_geq(ftick->tk_data, 7, 4)) { err = (blen == sizeof(struct fuse_statfs_out)) ? 0 : EINVAL; } else { err = (blen == FUSE_COMPAT_STATFS_SIZE) ? 0 : EINVAL; } break; case FUSE_RELEASE: err = (blen == 0) ? 0 : EINVAL; break; case FUSE_FSYNC: err = (blen == 0) ? 0 : EINVAL; break; case FUSE_SETXATTR: err = (blen == 0) ? 0 : EINVAL; break; case FUSE_GETXATTR: case FUSE_LISTXATTR: /* * These can have varying response lengths, and 0 length * isn't necessarily invalid. */ err = 0; break; case FUSE_REMOVEXATTR: err = (blen == 0) ? 0 : EINVAL; break; case FUSE_FLUSH: err = (blen == 0) ? 0 : EINVAL; break; case FUSE_INIT: if (blen == sizeof(struct fuse_init_out) || blen == FUSE_COMPAT_INIT_OUT_SIZE || blen == FUSE_COMPAT_22_INIT_OUT_SIZE) { err = 0; } else { err = EINVAL; } break; case FUSE_OPENDIR: err = (blen == sizeof(struct fuse_open_out)) ? 0 : EINVAL; break; case FUSE_READDIR: err = (((struct fuse_read_in *)( (char *)ftick->tk_ms_fiov.base + sizeof(struct fuse_in_header) ))->size >= blen) ? 0 : EINVAL; break; case FUSE_RELEASEDIR: err = (blen == 0) ? 0 : EINVAL; break; case FUSE_FSYNCDIR: err = (blen == 0) ? 0 : EINVAL; break; case FUSE_GETLK: err = (blen == sizeof(struct fuse_lk_out)) ? 0 : EINVAL; break; case FUSE_SETLK: err = (blen == 0) ? 0 : EINVAL; break; case FUSE_SETLKW: err = (blen == 0) ? 0 : EINVAL; break; case FUSE_ACCESS: err = (blen == 0) ? 0 : EINVAL; break; case FUSE_CREATE: if (fuse_libabi_geq(ftick->tk_data, 7, 9)) { err = (blen == sizeof(struct fuse_entry_out) + sizeof(struct fuse_open_out)) ? 0 : EINVAL; } else { err = (blen == FUSE_COMPAT_ENTRY_OUT_SIZE + sizeof(struct fuse_open_out)) ? 0 : EINVAL; } break; case FUSE_DESTROY: err = (blen == 0) ? 0 : EINVAL; break; case FUSE_LSEEK: err = (blen == sizeof(struct fuse_lseek_out)) ? 0 : EINVAL; break; + case FUSE_COPY_FILE_RANGE: + err = (blen == sizeof(struct fuse_write_out)) ? 0 : EINVAL; + break; + default: panic("FUSE: opcodes out of sync (%d)\n", opcode); } return err; } static inline void fuse_setup_ihead(struct fuse_in_header *ihead, struct fuse_ticket *ftick, uint64_t nid, enum fuse_opcode op, size_t blen, pid_t pid, struct ucred *cred) { ihead->len = sizeof(*ihead) + blen; ihead->unique = ftick->tk_unique; ihead->nodeid = nid; ihead->opcode = op; ihead->pid = pid; ihead->uid = cred->cr_uid; ihead->gid = cred->cr_groups[0]; } /* * fuse_standard_handler just pulls indata and wakes up pretender. * Doesn't try to interpret data, that's left for the pretender. * Though might do a basic size verification before the pull-in takes place */ static int fuse_standard_handler(struct fuse_ticket *ftick, struct uio *uio) { int err = 0; err = fticket_pull(ftick, uio); fuse_lck_mtx_lock(ftick->tk_aw_mtx); if (!fticket_answered(ftick)) { fticket_set_answered(ftick); ftick->tk_aw_errno = err; wakeup(ftick); } fuse_lck_mtx_unlock(ftick->tk_aw_mtx); return err; } /* * Reinitialize a dispatcher from a pid and node id, without resizing or * clearing its data buffers */ static void fdisp_refresh_pid(struct fuse_dispatcher *fdip, enum fuse_opcode op, struct mount *mp, uint64_t nid, pid_t pid, struct ucred *cred) { MPASS(fdip->tick); MPASS2(sizeof(fdip->finh) + fdip->iosize <= fdip->tick->tk_ms_fiov.len, "Must use fdisp_make_pid to increase the size of the fiov"); fticket_reset(fdip->tick); FUSE_DIMALLOC(&fdip->tick->tk_ms_fiov, fdip->finh, fdip->indata, fdip->iosize); fuse_setup_ihead(fdip->finh, fdip->tick, nid, op, fdip->iosize, pid, cred); } /* Initialize a dispatcher from a pid and node id */ static void fdisp_make_pid(struct fuse_dispatcher *fdip, enum fuse_opcode op, struct fuse_data *data, uint64_t nid, pid_t pid, struct ucred *cred) { if (fdip->tick) { fticket_refresh(fdip->tick); } else { fdip->tick = fuse_ticket_fetch(data); } /* FUSE_DIMALLOC will bzero the fiovs when it enlarges them */ FUSE_DIMALLOC(&fdip->tick->tk_ms_fiov, fdip->finh, fdip->indata, fdip->iosize); fuse_setup_ihead(fdip->finh, fdip->tick, nid, op, fdip->iosize, pid, cred); } void fdisp_make(struct fuse_dispatcher *fdip, enum fuse_opcode op, struct mount *mp, uint64_t nid, struct thread *td, struct ucred *cred) { struct fuse_data *data = fuse_get_mpdata(mp); RECTIFY_TDCR(td, cred); return fdisp_make_pid(fdip, op, data, nid, td->td_proc->p_pid, cred); } void fdisp_make_vp(struct fuse_dispatcher *fdip, enum fuse_opcode op, struct vnode *vp, struct thread *td, struct ucred *cred) { struct mount *mp = vnode_mount(vp); struct fuse_data *data = fuse_get_mpdata(mp); RECTIFY_TDCR(td, cred); return fdisp_make_pid(fdip, op, data, VTOI(vp), td->td_proc->p_pid, cred); } /* Refresh a fuse_dispatcher so it can be reused, but don't zero its data */ void fdisp_refresh_vp(struct fuse_dispatcher *fdip, enum fuse_opcode op, struct vnode *vp, struct thread *td, struct ucred *cred) { RECTIFY_TDCR(td, cred); return fdisp_refresh_pid(fdip, op, vnode_mount(vp), VTOI(vp), td->td_proc->p_pid, cred); } void fdisp_refresh(struct fuse_dispatcher *fdip) { fticket_refresh(fdip->tick); } SDT_PROBE_DEFINE2(fusefs, , ipc, fdisp_wait_answ_error, "char*", "int"); int fdisp_wait_answ(struct fuse_dispatcher *fdip) { int err = 0; fdip->answ_stat = 0; fuse_insert_callback(fdip->tick, fuse_standard_handler); fuse_insert_message(fdip->tick, false); if ((err = fticket_wait_answer(fdip->tick))) { fuse_lck_mtx_lock(fdip->tick->tk_aw_mtx); if (fticket_answered(fdip->tick)) { /* * Just between noticing the interrupt and getting here, * the standard handler has completed his job. * So we drop the ticket and exit as usual. */ SDT_PROBE2(fusefs, , ipc, fdisp_wait_answ_error, "IPC: interrupted, already answered", err); fuse_lck_mtx_unlock(fdip->tick->tk_aw_mtx); goto out; } else { /* * So we were faster than the standard handler. * Then by setting the answered flag we get *him* * to drop the ticket. */ SDT_PROBE2(fusefs, , ipc, fdisp_wait_answ_error, "IPC: interrupted, setting to answered", err); fticket_set_answered(fdip->tick); fuse_lck_mtx_unlock(fdip->tick->tk_aw_mtx); return err; } } if (fdip->tick->tk_aw_errno == ENOTCONN) { /* The daemon died while we were waiting for a response */ err = ENOTCONN; goto out; } else if (fdip->tick->tk_aw_errno) { /* * There was some sort of communication error with the daemon * that the client wouldn't understand. */ SDT_PROBE2(fusefs, , ipc, fdisp_wait_answ_error, "IPC: explicit EIO-ing", fdip->tick->tk_aw_errno); err = EIO; goto out; } if ((err = fdip->tick->tk_aw_ohead.error)) { SDT_PROBE2(fusefs, , ipc, fdisp_wait_answ_error, "IPC: setting status", fdip->tick->tk_aw_ohead.error); /* * This means a "proper" fuse syscall error. * We record this value so the caller will * be able to know it's not a boring messaging * failure, if she wishes so (and if not, she can * just simply propagate the return value of this routine). * [XXX Maybe a bitflag would do the job too, * if other flags needed, this will be converted thusly.] */ fdip->answ_stat = err; goto out; } fdip->answ = fticket_resp(fdip->tick)->base; fdip->iosize = fticket_resp(fdip->tick)->len; return 0; out: return err; } void fuse_ipc_init(void) { ticket_zone = uma_zcreate("fuse_ticket", sizeof(struct fuse_ticket), fticket_ctor, fticket_dtor, fticket_init, fticket_fini, UMA_ALIGN_PTR, 0); fuse_ticket_count = counter_u64_alloc(M_WAITOK); } void fuse_ipc_destroy(void) { counter_u64_free(fuse_ticket_count); uma_zdestroy(ticket_zone); } diff --git a/sys/fs/fuse/fuse_kernel.h b/sys/fs/fuse/fuse_kernel.h index 6e97b04a733f..14cf4fabac14 100644 --- a/sys/fs/fuse/fuse_kernel.h +++ b/sys/fs/fuse/fuse_kernel.h @@ -1,769 +1,814 @@ -/*-- +/*- + * SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-2-Clause) + * * This file defines the kernel interface of FUSE * Copyright (C) 2001-2008 Miklos Szeredi * * This program can be distributed under the terms of the GNU GPL. * See the file COPYING. * * This -- and only this -- header file may also be distributed under * the terms of the BSD Licence as follows: * * Copyright (C) 2001-2007 Miklos Szeredi. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ /* * This file defines the kernel interface of FUSE * * Protocol changelog: * * 7.9: * - new fuse_getattr_in input argument of GETATTR * - add lk_flags in fuse_lk_in * - add lock_owner field to fuse_setattr_in, fuse_read_in and fuse_write_in * - add blksize field to fuse_attr * - add file flags field to fuse_read_in and fuse_write_in * * 7.10 * - add nonseekable open flag * * 7.11 * - add IOCTL message * - add unsolicited notification support * * 7.12 * - add umask flag to input argument of open, mknod and mkdir * - add notification messages for invalidation of inodes and * directory entries * * 7.13 * - make max number of background requests and congestion threshold * tunables * * 7.14 * - add splice support to fuse device * * 7.15 * - add store notify * - add retrieve notify * * 7.16 * - add BATCH_FORGET request * - FUSE_IOCTL_UNRESTRICTED shall now return with array of 'struct * fuse_ioctl_iovec' instead of ambiguous 'struct iovec' * - add FUSE_IOCTL_32BIT flag * * 7.17 * - add FUSE_FLOCK_LOCKS and FUSE_RELEASE_FLOCK_UNLOCK * * 7.18 * - add FUSE_IOCTL_DIR flag * - add FUSE_NOTIFY_DELETE * * 7.19 * - add FUSE_FALLOCATE * * 7.20 * - add FUSE_AUTO_INVAL_DATA * 7.21 * - add FUSE_READDIRPLUS * - send the requested events in POLL request * * 7.22 * - add FUSE_ASYNC_DIO * * 7.23 * - add FUSE_WRITEBACK_CACHE * - add time_gran to fuse_init_out * - add reserved space to fuse_init_out * - add FATTR_CTIME * - add ctime and ctimensec to fuse_setattr_in * - add FUSE_RENAME2 request * - add FUSE_NO_OPEN_SUPPORT flag * * 7.24 * - add FUSE_LSEEK for SEEK_HOLE and SEEK_DATA support + * + * 7.25 + * - add FUSE_PARALLEL_DIROPS + * + * 7.26 + * - add FUSE_HANDLE_KILLPRIV + * - add FUSE_POSIX_ACL + * + * 7.27 + * - add FUSE_ABORT_ERROR + * + * 7.28 + * - add FUSE_COPY_FILE_RANGE + * - add FOPEN_CACHE_DIR + * - add FUSE_MAX_PAGES, add max_pages to init_out + * - add FUSE_CACHE_SYMLINKS */ #ifndef _FUSE_FUSE_KERNEL_H #define _FUSE_FUSE_KERNEL_H #ifdef __linux__ #include #else #include #endif /** Version number of this interface */ #define FUSE_KERNEL_VERSION 7 /** Minor version number of this interface */ -#define FUSE_KERNEL_MINOR_VERSION 24 +#define FUSE_KERNEL_MINOR_VERSION 28 /** The node ID of the root inode */ #define FUSE_ROOT_ID 1 /* Make sure all structures are padded to 64bit boundary, so 32bit userspace works under 64bit kernels */ struct fuse_attr { uint64_t ino; uint64_t size; uint64_t blocks; uint64_t atime; uint64_t mtime; uint64_t ctime; uint32_t atimensec; uint32_t mtimensec; uint32_t ctimensec; uint32_t mode; uint32_t nlink; uint32_t uid; uint32_t gid; uint32_t rdev; uint32_t blksize; uint32_t padding; }; struct fuse_kstatfs { uint64_t blocks; uint64_t bfree; uint64_t bavail; uint64_t files; uint64_t ffree; uint32_t bsize; uint32_t namelen; uint32_t frsize; uint32_t padding; uint32_t spare[6]; }; struct fuse_file_lock { uint64_t start; uint64_t end; uint32_t type; uint32_t pid; /* tgid */ }; /** * Bitmasks for fuse_setattr_in.valid */ #define FATTR_MODE (1 << 0) #define FATTR_UID (1 << 1) #define FATTR_GID (1 << 2) #define FATTR_SIZE (1 << 3) #define FATTR_ATIME (1 << 4) #define FATTR_MTIME (1 << 5) #define FATTR_FH (1 << 6) #define FATTR_ATIME_NOW (1 << 7) #define FATTR_MTIME_NOW (1 << 8) #define FATTR_LOCKOWNER (1 << 9) #define FATTR_CTIME (1 << 10) /** * Flags returned by the OPEN request * * FOPEN_DIRECT_IO: bypass page cache for this open file * FOPEN_KEEP_CACHE: don't invalidate the data cache on open * FOPEN_NONSEEKABLE: the file is not seekable + * FOPEN_CACHE_DIR: allow caching this directory */ #define FOPEN_DIRECT_IO (1 << 0) #define FOPEN_KEEP_CACHE (1 << 1) #define FOPEN_NONSEEKABLE (1 << 2) +#define FOPEN_CACHE_DIR (1 << 3) /** * INIT request/reply flags * * FUSE_ASYNC_READ: asynchronous read requests * FUSE_POSIX_LOCKS: remote locking for POSIX file locks * FUSE_FILE_OPS: kernel sends file handle for fstat, etc... (not yet supported) * FUSE_ATOMIC_O_TRUNC: handles the O_TRUNC open flag in the filesystem * FUSE_EXPORT_SUPPORT: filesystem handles lookups of "." and ".." * FUSE_BIG_WRITES: filesystem can handle write size larger than 4kB * FUSE_DONT_MASK: don't apply umask to file mode on create operations * FUSE_SPLICE_WRITE: kernel supports splice write on the device * FUSE_SPLICE_MOVE: kernel supports splice move on the device * FUSE_SPLICE_READ: kernel supports splice read on the device * FUSE_FLOCK_LOCKS: remote locking for BSD style file locks * FUSE_HAS_IOCTL_DIR: kernel supports ioctl on directories * FUSE_AUTO_INVAL_DATA: automatically invalidate cached pages * FUSE_DO_READDIRPLUS: do READDIRPLUS (READDIR+LOOKUP in one) * FUSE_READDIRPLUS_AUTO: adaptive readdirplus * FUSE_ASYNC_DIO: asynchronous direct I/O submission * FUSE_WRITEBACK_CACHE: use writeback cache for buffered writes * FUSE_NO_OPEN_SUPPORT: kernel supports zero-message opens + * FUSE_PARALLEL_DIROPS: allow parallel lookups and readdir + * FUSE_HANDLE_KILLPRIV: fs handles killing suid/sgid/cap on write/chown/trunc + * FUSE_POSIX_ACL: filesystem supports posix acls + * FUSE_ABORT_ERROR: reading the device after abort returns ECONNABORTED + * FUSE_MAX_PAGES: init_out.max_pages contains the max number of req pages + * FUSE_CACHE_SYMLINKS: cache READLINK responses */ #define FUSE_ASYNC_READ (1 << 0) #define FUSE_POSIX_LOCKS (1 << 1) #define FUSE_FILE_OPS (1 << 2) #define FUSE_ATOMIC_O_TRUNC (1 << 3) #define FUSE_EXPORT_SUPPORT (1 << 4) #define FUSE_BIG_WRITES (1 << 5) #define FUSE_DONT_MASK (1 << 6) #define FUSE_SPLICE_WRITE (1 << 7) #define FUSE_SPLICE_MOVE (1 << 8) #define FUSE_SPLICE_READ (1 << 9) #define FUSE_FLOCK_LOCKS (1 << 10) #define FUSE_HAS_IOCTL_DIR (1 << 11) #define FUSE_AUTO_INVAL_DATA (1 << 12) #define FUSE_DO_READDIRPLUS (1 << 13) #define FUSE_READDIRPLUS_AUTO (1 << 14) #define FUSE_ASYNC_DIO (1 << 15) #define FUSE_WRITEBACK_CACHE (1 << 16) #define FUSE_NO_OPEN_SUPPORT (1 << 17) +#define FUSE_PARALLEL_DIROPS (1 << 18) +#define FUSE_HANDLE_KILLPRIV (1 << 19) +#define FUSE_POSIX_ACL (1 << 20) +#define FUSE_ABORT_ERROR (1 << 21) +#define FUSE_MAX_PAGES (1 << 22) +#define FUSE_CACHE_SYMLINKS (1 << 23) #ifdef linux /** * CUSE INIT request/reply flags * * CUSE_UNRESTRICTED_IOCTL: use unrestricted ioctl */ #define CUSE_UNRESTRICTED_IOCTL (1 << 0) #endif /* linux */ /** * Release flags */ #define FUSE_RELEASE_FLUSH (1 << 0) #define FUSE_RELEASE_FLOCK_UNLOCK (1 << 1) /** * Getattr flags */ #define FUSE_GETATTR_FH (1 << 0) /** * Lock flags */ #define FUSE_LK_FLOCK (1 << 0) /** * WRITE flags * * FUSE_WRITE_CACHE: delayed write from page cache, file handle is guessed * FUSE_WRITE_LOCKOWNER: lock_owner field is valid */ #define FUSE_WRITE_CACHE (1 << 0) #define FUSE_WRITE_LOCKOWNER (1 << 1) /** * Read flags */ #define FUSE_READ_LOCKOWNER (1 << 1) /** * Ioctl flags * * FUSE_IOCTL_COMPAT: 32bit compat ioctl on 64bit machine * FUSE_IOCTL_UNRESTRICTED: not restricted to well-formed ioctls, retry allowed * FUSE_IOCTL_RETRY: retry with new iovecs * FUSE_IOCTL_32BIT: 32bit ioctl * FUSE_IOCTL_DIR: is a directory * * FUSE_IOCTL_MAX_IOV: maximum of in_iovecs + out_iovecs */ #define FUSE_IOCTL_COMPAT (1 << 0) #define FUSE_IOCTL_UNRESTRICTED (1 << 1) #define FUSE_IOCTL_RETRY (1 << 2) #define FUSE_IOCTL_32BIT (1 << 3) #define FUSE_IOCTL_DIR (1 << 4) #define FUSE_IOCTL_MAX_IOV 256 /** * Poll flags * * FUSE_POLL_SCHEDULE_NOTIFY: request poll notify */ #define FUSE_POLL_SCHEDULE_NOTIFY (1 << 0) enum fuse_opcode { - FUSE_LOOKUP = 1, - FUSE_FORGET = 2, /* no reply */ - FUSE_GETATTR = 3, - FUSE_SETATTR = 4, - FUSE_READLINK = 5, - FUSE_SYMLINK = 6, - FUSE_MKNOD = 8, - FUSE_MKDIR = 9, - FUSE_UNLINK = 10, - FUSE_RMDIR = 11, - FUSE_RENAME = 12, - FUSE_LINK = 13, - FUSE_OPEN = 14, - FUSE_READ = 15, - FUSE_WRITE = 16, - FUSE_STATFS = 17, - FUSE_RELEASE = 18, - FUSE_FSYNC = 20, - FUSE_SETXATTR = 21, - FUSE_GETXATTR = 22, - FUSE_LISTXATTR = 23, - FUSE_REMOVEXATTR = 24, - FUSE_FLUSH = 25, - FUSE_INIT = 26, - FUSE_OPENDIR = 27, - FUSE_READDIR = 28, - FUSE_RELEASEDIR = 29, - FUSE_FSYNCDIR = 30, - FUSE_GETLK = 31, - FUSE_SETLK = 32, - FUSE_SETLKW = 33, - FUSE_ACCESS = 34, - FUSE_CREATE = 35, - FUSE_INTERRUPT = 36, - FUSE_BMAP = 37, - FUSE_DESTROY = 38, - FUSE_IOCTL = 39, - FUSE_POLL = 40, - FUSE_NOTIFY_REPLY = 41, - FUSE_BATCH_FORGET = 42, - FUSE_FALLOCATE = 43, - FUSE_READDIRPLUS = 44, - FUSE_RENAME2 = 45, - FUSE_LSEEK = 46, + FUSE_LOOKUP = 1, + FUSE_FORGET = 2, /* no reply */ + FUSE_GETATTR = 3, + FUSE_SETATTR = 4, + FUSE_READLINK = 5, + FUSE_SYMLINK = 6, + FUSE_MKNOD = 8, + FUSE_MKDIR = 9, + FUSE_UNLINK = 10, + FUSE_RMDIR = 11, + FUSE_RENAME = 12, + FUSE_LINK = 13, + FUSE_OPEN = 14, + FUSE_READ = 15, + FUSE_WRITE = 16, + FUSE_STATFS = 17, + FUSE_RELEASE = 18, + FUSE_FSYNC = 20, + FUSE_SETXATTR = 21, + FUSE_GETXATTR = 22, + FUSE_LISTXATTR = 23, + FUSE_REMOVEXATTR = 24, + FUSE_FLUSH = 25, + FUSE_INIT = 26, + FUSE_OPENDIR = 27, + FUSE_READDIR = 28, + FUSE_RELEASEDIR = 29, + FUSE_FSYNCDIR = 30, + FUSE_GETLK = 31, + FUSE_SETLK = 32, + FUSE_SETLKW = 33, + FUSE_ACCESS = 34, + FUSE_CREATE = 35, + FUSE_INTERRUPT = 36, + FUSE_BMAP = 37, + FUSE_DESTROY = 38, + FUSE_IOCTL = 39, + FUSE_POLL = 40, + FUSE_NOTIFY_REPLY = 41, + FUSE_BATCH_FORGET = 42, + FUSE_FALLOCATE = 43, + FUSE_READDIRPLUS = 44, + FUSE_RENAME2 = 45, + FUSE_LSEEK = 46, + FUSE_COPY_FILE_RANGE = 47, #ifdef linux /* CUSE specific operations */ - CUSE_INIT = 4096, + CUSE_INIT = 4096, #endif /* linux */ }; enum fuse_notify_code { FUSE_NOTIFY_POLL = 1, FUSE_NOTIFY_INVAL_INODE = 2, FUSE_NOTIFY_INVAL_ENTRY = 3, FUSE_NOTIFY_STORE = 4, FUSE_NOTIFY_RETRIEVE = 5, FUSE_NOTIFY_DELETE = 6, FUSE_NOTIFY_CODE_MAX, }; /* The read buffer is required to be at least 8k, but may be much larger */ #define FUSE_MIN_READ_BUFFER 8192 #define FUSE_COMPAT_ENTRY_OUT_SIZE 120 struct fuse_entry_out { uint64_t nodeid; /* Inode ID */ uint64_t generation; /* Inode generation: nodeid:gen must be unique for the fs's lifetime */ uint64_t entry_valid; /* Cache timeout for the name */ uint64_t attr_valid; /* Cache timeout for the attributes */ uint32_t entry_valid_nsec; uint32_t attr_valid_nsec; struct fuse_attr attr; }; struct fuse_forget_in { uint64_t nlookup; }; struct fuse_forget_one { uint64_t nodeid; uint64_t nlookup; }; struct fuse_batch_forget_in { uint32_t count; uint32_t dummy; }; struct fuse_getattr_in { uint32_t getattr_flags; uint32_t dummy; uint64_t fh; }; #define FUSE_COMPAT_ATTR_OUT_SIZE 96 struct fuse_attr_out { uint64_t attr_valid; /* Cache timeout for the attributes */ uint32_t attr_valid_nsec; uint32_t dummy; struct fuse_attr attr; }; #define FUSE_COMPAT_MKNOD_IN_SIZE 8 struct fuse_mknod_in { uint32_t mode; uint32_t rdev; uint32_t umask; uint32_t padding; }; struct fuse_mkdir_in { uint32_t mode; uint32_t umask; }; struct fuse_rename_in { uint64_t newdir; }; struct fuse_rename2_in { uint64_t newdir; uint32_t flags; uint32_t padding; }; struct fuse_link_in { uint64_t oldnodeid; }; struct fuse_setattr_in { uint32_t valid; uint32_t padding; uint64_t fh; uint64_t size; uint64_t lock_owner; uint64_t atime; uint64_t mtime; uint64_t ctime; uint32_t atimensec; uint32_t mtimensec; uint32_t ctimensec; uint32_t mode; uint32_t unused4; uint32_t uid; uint32_t gid; uint32_t unused5; }; struct fuse_open_in { uint32_t flags; uint32_t unused; }; struct fuse_create_in { uint32_t flags; uint32_t mode; uint32_t umask; uint32_t padding; }; struct fuse_open_out { uint64_t fh; uint32_t open_flags; uint32_t padding; }; struct fuse_release_in { uint64_t fh; uint32_t flags; uint32_t release_flags; uint64_t lock_owner; }; struct fuse_flush_in { uint64_t fh; uint32_t unused; uint32_t padding; uint64_t lock_owner; }; struct fuse_read_in { uint64_t fh; uint64_t offset; uint32_t size; uint32_t read_flags; uint64_t lock_owner; uint32_t flags; uint32_t padding; }; #define FUSE_COMPAT_WRITE_IN_SIZE 24 struct fuse_write_in { uint64_t fh; uint64_t offset; uint32_t size; uint32_t write_flags; uint64_t lock_owner; uint32_t flags; uint32_t padding; }; struct fuse_write_out { uint32_t size; uint32_t padding; }; #define FUSE_COMPAT_STATFS_SIZE 48 struct fuse_statfs_out { struct fuse_kstatfs st; }; struct fuse_fsync_in { uint64_t fh; uint32_t fsync_flags; uint32_t padding; }; struct fuse_setxattr_in { uint32_t size; uint32_t flags; }; struct fuse_listxattr_in { uint32_t size; uint32_t padding; }; struct fuse_listxattr_out { uint32_t size; uint32_t padding; }; struct fuse_getxattr_in { uint32_t size; uint32_t padding; }; struct fuse_getxattr_out { uint32_t size; uint32_t padding; }; struct fuse_lk_in { uint64_t fh; uint64_t owner; struct fuse_file_lock lk; uint32_t lk_flags; uint32_t padding; }; struct fuse_lk_out { struct fuse_file_lock lk; }; struct fuse_access_in { uint32_t mask; uint32_t padding; }; struct fuse_init_in { uint32_t major; uint32_t minor; uint32_t max_readahead; uint32_t flags; }; #define FUSE_COMPAT_INIT_OUT_SIZE 8 #define FUSE_COMPAT_22_INIT_OUT_SIZE 24 struct fuse_init_out { uint32_t major; uint32_t minor; uint32_t max_readahead; uint32_t flags; uint16_t max_background; uint16_t congestion_threshold; uint32_t max_write; uint32_t time_gran; - uint32_t unused[9]; + uint16_t max_pages; + uint16_t padding; + uint32_t unused[8]; }; #ifdef linux #define CUSE_INIT_INFO_MAX 4096 struct cuse_init_in { uint32_t major; uint32_t minor; uint32_t unused; uint32_t flags; }; struct cuse_init_out { uint32_t major; uint32_t minor; uint32_t unused; uint32_t flags; uint32_t max_read; uint32_t max_write; uint32_t dev_major; /* chardev major */ uint32_t dev_minor; /* chardev minor */ uint32_t spare[10]; }; #endif /* linux */ struct fuse_interrupt_in { uint64_t unique; }; struct fuse_bmap_in { uint64_t block; uint32_t blocksize; uint32_t padding; }; struct fuse_bmap_out { uint64_t block; }; struct fuse_ioctl_in { uint64_t fh; uint32_t flags; uint32_t cmd; uint64_t arg; uint32_t in_size; uint32_t out_size; }; struct fuse_ioctl_iovec { uint64_t base; uint64_t len; }; struct fuse_ioctl_out { int32_t result; uint32_t flags; uint32_t in_iovs; uint32_t out_iovs; }; struct fuse_poll_in { uint64_t fh; uint64_t kh; uint32_t flags; uint32_t events; }; struct fuse_poll_out { uint32_t revents; uint32_t padding; }; struct fuse_notify_poll_wakeup_out { uint64_t kh; }; struct fuse_fallocate_in { uint64_t fh; uint64_t offset; uint64_t length; uint32_t mode; uint32_t padding; }; struct fuse_in_header { uint32_t len; uint32_t opcode; uint64_t unique; uint64_t nodeid; uint32_t uid; uint32_t gid; uint32_t pid; uint32_t padding; }; struct fuse_out_header { uint32_t len; int32_t error; uint64_t unique; }; struct fuse_dirent { uint64_t ino; uint64_t off; uint32_t namelen; uint32_t type; char name[]; }; #define FUSE_NAME_OFFSET offsetof(struct fuse_dirent, name) #define FUSE_DIRENT_ALIGN(x) \ (((x) + sizeof(uint64_t) - 1) & ~(sizeof(uint64_t) - 1)) #define FUSE_DIRENT_SIZE(d) \ FUSE_DIRENT_ALIGN(FUSE_NAME_OFFSET + (d)->namelen) struct fuse_direntplus { struct fuse_entry_out entry_out; struct fuse_dirent dirent; }; #define FUSE_NAME_OFFSET_DIRENTPLUS \ offsetof(struct fuse_direntplus, dirent.name) #define FUSE_DIRENTPLUS_SIZE(d) \ FUSE_DIRENT_ALIGN(FUSE_NAME_OFFSET_DIRENTPLUS + (d)->dirent.namelen) struct fuse_notify_inval_inode_out { uint64_t ino; int64_t off; int64_t len; }; struct fuse_notify_inval_entry_out { uint64_t parent; uint32_t namelen; uint32_t padding; }; struct fuse_notify_delete_out { uint64_t parent; uint64_t child; uint32_t namelen; uint32_t padding; }; struct fuse_notify_store_out { uint64_t nodeid; uint64_t offset; uint32_t size; uint32_t padding; }; struct fuse_notify_retrieve_out { uint64_t notify_unique; uint64_t nodeid; uint64_t offset; uint32_t size; uint32_t padding; }; /* Matches the size of fuse_write_in */ struct fuse_notify_retrieve_in { uint64_t dummy1; uint64_t offset; uint32_t size; uint32_t dummy2; uint64_t dummy3; uint64_t dummy4; }; struct fuse_lseek_in { uint64_t fh; uint64_t offset; uint32_t whence; uint32_t padding; }; struct fuse_lseek_out { uint64_t offset; }; +struct fuse_copy_file_range_in { + uint64_t fh_in; + uint64_t off_in; + uint64_t nodeid_out; + uint64_t fh_out; + uint64_t off_out; + uint64_t len; + uint64_t flags; +}; + #endif /* _FUSE_FUSE_KERNEL_H */ diff --git a/sys/fs/fuse/fuse_vnops.c b/sys/fs/fuse/fuse_vnops.c index efac7e041cf6..1e9434f9403d 100644 --- a/sys/fs/fuse/fuse_vnops.c +++ b/sys/fs/fuse/fuse_vnops.c @@ -1,2623 +1,2745 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 2007-2009 Google Inc. and Amit Singh * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are * met: * * * Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following disclaimer * in the documentation and/or other materials provided with the * distribution. * * Neither the name of Google Inc. nor the names of its * contributors may be used to endorse or promote products derived from * this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * Copyright (C) 2005 Csaba Henk. * All rights reserved. * * Copyright (c) 2019 The FreeBSD Foundation * * Portions of this software were developed by BFF Storage Systems, LLC under * sponsorship from the FreeBSD Foundation. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "fuse.h" #include "fuse_file.h" #include "fuse_internal.h" #include "fuse_ipc.h" #include "fuse_node.h" #include "fuse_io.h" #include /* Maximum number of hardlinks to a single FUSE file */ #define FUSE_LINK_MAX UINT32_MAX SDT_PROVIDER_DECLARE(fusefs); /* * Fuse trace probe: * arg0: verbosity. Higher numbers give more verbose messages * arg1: Textual message */ SDT_PROBE_DEFINE2(fusefs, , vnops, trace, "int", "char*"); /* vnode ops */ static vop_access_t fuse_vnop_access; static vop_advlock_t fuse_vnop_advlock; static vop_bmap_t fuse_vnop_bmap; static vop_close_t fuse_fifo_close; static vop_close_t fuse_vnop_close; +static vop_copy_file_range_t fuse_vnop_copy_file_range; static vop_create_t fuse_vnop_create; static vop_deleteextattr_t fuse_vnop_deleteextattr; static vop_fdatasync_t fuse_vnop_fdatasync; static vop_fsync_t fuse_vnop_fsync; static vop_getattr_t fuse_vnop_getattr; static vop_getextattr_t fuse_vnop_getextattr; static vop_inactive_t fuse_vnop_inactive; static vop_ioctl_t fuse_vnop_ioctl; static vop_link_t fuse_vnop_link; static vop_listextattr_t fuse_vnop_listextattr; static vop_lookup_t fuse_vnop_lookup; static vop_mkdir_t fuse_vnop_mkdir; static vop_mknod_t fuse_vnop_mknod; static vop_open_t fuse_vnop_open; static vop_pathconf_t fuse_vnop_pathconf; static vop_read_t fuse_vnop_read; static vop_readdir_t fuse_vnop_readdir; static vop_readlink_t fuse_vnop_readlink; static vop_reclaim_t fuse_vnop_reclaim; static vop_remove_t fuse_vnop_remove; static vop_rename_t fuse_vnop_rename; static vop_rmdir_t fuse_vnop_rmdir; static vop_setattr_t fuse_vnop_setattr; static vop_setextattr_t fuse_vnop_setextattr; static vop_strategy_t fuse_vnop_strategy; static vop_symlink_t fuse_vnop_symlink; static vop_write_t fuse_vnop_write; static vop_getpages_t fuse_vnop_getpages; static vop_print_t fuse_vnop_print; static vop_vptofh_t fuse_vnop_vptofh; struct vop_vector fuse_fifoops = { .vop_default = &fifo_specops, .vop_access = fuse_vnop_access, .vop_close = fuse_fifo_close, .vop_fsync = fuse_vnop_fsync, .vop_getattr = fuse_vnop_getattr, .vop_inactive = fuse_vnop_inactive, .vop_pathconf = fuse_vnop_pathconf, .vop_print = fuse_vnop_print, .vop_read = VOP_PANIC, .vop_reclaim = fuse_vnop_reclaim, .vop_setattr = fuse_vnop_setattr, .vop_write = VOP_PANIC, .vop_vptofh = fuse_vnop_vptofh, }; VFS_VOP_VECTOR_REGISTER(fuse_fifoops); struct vop_vector fuse_vnops = { .vop_allocate = VOP_EINVAL, .vop_default = &default_vnodeops, .vop_access = fuse_vnop_access, .vop_advlock = fuse_vnop_advlock, .vop_bmap = fuse_vnop_bmap, .vop_close = fuse_vnop_close, + .vop_copy_file_range = fuse_vnop_copy_file_range, .vop_create = fuse_vnop_create, .vop_deleteextattr = fuse_vnop_deleteextattr, .vop_fsync = fuse_vnop_fsync, .vop_fdatasync = fuse_vnop_fdatasync, .vop_getattr = fuse_vnop_getattr, .vop_getextattr = fuse_vnop_getextattr, .vop_inactive = fuse_vnop_inactive, .vop_ioctl = fuse_vnop_ioctl, .vop_link = fuse_vnop_link, .vop_listextattr = fuse_vnop_listextattr, .vop_lookup = fuse_vnop_lookup, .vop_mkdir = fuse_vnop_mkdir, .vop_mknod = fuse_vnop_mknod, .vop_open = fuse_vnop_open, .vop_pathconf = fuse_vnop_pathconf, /* * TODO: implement vop_poll after upgrading to protocol 7.21. * FUSE_POLL was added in protocol 7.11, but it's kind of broken until * 7.21, which adds the ability for the client to choose which poll * events it wants, and for a client to deregister a file handle */ .vop_read = fuse_vnop_read, .vop_readdir = fuse_vnop_readdir, .vop_readlink = fuse_vnop_readlink, .vop_reclaim = fuse_vnop_reclaim, .vop_remove = fuse_vnop_remove, .vop_rename = fuse_vnop_rename, .vop_rmdir = fuse_vnop_rmdir, .vop_setattr = fuse_vnop_setattr, .vop_setextattr = fuse_vnop_setextattr, .vop_strategy = fuse_vnop_strategy, .vop_symlink = fuse_vnop_symlink, .vop_write = fuse_vnop_write, .vop_getpages = fuse_vnop_getpages, .vop_print = fuse_vnop_print, .vop_vptofh = fuse_vnop_vptofh, }; VFS_VOP_VECTOR_REGISTER(fuse_vnops); uma_zone_t fuse_pbuf_zone; /* Check permission for extattr operations, much like extattr_check_cred */ static int fuse_extattr_check_cred(struct vnode *vp, int ns, struct ucred *cred, struct thread *td, accmode_t accmode) { struct mount *mp = vnode_mount(vp); struct fuse_data *data = fuse_get_mpdata(mp); int default_permissions = data->dataflags & FSESS_DEFAULT_PERMISSIONS; /* * Kernel-invoked always succeeds. */ if (cred == NOCRED) return (0); /* * Do not allow privileged processes in jail to directly manipulate * system attributes. */ switch (ns) { case EXTATTR_NAMESPACE_SYSTEM: if (default_permissions) { return (priv_check_cred(cred, PRIV_VFS_EXTATTR_SYSTEM)); } return (0); case EXTATTR_NAMESPACE_USER: if (default_permissions) { return (fuse_internal_access(vp, accmode, td, cred)); } return (0); default: return (EPERM); } } /* Get a filehandle for a directory */ static int fuse_filehandle_get_dir(struct vnode *vp, struct fuse_filehandle **fufhp, struct ucred *cred, pid_t pid) { if (fuse_filehandle_get(vp, FREAD, fufhp, cred, pid) == 0) return 0; return fuse_filehandle_get(vp, FEXEC, fufhp, cred, pid); } /* Send FUSE_FLUSH for this vnode */ static int fuse_flush(struct vnode *vp, struct ucred *cred, pid_t pid, int fflag) { struct fuse_flush_in *ffi; struct fuse_filehandle *fufh; struct fuse_dispatcher fdi; struct thread *td = curthread; struct mount *mp = vnode_mount(vp); int err; if (fsess_not_impl(vnode_mount(vp), FUSE_FLUSH)) return 0; err = fuse_filehandle_getrw(vp, fflag, &fufh, cred, pid); if (err) return err; fdisp_init(&fdi, sizeof(*ffi)); fdisp_make_vp(&fdi, FUSE_FLUSH, vp, td, cred); ffi = fdi.indata; ffi->fh = fufh->fh_id; /* * If the file has a POSIX lock then we're supposed to set lock_owner. * If not, then lock_owner is undefined. So we may as well always set * it. */ ffi->lock_owner = td->td_proc->p_pid; err = fdisp_wait_answ(&fdi); if (err == ENOSYS) { fsess_set_notimpl(mp, FUSE_FLUSH); err = 0; } fdisp_destroy(&fdi); return err; } /* Close wrapper for fifos. */ static int fuse_fifo_close(struct vop_close_args *ap) { return (fifo_specops.vop_close(ap)); } /* Send FUSE_LSEEK for this node */ static int fuse_vnop_do_lseek(struct vnode *vp, struct thread *td, struct ucred *cred, pid_t pid, off_t *offp, int whence) { struct fuse_dispatcher fdi; struct fuse_filehandle *fufh; struct fuse_lseek_in *flsi; struct fuse_lseek_out *flso; struct mount *mp = vnode_mount(vp); int err; MPASS(VOP_ISLOCKED(vp)); err = fuse_filehandle_getrw(vp, FREAD, &fufh, cred, pid); if (err) return (err); fdisp_init(&fdi, sizeof(*flsi)); fdisp_make_vp(&fdi, FUSE_LSEEK, vp, td, cred); flsi = fdi.indata; flsi->fh = fufh->fh_id; flsi->offset = *offp; flsi->whence = whence; err = fdisp_wait_answ(&fdi); if (err == ENOSYS) { fsess_set_notimpl(mp, FUSE_LSEEK); } else if (err == 0) { fsess_set_impl(mp, FUSE_LSEEK); flso = fdi.answ; *offp = flso->offset; } fdisp_destroy(&fdi); return (err); } /* struct vnop_access_args { struct vnode *a_vp; #if VOP_ACCESS_TAKES_ACCMODE_T accmode_t a_accmode; #else int a_mode; #endif struct ucred *a_cred; struct thread *a_td; }; */ static int fuse_vnop_access(struct vop_access_args *ap) { struct vnode *vp = ap->a_vp; int accmode = ap->a_accmode; struct ucred *cred = ap->a_cred; struct fuse_data *data = fuse_get_mpdata(vnode_mount(vp)); int err; if (fuse_isdeadfs(vp)) { if (vnode_isvroot(vp)) { return 0; } return ENXIO; } if (!(data->dataflags & FSESS_INITED)) { if (vnode_isvroot(vp)) { if (priv_check_cred(cred, PRIV_VFS_ADMIN) || (fuse_match_cred(data->daemoncred, cred) == 0)) { return 0; } } return EBADF; } if (vnode_islnk(vp)) { return 0; } err = fuse_internal_access(vp, accmode, ap->a_td, ap->a_cred); return err; } /* * struct vop_advlock_args { * struct vop_generic_args a_gen; * struct vnode *a_vp; * void *a_id; * int a_op; * struct flock *a_fl; * int a_flags; * } */ static int fuse_vnop_advlock(struct vop_advlock_args *ap) { struct vnode *vp = ap->a_vp; struct flock *fl = ap->a_fl; struct thread *td = curthread; struct ucred *cred = td->td_ucred; pid_t pid = td->td_proc->p_pid; struct fuse_filehandle *fufh; struct fuse_dispatcher fdi; struct fuse_lk_in *fli; struct fuse_lk_out *flo; enum fuse_opcode op; int dataflags, err; int flags = ap->a_flags; dataflags = fuse_get_mpdata(vnode_mount(vp))->dataflags; if (fuse_isdeadfs(vp)) { return ENXIO; } if (!(dataflags & FSESS_POSIX_LOCKS)) return vop_stdadvlock(ap); /* FUSE doesn't properly support flock until protocol 7.17 */ if (flags & F_FLOCK) return vop_stdadvlock(ap); err = fuse_filehandle_get_anyflags(vp, &fufh, cred, pid); if (err) return err; fdisp_init(&fdi, sizeof(*fli)); switch(ap->a_op) { case F_GETLK: op = FUSE_GETLK; break; case F_SETLK: op = FUSE_SETLK; break; case F_SETLKW: op = FUSE_SETLKW; break; default: return EINVAL; } fdisp_make_vp(&fdi, op, vp, td, cred); fli = fdi.indata; fli->fh = fufh->fh_id; fli->owner = fl->l_pid; fli->lk.start = fl->l_start; if (fl->l_len != 0) fli->lk.end = fl->l_start + fl->l_len - 1; else fli->lk.end = INT64_MAX; fli->lk.type = fl->l_type; fli->lk.pid = fl->l_pid; err = fdisp_wait_answ(&fdi); fdisp_destroy(&fdi); if (err == 0 && op == FUSE_GETLK) { flo = fdi.answ; fl->l_type = flo->lk.type; fl->l_pid = flo->lk.pid; if (flo->lk.type != F_UNLCK) { fl->l_start = flo->lk.start; if (flo->lk.end == INT64_MAX) fl->l_len = 0; else fl->l_len = flo->lk.end - flo->lk.start + 1; fl->l_start = flo->lk.start; } } return err; } /* { struct vnode *a_vp; daddr_t a_bn; struct bufobj **a_bop; daddr_t *a_bnp; int *a_runp; int *a_runb; } */ static int fuse_vnop_bmap(struct vop_bmap_args *ap) { struct vnode *vp = ap->a_vp; struct bufobj **bo = ap->a_bop; struct thread *td = curthread; struct mount *mp; struct fuse_dispatcher fdi; struct fuse_bmap_in *fbi; struct fuse_bmap_out *fbo; struct fuse_data *data; uint64_t biosize; off_t filesize; daddr_t lbn = ap->a_bn; daddr_t *pbn = ap->a_bnp; int *runp = ap->a_runp; int *runb = ap->a_runb; int error = 0; int maxrun; if (fuse_isdeadfs(vp)) { return ENXIO; } mp = vnode_mount(vp); data = fuse_get_mpdata(mp); biosize = fuse_iosize(vp); maxrun = MIN(vp->v_mount->mnt_iosize_max / biosize - 1, data->max_readahead_blocks); if (bo != NULL) *bo = &vp->v_bufobj; /* * The FUSE_BMAP operation does not include the runp and runb * variables, so we must guess. Report nonzero contiguous runs so * cluster_read will combine adjacent reads. It's worthwhile to reduce * upcalls even if we don't know the true physical layout of the file. * * FUSE file systems may opt out of read clustering in two ways: * * mounting with -onoclusterr * * Setting max_readahead <= maxbcachebuf during FUSE_INIT */ if (runb != NULL) *runb = MIN(lbn, maxrun); if (runp != NULL) { error = fuse_vnode_size(vp, &filesize, td->td_ucred, td); if (error == 0) *runp = MIN(MAX(0, filesize / (off_t)biosize - lbn - 1), maxrun); else *runp = 0; } if (fsess_maybe_impl(mp, FUSE_BMAP)) { fdisp_init(&fdi, sizeof(*fbi)); fdisp_make_vp(&fdi, FUSE_BMAP, vp, td, td->td_ucred); fbi = fdi.indata; fbi->block = lbn; fbi->blocksize = biosize; error = fdisp_wait_answ(&fdi); if (error == ENOSYS) { fdisp_destroy(&fdi); fsess_set_notimpl(mp, FUSE_BMAP); error = 0; } else { fbo = fdi.answ; if (error == 0 && pbn != NULL) *pbn = fbo->block; fdisp_destroy(&fdi); return error; } } /* If the daemon doesn't support BMAP, make up a sensible default */ if (pbn != NULL) *pbn = lbn * btodb(biosize); return (error); } /* struct vop_close_args { struct vnode *a_vp; int a_fflag; struct ucred *a_cred; struct thread *a_td; }; */ static int fuse_vnop_close(struct vop_close_args *ap) { struct vnode *vp = ap->a_vp; struct ucred *cred = ap->a_cred; int fflag = ap->a_fflag; struct thread *td = ap->a_td; pid_t pid = td->td_proc->p_pid; int err = 0; if (fuse_isdeadfs(vp)) return 0; if (vnode_isdir(vp)) return 0; if (fflag & IO_NDELAY) return 0; err = fuse_flush(vp, cred, pid, fflag); /* TODO: close the file handle, if we're sure it's no longer used */ if ((VTOFUD(vp)->flag & FN_SIZECHANGE) != 0) { fuse_vnode_savesize(vp, cred, td->td_proc->p_pid); } return err; } +/* + struct vop_copy_file_range_args { + struct vop_generic_args a_gen; + struct vnode *a_invp; + off_t *a_inoffp; + struct vnode *a_outvp; + off_t *a_outoffp; + size_t *a_lenp; + unsigned int a_flags; + struct ucred *a_incred; + struct ucred *a_outcred; + struct thread *a_fsizetd; +} + */ +static int +fuse_vnop_copy_file_range(struct vop_copy_file_range_args *ap) +{ + struct vnode *invp = ap->a_invp; + struct vnode *outvp = ap->a_outvp; + struct mount *mp = vnode_mount(invp); + struct fuse_dispatcher fdi; + struct fuse_filehandle *infufh, *outfufh; + struct fuse_copy_file_range_in *fcfri; + struct ucred *incred = ap->a_incred; + struct ucred *outcred = ap->a_outcred; + struct fuse_write_out *fwo; + struct thread *td; + struct uio io; + pid_t pid; + int err; + + if (mp != vnode_mount(outvp)) + goto fallback; + + if (incred->cr_uid != outcred->cr_uid) + goto fallback; + + if (incred->cr_groups[0] != outcred->cr_groups[0]) + goto fallback; + + if (fsess_not_impl(mp, FUSE_COPY_FILE_RANGE)) + goto fallback; + + if (ap->a_fsizetd == NULL) + td = curthread; + else + td = ap->a_fsizetd; + pid = td->td_proc->p_pid; + + err = fuse_filehandle_getrw(invp, FREAD, &infufh, incred, pid); + if (err) + return (err); + + err = fuse_filehandle_getrw(outvp, FWRITE, &outfufh, outcred, pid); + if (err) + return (err); + + /* Lock both vnodes, avoiding risk of deadlock. */ + do { + err = vn_lock(outvp, LK_EXCLUSIVE); + if (invp == outvp) + break; + if (err == 0) { + err = vn_lock(invp, LK_SHARED | LK_NOWAIT); + if (err == 0) + break; + VOP_UNLOCK(outvp); + err = vn_lock(invp, LK_SHARED); + if (err == 0) + VOP_UNLOCK(invp); + } + } while (err == 0); + if (err != 0) + return (err); + + if (ap->a_fsizetd) { + io.uio_offset = *ap->a_outoffp; + io.uio_resid = *ap->a_lenp; + err = vn_rlimit_fsize(outvp, &io, ap->a_fsizetd); + if (err) + goto unlock; + } + + fdisp_init(&fdi, sizeof(*fcfri)); + fdisp_make_vp(&fdi, FUSE_COPY_FILE_RANGE, invp, td, incred); + fcfri = fdi.indata; + fcfri->fh_in = infufh->fh_id; + fcfri->off_in = *ap->a_inoffp; + fcfri->nodeid_out = VTOI(outvp); + fcfri->fh_out = outfufh->fh_id; + fcfri->off_out = *ap->a_outoffp; + fcfri->len = *ap->a_lenp; + fcfri->flags = 0; + + err = fdisp_wait_answ(&fdi); + if (err == 0) { + fwo = fdi.answ; + *ap->a_lenp = fwo->size; + *ap->a_inoffp += fwo->size; + *ap->a_outoffp += fwo->size; + fuse_internal_clear_suid_on_write(outvp, outcred, td); + } + fdisp_destroy(&fdi); + +unlock: + if (invp != outvp) + VOP_UNLOCK(invp); + VOP_UNLOCK(outvp); + + if (err == ENOSYS) { + fsess_set_notimpl(mp, FUSE_COPY_FILE_RANGE); +fallback: + err = vn_generic_copy_file_range(ap->a_invp, ap->a_inoffp, + ap->a_outvp, ap->a_outoffp, ap->a_lenp, ap->a_flags, + ap->a_incred, ap->a_outcred, ap->a_fsizetd); + } + + return (err); +} + static void fdisp_make_mknod_for_fallback( struct fuse_dispatcher *fdip, struct componentname *cnp, struct vnode *dvp, uint64_t parentnid, struct thread *td, struct ucred *cred, mode_t mode, enum fuse_opcode *op) { struct fuse_mknod_in *fmni; fdisp_init(fdip, sizeof(*fmni) + cnp->cn_namelen + 1); *op = FUSE_MKNOD; fdisp_make(fdip, *op, vnode_mount(dvp), parentnid, td, cred); fmni = fdip->indata; fmni->mode = mode; fmni->rdev = 0; memcpy((char *)fdip->indata + sizeof(*fmni), cnp->cn_nameptr, cnp->cn_namelen); ((char *)fdip->indata)[sizeof(*fmni) + cnp->cn_namelen] = '\0'; } /* struct vnop_create_args { struct vnode *a_dvp; struct vnode **a_vpp; struct componentname *a_cnp; struct vattr *a_vap; }; */ static int fuse_vnop_create(struct vop_create_args *ap) { struct vnode *dvp = ap->a_dvp; struct vnode **vpp = ap->a_vpp; struct componentname *cnp = ap->a_cnp; struct vattr *vap = ap->a_vap; struct thread *td = cnp->cn_thread; struct ucred *cred = cnp->cn_cred; struct fuse_data *data; struct fuse_create_in *fci; struct fuse_entry_out *feo; struct fuse_open_out *foo; struct fuse_dispatcher fdi, fdi2; struct fuse_dispatcher *fdip = &fdi; struct fuse_dispatcher *fdip2 = NULL; int err; struct mount *mp = vnode_mount(dvp); data = fuse_get_mpdata(mp); uint64_t parentnid = VTOFUD(dvp)->nid; mode_t mode = MAKEIMODE(vap->va_type, vap->va_mode); enum fuse_opcode op; int flags; if (fuse_isdeadfs(dvp)) return ENXIO; /* FUSE expects sockets to be created with FUSE_MKNOD */ if (vap->va_type == VSOCK) return fuse_internal_mknod(dvp, vpp, cnp, vap); /* * VOP_CREATE doesn't tell us the open(2) flags, so we guess. Only a * writable mode makes sense, and we might as well include readability * too. */ flags = O_RDWR; bzero(&fdi, sizeof(fdi)); if (vap->va_type != VREG) return (EINVAL); if (fsess_not_impl(mp, FUSE_CREATE) || vap->va_type == VSOCK) { /* Fallback to FUSE_MKNOD/FUSE_OPEN */ fdisp_make_mknod_for_fallback(fdip, cnp, dvp, parentnid, td, cred, mode, &op); } else { /* Use FUSE_CREATE */ size_t insize; op = FUSE_CREATE; fdisp_init(fdip, sizeof(*fci) + cnp->cn_namelen + 1); fdisp_make(fdip, op, vnode_mount(dvp), parentnid, td, cred); fci = fdip->indata; fci->mode = mode; fci->flags = O_CREAT | flags; if (fuse_libabi_geq(data, 7, 12)) { insize = sizeof(*fci); fci->umask = td->td_proc->p_pd->pd_cmask; } else { insize = sizeof(struct fuse_open_in); } memcpy((char *)fdip->indata + insize, cnp->cn_nameptr, cnp->cn_namelen); ((char *)fdip->indata)[insize + cnp->cn_namelen] = '\0'; } err = fdisp_wait_answ(fdip); if (err) { if (err == ENOSYS && op == FUSE_CREATE) { fsess_set_notimpl(mp, FUSE_CREATE); fdisp_destroy(fdip); fdisp_make_mknod_for_fallback(fdip, cnp, dvp, parentnid, td, cred, mode, &op); err = fdisp_wait_answ(fdip); } if (err) goto out; } feo = fdip->answ; if ((err = fuse_internal_checkentry(feo, vap->va_type))) { goto out; } if (op == FUSE_CREATE) { foo = (struct fuse_open_out*)(feo + 1); } else { /* Issue a separate FUSE_OPEN */ struct fuse_open_in *foi; fdip2 = &fdi2; fdisp_init(fdip2, sizeof(*foi)); fdisp_make(fdip2, FUSE_OPEN, vnode_mount(dvp), feo->nodeid, td, cred); foi = fdip2->indata; foi->flags = flags; err = fdisp_wait_answ(fdip2); if (err) goto out; foo = fdip2->answ; } err = fuse_vnode_get(mp, feo, feo->nodeid, dvp, vpp, cnp, vap->va_type); if (err) { struct fuse_release_in *fri; uint64_t nodeid = feo->nodeid; uint64_t fh_id = foo->fh; fdisp_init(fdip, sizeof(*fri)); fdisp_make(fdip, FUSE_RELEASE, mp, nodeid, td, cred); fri = fdip->indata; fri->fh = fh_id; fri->flags = flags; fuse_insert_callback(fdip->tick, fuse_internal_forget_callback); fuse_insert_message(fdip->tick, false); goto out; } ASSERT_VOP_ELOCKED(*vpp, "fuse_vnop_create"); fuse_internal_cache_attrs(*vpp, &feo->attr, feo->attr_valid, feo->attr_valid_nsec, NULL); fuse_filehandle_init(*vpp, FUFH_RDWR, NULL, td, cred, foo); fuse_vnode_open(*vpp, foo->open_flags, td); /* * Purge the parent's attribute cache because the daemon should've * updated its mtime and ctime */ fuse_vnode_clear_attr_cache(dvp); cache_purge_negative(dvp); out: if (fdip2) fdisp_destroy(fdip2); fdisp_destroy(fdip); return err; } /* struct vnop_fdatasync_args { struct vop_generic_args a_gen; struct vnode * a_vp; struct thread * a_td; }; */ static int fuse_vnop_fdatasync(struct vop_fdatasync_args *ap) { struct vnode *vp = ap->a_vp; struct thread *td = ap->a_td; int waitfor = MNT_WAIT; int err = 0; if (fuse_isdeadfs(vp)) { return 0; } if ((err = vop_stdfdatasync_buf(ap))) return err; return fuse_internal_fsync(vp, td, waitfor, true); } /* struct vnop_fsync_args { struct vop_generic_args a_gen; struct vnode * a_vp; int a_waitfor; struct thread * a_td; }; */ static int fuse_vnop_fsync(struct vop_fsync_args *ap) { struct vnode *vp = ap->a_vp; struct thread *td = ap->a_td; int waitfor = ap->a_waitfor; int err = 0; if (fuse_isdeadfs(vp)) { return 0; } if ((err = vop_stdfsync(ap))) return err; return fuse_internal_fsync(vp, td, waitfor, false); } /* struct vnop_getattr_args { struct vnode *a_vp; struct vattr *a_vap; struct ucred *a_cred; struct thread *a_td; }; */ static int fuse_vnop_getattr(struct vop_getattr_args *ap) { struct vnode *vp = ap->a_vp; struct vattr *vap = ap->a_vap; struct ucred *cred = ap->a_cred; struct thread *td = curthread; int err = 0; int dataflags; dataflags = fuse_get_mpdata(vnode_mount(vp))->dataflags; /* Note that we are not bailing out on a dead file system just yet. */ if (!(dataflags & FSESS_INITED)) { if (!vnode_isvroot(vp)) { fdata_set_dead(fuse_get_mpdata(vnode_mount(vp))); err = ENOTCONN; return err; } else { goto fake; } } err = fuse_internal_getattr(vp, vap, cred, td); if (err == ENOTCONN && vnode_isvroot(vp)) { /* see comment in fuse_vfsop_statfs() */ goto fake; } else { return err; } fake: bzero(vap, sizeof(*vap)); vap->va_type = vnode_vtype(vp); return 0; } /* struct vnop_inactive_args { struct vnode *a_vp; }; */ static int fuse_vnop_inactive(struct vop_inactive_args *ap) { struct vnode *vp = ap->a_vp; struct thread *td = curthread; struct fuse_vnode_data *fvdat = VTOFUD(vp); struct fuse_filehandle *fufh, *fufh_tmp; int need_flush = 1; LIST_FOREACH_SAFE(fufh, &fvdat->handles, next, fufh_tmp) { if (need_flush && vp->v_type == VREG) { if ((VTOFUD(vp)->flag & FN_SIZECHANGE) != 0) { fuse_vnode_savesize(vp, NULL, 0); } if ((fvdat->flag & FN_REVOKED) != 0) fuse_io_invalbuf(vp, td); else fuse_io_flushbuf(vp, MNT_WAIT, td); need_flush = 0; } fuse_filehandle_close(vp, fufh, td, NULL); } if ((fvdat->flag & FN_REVOKED) != 0) vrecycle(vp); return 0; } /* struct vnop_ioctl_args { struct vnode *a_vp; u_long a_command; caddr_t a_data; int a_fflag; struct ucred *a_cred; struct thread *a_td; }; */ static int fuse_vnop_ioctl(struct vop_ioctl_args *ap) { struct vnode *vp = ap->a_vp; struct mount *mp = vnode_mount(vp); struct ucred *cred = ap->a_cred; off_t *offp; pid_t pid = ap->a_td->td_proc->p_pid; int err; switch (ap->a_command) { case FIOSEEKDATA: case FIOSEEKHOLE: /* Call FUSE_LSEEK, if we can, or fall back to vop_stdioctl */ if (fsess_maybe_impl(mp, FUSE_LSEEK)) { int whence; offp = ap->a_data; if (ap->a_command == FIOSEEKDATA) whence = SEEK_DATA; else whence = SEEK_HOLE; vn_lock(vp, LK_SHARED | LK_RETRY); err = fuse_vnop_do_lseek(vp, ap->a_td, cred, pid, offp, whence); VOP_UNLOCK(vp); } if (fsess_not_impl(mp, FUSE_LSEEK)) err = vop_stdioctl(ap); break; default: /* TODO: implement FUSE_IOCTL */ err = ENOTTY; break; } return (err); } /* struct vnop_link_args { struct vnode *a_tdvp; struct vnode *a_vp; struct componentname *a_cnp; }; */ static int fuse_vnop_link(struct vop_link_args *ap) { struct vnode *vp = ap->a_vp; struct vnode *tdvp = ap->a_tdvp; struct componentname *cnp = ap->a_cnp; struct vattr *vap = VTOVA(vp); struct fuse_dispatcher fdi; struct fuse_entry_out *feo; struct fuse_link_in fli; int err; if (fuse_isdeadfs(vp)) { return ENXIO; } if (vnode_mount(tdvp) != vnode_mount(vp)) { return EXDEV; } /* * This is a seatbelt check to protect naive userspace filesystems from * themselves and the limitations of the FUSE IPC protocol. If a * filesystem does not allow attribute caching, assume it is capable of * validating that nlink does not overflow. */ if (vap != NULL && vap->va_nlink >= FUSE_LINK_MAX) return EMLINK; fli.oldnodeid = VTOI(vp); fdisp_init(&fdi, 0); fuse_internal_newentry_makerequest(vnode_mount(tdvp), VTOI(tdvp), cnp, FUSE_LINK, &fli, sizeof(fli), &fdi); if ((err = fdisp_wait_answ(&fdi))) { goto out; } feo = fdi.answ; err = fuse_internal_checkentry(feo, vnode_vtype(vp)); if (!err) { /* * Purge the parent's attribute cache because the daemon * should've updated its mtime and ctime */ fuse_vnode_clear_attr_cache(tdvp); fuse_internal_cache_attrs(vp, &feo->attr, feo->attr_valid, feo->attr_valid_nsec, NULL); } out: fdisp_destroy(&fdi); return err; } struct fuse_lookup_alloc_arg { struct fuse_entry_out *feo; struct componentname *cnp; uint64_t nid; enum vtype vtyp; }; /* Callback for vn_get_ino */ static int fuse_lookup_alloc(struct mount *mp, void *arg, int lkflags, struct vnode **vpp) { struct fuse_lookup_alloc_arg *flaa = arg; return fuse_vnode_get(mp, flaa->feo, flaa->nid, NULL, vpp, flaa->cnp, flaa->vtyp); } SDT_PROBE_DEFINE3(fusefs, , vnops, cache_lookup, "int", "struct timespec*", "struct timespec*"); SDT_PROBE_DEFINE2(fusefs, , vnops, lookup_cache_incoherent, "struct vnode*", "struct fuse_entry_out*"); /* struct vnop_lookup_args { struct vnodeop_desc *a_desc; struct vnode *a_dvp; struct vnode **a_vpp; struct componentname *a_cnp; }; */ int fuse_vnop_lookup(struct vop_lookup_args *ap) { struct vnode *dvp = ap->a_dvp; struct vnode **vpp = ap->a_vpp; struct componentname *cnp = ap->a_cnp; struct thread *td = cnp->cn_thread; struct ucred *cred = cnp->cn_cred; int nameiop = cnp->cn_nameiop; int flags = cnp->cn_flags; int wantparent = flags & (LOCKPARENT | WANTPARENT); int islastcn = flags & ISLASTCN; struct mount *mp = vnode_mount(dvp); struct fuse_data *data = fuse_get_mpdata(mp); int default_permissions = data->dataflags & FSESS_DEFAULT_PERMISSIONS; int err = 0; int lookup_err = 0; struct vnode *vp = NULL; struct fuse_dispatcher fdi; bool did_lookup = false; struct fuse_entry_out *feo = NULL; enum vtype vtyp; /* vnode type of target */ off_t filesize; /* filesize of target */ uint64_t nid; if (fuse_isdeadfs(dvp)) { *vpp = NULL; return ENXIO; } if (!vnode_isdir(dvp)) return ENOTDIR; if (islastcn && vfs_isrdonly(mp) && (nameiop != LOOKUP)) return EROFS; if ((cnp->cn_flags & NOEXECCHECK) != 0) cnp->cn_flags &= ~NOEXECCHECK; else if ((err = fuse_internal_access(dvp, VEXEC, td, cred))) return err; if (flags & ISDOTDOT) { KASSERT(VTOFUD(dvp)->flag & FN_PARENT_NID, ("Looking up .. is TODO")); nid = VTOFUD(dvp)->parent_nid; if (nid == 0) return ENOENT; /* .. is obviously a directory */ vtyp = VDIR; filesize = 0; } else if (cnp->cn_namelen == 1 && *(cnp->cn_nameptr) == '.') { nid = VTOI(dvp); /* . is obviously a directory */ vtyp = VDIR; filesize = 0; } else { struct timespec now, timeout; int ncpticks; /* here to accomodate for API contract */ err = cache_lookup(dvp, vpp, cnp, &timeout, &ncpticks); getnanouptime(&now); SDT_PROBE3(fusefs, , vnops, cache_lookup, err, &timeout, &now); switch (err) { case -1: /* positive match */ if (timespeccmp(&timeout, &now, >)) { counter_u64_add(fuse_lookup_cache_hits, 1); } else { /* Cache timeout */ counter_u64_add(fuse_lookup_cache_misses, 1); bintime_clear( &VTOFUD(*vpp)->entry_cache_timeout); cache_purge(*vpp); if (dvp != *vpp) vput(*vpp); else vrele(*vpp); *vpp = NULL; break; } return 0; case 0: /* no match in cache */ counter_u64_add(fuse_lookup_cache_misses, 1); break; case ENOENT: /* negative match */ getnanouptime(&now); if (timespeccmp(&timeout, &now, <=)) { /* Cache timeout */ cache_purge_negative(dvp); break; } /* fall through */ default: return err; } nid = VTOI(dvp); fdisp_init(&fdi, cnp->cn_namelen + 1); fdisp_make(&fdi, FUSE_LOOKUP, mp, nid, td, cred); memcpy(fdi.indata, cnp->cn_nameptr, cnp->cn_namelen); ((char *)fdi.indata)[cnp->cn_namelen] = '\0'; lookup_err = fdisp_wait_answ(&fdi); did_lookup = true; if (!lookup_err) { /* lookup call succeeded */ feo = (struct fuse_entry_out *)fdi.answ; nid = feo->nodeid; if (nid == 0) { /* zero nodeid means ENOENT and cache it */ struct timespec timeout; fdi.answ_stat = ENOENT; lookup_err = ENOENT; if (cnp->cn_flags & MAKEENTRY) { fuse_validity_2_timespec(feo, &timeout); cache_enter_time(dvp, *vpp, cnp, &timeout, NULL); } } else if (nid == FUSE_ROOT_ID) { lookup_err = EINVAL; } vtyp = IFTOVT(feo->attr.mode); filesize = feo->attr.size; } if (lookup_err && (!fdi.answ_stat || lookup_err != ENOENT)) { fdisp_destroy(&fdi); return lookup_err; } } /* lookup_err, if non-zero, must be ENOENT at this point */ if (lookup_err) { /* Entry not found */ if ((nameiop == CREATE || nameiop == RENAME) && islastcn) { if (default_permissions) err = fuse_internal_access(dvp, VWRITE, td, cred); else err = 0; if (!err) { /* * Set the SAVENAME flag to hold onto the * pathname for use later in VOP_CREATE or * VOP_RENAME. */ cnp->cn_flags |= SAVENAME; err = EJUSTRETURN; } } else { err = ENOENT; } } else { /* Entry was found */ if (flags & ISDOTDOT) { struct fuse_lookup_alloc_arg flaa; flaa.nid = nid; flaa.feo = feo; flaa.cnp = cnp; flaa.vtyp = vtyp; err = vn_vget_ino_gen(dvp, fuse_lookup_alloc, &flaa, 0, &vp); *vpp = vp; } else if (nid == VTOI(dvp)) { vref(dvp); *vpp = dvp; } else { struct fuse_vnode_data *fvdat; struct vattr *vap; err = fuse_vnode_get(vnode_mount(dvp), feo, nid, dvp, &vp, cnp, vtyp); if (err) goto out; *vpp = vp; /* * In the case where we are looking up a FUSE node * represented by an existing cached vnode, and the * true size reported by FUSE_LOOKUP doesn't match * the vnode's cached size, then any cached writes * beyond the file's current size are lost. * * We can get here: * * following attribute cache expiration, or * * due a bug in the daemon, or */ fvdat = VTOFUD(vp); if (vnode_isreg(vp) && ((filesize != fvdat->cached_attrs.va_size && fvdat->flag & FN_SIZECHANGE) || ((vap = VTOVA(vp)) && filesize != vap->va_size))) { SDT_PROBE2(fusefs, , vnops, lookup_cache_incoherent, vp, feo); fvdat->flag &= ~FN_SIZECHANGE; /* * The server changed the file's size even * though we had it cached, or had dirty writes * in the WB cache! */ printf("%s: cache incoherent on %s! " "Buggy FUSE server detected. To prevent " "data corruption, disable the data cache " "by mounting with -o direct_io, or as " "directed otherwise by your FUSE server's " "documentation\n", __func__, vnode_mount(vp)->mnt_stat.f_mntonname); int iosize = fuse_iosize(vp); v_inval_buf_range(vp, 0, INT64_MAX, iosize); } MPASS(feo != NULL); fuse_internal_cache_attrs(*vpp, &feo->attr, feo->attr_valid, feo->attr_valid_nsec, NULL); fuse_validity_2_bintime(feo->entry_valid, feo->entry_valid_nsec, &fvdat->entry_cache_timeout); if ((nameiop == DELETE || nameiop == RENAME) && islastcn && default_permissions) { struct vattr dvattr; err = fuse_internal_access(dvp, VWRITE, td, cred); if (err != 0) goto out; /* * if the parent's sticky bit is set, check * whether we're allowed to remove the file. * Need to figure out the vnode locking to make * this work. */ fuse_internal_getattr(dvp, &dvattr, cred, td); if ((dvattr.va_mode & S_ISTXT) && fuse_internal_access(dvp, VADMIN, td, cred) && fuse_internal_access(*vpp, VADMIN, td, cred)) { err = EPERM; goto out; } } if (islastcn && ( (nameiop == DELETE) || (nameiop == RENAME && wantparent))) { cnp->cn_flags |= SAVENAME; } } } out: if (err) { if (vp != NULL && dvp != vp) vput(vp); else if (vp != NULL) vrele(vp); *vpp = NULL; } if (did_lookup) fdisp_destroy(&fdi); return err; } /* struct vnop_mkdir_args { struct vnode *a_dvp; struct vnode **a_vpp; struct componentname *a_cnp; struct vattr *a_vap; }; */ static int fuse_vnop_mkdir(struct vop_mkdir_args *ap) { struct vnode *dvp = ap->a_dvp; struct vnode **vpp = ap->a_vpp; struct componentname *cnp = ap->a_cnp; struct vattr *vap = ap->a_vap; struct fuse_mkdir_in fmdi; if (fuse_isdeadfs(dvp)) { return ENXIO; } fmdi.mode = MAKEIMODE(vap->va_type, vap->va_mode); fmdi.umask = curthread->td_proc->p_pd->pd_cmask; return (fuse_internal_newentry(dvp, vpp, cnp, FUSE_MKDIR, &fmdi, sizeof(fmdi), VDIR)); } /* struct vnop_mknod_args { struct vnode *a_dvp; struct vnode **a_vpp; struct componentname *a_cnp; struct vattr *a_vap; }; */ static int fuse_vnop_mknod(struct vop_mknod_args *ap) { struct vnode *dvp = ap->a_dvp; struct vnode **vpp = ap->a_vpp; struct componentname *cnp = ap->a_cnp; struct vattr *vap = ap->a_vap; if (fuse_isdeadfs(dvp)) return ENXIO; return fuse_internal_mknod(dvp, vpp, cnp, vap); } /* struct vop_open_args { struct vnode *a_vp; int a_mode; struct ucred *a_cred; struct thread *a_td; int a_fdidx; / struct file *a_fp; }; */ static int fuse_vnop_open(struct vop_open_args *ap) { struct vnode *vp = ap->a_vp; int a_mode = ap->a_mode; struct thread *td = ap->a_td; struct ucred *cred = ap->a_cred; pid_t pid = td->td_proc->p_pid; struct fuse_vnode_data *fvdat; if (fuse_isdeadfs(vp)) return ENXIO; if (vp->v_type == VCHR || vp->v_type == VBLK || vp->v_type == VFIFO) return (EOPNOTSUPP); if ((a_mode & (FREAD | FWRITE | FEXEC)) == 0) return EINVAL; fvdat = VTOFUD(vp); if (fuse_filehandle_validrw(vp, a_mode, cred, pid)) { fuse_vnode_open(vp, 0, td); return 0; } return fuse_filehandle_open(vp, a_mode, NULL, td, cred); } static int fuse_vnop_pathconf(struct vop_pathconf_args *ap) { struct vnode *vp = ap->a_vp; struct mount *mp; switch (ap->a_name) { case _PC_FILESIZEBITS: *ap->a_retval = 64; return (0); case _PC_NAME_MAX: *ap->a_retval = NAME_MAX; return (0); case _PC_LINK_MAX: *ap->a_retval = MIN(LONG_MAX, FUSE_LINK_MAX); return (0); case _PC_SYMLINK_MAX: *ap->a_retval = MAXPATHLEN; return (0); case _PC_NO_TRUNC: *ap->a_retval = 1; return (0); case _PC_MIN_HOLE_SIZE: /* * The FUSE protocol provides no mechanism for a server to * report _PC_MIN_HOLE_SIZE. It's a protocol bug. Instead, * return EINVAL if the server does not support FUSE_LSEEK, or * 1 if it does. */ mp = vnode_mount(vp); if (!fsess_is_impl(mp, FUSE_LSEEK) && !fsess_not_impl(mp, FUSE_LSEEK)) { off_t offset = 0; /* Issue a FUSE_LSEEK to find out if it's implemented */ fuse_vnop_do_lseek(vp, curthread, curthread->td_ucred, curthread->td_proc->p_pid, &offset, SEEK_DATA); } if (fsess_is_impl(mp, FUSE_LSEEK)) { *ap->a_retval = 1; return (0); } else { /* * Probably FUSE_LSEEK is not implemented. It might * be, if the FUSE_LSEEK above returned an error like * EACCES, but in that case we can't tell, so it's * safest to report EINVAL anyway. */ return (EINVAL); } default: return (vop_stdpathconf(ap)); } } /* struct vnop_read_args { struct vnode *a_vp; struct uio *a_uio; int a_ioflag; struct ucred *a_cred; }; */ static int fuse_vnop_read(struct vop_read_args *ap) { struct vnode *vp = ap->a_vp; struct uio *uio = ap->a_uio; int ioflag = ap->a_ioflag; struct ucred *cred = ap->a_cred; pid_t pid = curthread->td_proc->p_pid; if (fuse_isdeadfs(vp)) { return ENXIO; } if (VTOFUD(vp)->flag & FN_DIRECTIO) { ioflag |= IO_DIRECT; } return fuse_io_dispatch(vp, uio, ioflag, cred, pid); } /* struct vnop_readdir_args { struct vnode *a_vp; struct uio *a_uio; struct ucred *a_cred; int *a_eofflag; int *a_ncookies; u_long **a_cookies; }; */ static int fuse_vnop_readdir(struct vop_readdir_args *ap) { struct vnode *vp = ap->a_vp; struct uio *uio = ap->a_uio; struct ucred *cred = ap->a_cred; struct fuse_filehandle *fufh = NULL; struct fuse_iov cookediov; int err = 0; u_long *cookies; off_t startoff; ssize_t tresid; int ncookies; bool closefufh = false; pid_t pid = curthread->td_proc->p_pid; if (ap->a_eofflag) *ap->a_eofflag = 0; if (fuse_isdeadfs(vp)) { return ENXIO; } if ( /* XXXIP ((uio_iovcnt(uio) > 1)) || */ (uio_resid(uio) < sizeof(struct dirent))) { return EINVAL; } tresid = uio->uio_resid; startoff = uio->uio_offset; err = fuse_filehandle_get_dir(vp, &fufh, cred, pid); if (err == EBADF && vnode_mount(vp)->mnt_flag & MNT_EXPORTED) { /* * nfsd will do VOP_READDIR without first doing VOP_OPEN. We * must implicitly open the directory here */ err = fuse_filehandle_open(vp, FREAD, &fufh, curthread, cred); if (err == 0) { /* * When a directory is opened, it must be read from * the beginning. Hopefully, the "startoff" still * exists as an offset cookie for the directory. * If not, it will read the entire directory without * returning any entries and just return eof. */ uio->uio_offset = 0; } closefufh = true; } if (err) return (err); if (ap->a_ncookies != NULL) { ncookies = uio->uio_resid / (offsetof(struct dirent, d_name) + 4) + 1; cookies = malloc(ncookies * sizeof(*cookies), M_TEMP, M_WAITOK); *ap->a_ncookies = ncookies; *ap->a_cookies = cookies; } else { ncookies = 0; cookies = NULL; } #define DIRCOOKEDSIZE FUSE_DIRENT_ALIGN(FUSE_NAME_OFFSET + MAXNAMLEN + 1) fiov_init(&cookediov, DIRCOOKEDSIZE); err = fuse_internal_readdir(vp, uio, startoff, fufh, &cookediov, &ncookies, cookies); fiov_teardown(&cookediov); if (closefufh) fuse_filehandle_close(vp, fufh, curthread, cred); if (ap->a_ncookies != NULL) { if (err == 0) { *ap->a_ncookies -= ncookies; } else { free(*ap->a_cookies, M_TEMP); *ap->a_ncookies = 0; *ap->a_cookies = NULL; } } if (err == 0 && tresid == uio->uio_resid) *ap->a_eofflag = 1; return err; } /* struct vnop_readlink_args { struct vnode *a_vp; struct uio *a_uio; struct ucred *a_cred; }; */ static int fuse_vnop_readlink(struct vop_readlink_args *ap) { struct vnode *vp = ap->a_vp; struct uio *uio = ap->a_uio; struct ucred *cred = ap->a_cred; struct fuse_dispatcher fdi; int err; if (fuse_isdeadfs(vp)) { return ENXIO; } if (!vnode_islnk(vp)) { return EINVAL; } fdisp_init(&fdi, 0); err = fdisp_simple_putget_vp(&fdi, FUSE_READLINK, vp, curthread, cred); if (err) { goto out; } if (((char *)fdi.answ)[0] == '/' && fuse_get_mpdata(vnode_mount(vp))->dataflags & FSESS_PUSH_SYMLINKS_IN) { char *mpth = vnode_mount(vp)->mnt_stat.f_mntonname; err = uiomove(mpth, strlen(mpth), uio); } if (!err) { err = uiomove(fdi.answ, fdi.iosize, uio); } out: fdisp_destroy(&fdi); return err; } /* struct vnop_reclaim_args { struct vnode *a_vp; }; */ static int fuse_vnop_reclaim(struct vop_reclaim_args *ap) { struct vnode *vp = ap->a_vp; struct thread *td = curthread; struct fuse_vnode_data *fvdat = VTOFUD(vp); struct fuse_filehandle *fufh, *fufh_tmp; if (!fvdat) { panic("FUSE: no vnode data during recycling"); } LIST_FOREACH_SAFE(fufh, &fvdat->handles, next, fufh_tmp) { printf("FUSE: vnode being reclaimed with open fufh " "(type=%#x)", fufh->fufh_type); fuse_filehandle_close(vp, fufh, td, NULL); } if (!fuse_isdeadfs(vp) && fvdat->nlookup > 0) { fuse_internal_forget_send(vnode_mount(vp), td, NULL, VTOI(vp), fvdat->nlookup); } cache_purge(vp); vfs_hash_remove(vp); fuse_vnode_destroy(vp); return 0; } /* struct vnop_remove_args { struct vnode *a_dvp; struct vnode *a_vp; struct componentname *a_cnp; }; */ static int fuse_vnop_remove(struct vop_remove_args *ap) { struct vnode *dvp = ap->a_dvp; struct vnode *vp = ap->a_vp; struct componentname *cnp = ap->a_cnp; int err; if (fuse_isdeadfs(vp)) { return ENXIO; } if (vnode_isdir(vp)) { return EPERM; } err = fuse_internal_remove(dvp, vp, cnp, FUSE_UNLINK); return err; } /* struct vnop_rename_args { struct vnode *a_fdvp; struct vnode *a_fvp; struct componentname *a_fcnp; struct vnode *a_tdvp; struct vnode *a_tvp; struct componentname *a_tcnp; }; */ static int fuse_vnop_rename(struct vop_rename_args *ap) { struct vnode *fdvp = ap->a_fdvp; struct vnode *fvp = ap->a_fvp; struct componentname *fcnp = ap->a_fcnp; struct vnode *tdvp = ap->a_tdvp; struct vnode *tvp = ap->a_tvp; struct componentname *tcnp = ap->a_tcnp; struct fuse_data *data; bool newparent = fdvp != tdvp; bool isdir = fvp->v_type == VDIR; int err = 0; if (fuse_isdeadfs(fdvp)) { return ENXIO; } if (fvp->v_mount != tdvp->v_mount || (tvp && fvp->v_mount != tvp->v_mount)) { SDT_PROBE2(fusefs, , vnops, trace, 1, "cross-device rename"); err = EXDEV; goto out; } cache_purge(fvp); /* * FUSE library is expected to check if target directory is not * under the source directory in the file system tree. * Linux performs this check at VFS level. */ /* * If source is a directory, and it will get a new parent, user must * have write permission to it, so ".." can be modified. */ data = fuse_get_mpdata(vnode_mount(tdvp)); if (data->dataflags & FSESS_DEFAULT_PERMISSIONS && isdir && newparent) { err = fuse_internal_access(fvp, VWRITE, tcnp->cn_thread, tcnp->cn_cred); if (err) goto out; } sx_xlock(&data->rename_lock); err = fuse_internal_rename(fdvp, fcnp, tdvp, tcnp); if (err == 0) { if (tdvp != fdvp) fuse_vnode_setparent(fvp, tdvp); if (tvp != NULL) fuse_vnode_setparent(tvp, NULL); } sx_unlock(&data->rename_lock); if (tvp != NULL && tvp != fvp) { cache_purge(tvp); } if (vnode_isdir(fvp)) { if ((tvp != NULL) && vnode_isdir(tvp)) { cache_purge(tdvp); } cache_purge(fdvp); } out: if (tdvp == tvp) { vrele(tdvp); } else { vput(tdvp); } if (tvp != NULL) { vput(tvp); } vrele(fdvp); vrele(fvp); return err; } /* struct vnop_rmdir_args { struct vnode *a_dvp; struct vnode *a_vp; struct componentname *a_cnp; } *ap; */ static int fuse_vnop_rmdir(struct vop_rmdir_args *ap) { struct vnode *dvp = ap->a_dvp; struct vnode *vp = ap->a_vp; int err; if (fuse_isdeadfs(vp)) { return ENXIO; } if (VTOFUD(vp) == VTOFUD(dvp)) { return EINVAL; } err = fuse_internal_remove(dvp, vp, ap->a_cnp, FUSE_RMDIR); return err; } /* struct vnop_setattr_args { struct vnode *a_vp; struct vattr *a_vap; struct ucred *a_cred; struct thread *a_td; }; */ static int fuse_vnop_setattr(struct vop_setattr_args *ap) { struct vnode *vp = ap->a_vp; struct vattr *vap = ap->a_vap; struct ucred *cred = ap->a_cred; struct thread *td = curthread; struct mount *mp; struct fuse_data *data; struct vattr old_va; int dataflags; int err = 0, err2; accmode_t accmode = 0; bool checkperm; bool drop_suid = false; gid_t cr_gid; mp = vnode_mount(vp); data = fuse_get_mpdata(mp); dataflags = data->dataflags; checkperm = dataflags & FSESS_DEFAULT_PERMISSIONS; if (cred->cr_ngroups > 0) cr_gid = cred->cr_groups[0]; else cr_gid = 0; if (fuse_isdeadfs(vp)) { return ENXIO; } if (vap->va_uid != (uid_t)VNOVAL) { if (checkperm) { /* Only root may change a file's owner */ err = priv_check_cred(cred, PRIV_VFS_CHOWN); if (err) { /* As a special case, allow the null chown */ err2 = fuse_internal_getattr(vp, &old_va, cred, td); if (err2) return (err2); if (vap->va_uid != old_va.va_uid) return err; else accmode |= VADMIN; drop_suid = true; } else accmode |= VADMIN; } else accmode |= VADMIN; } if (vap->va_gid != (gid_t)VNOVAL) { if (checkperm && priv_check_cred(cred, PRIV_VFS_CHOWN)) drop_suid = true; if (checkperm && !groupmember(vap->va_gid, cred)) { /* * Non-root users may only chgrp to one of their own * groups */ err = priv_check_cred(cred, PRIV_VFS_CHOWN); if (err) { /* As a special case, allow the null chgrp */ err2 = fuse_internal_getattr(vp, &old_va, cred, td); if (err2) return (err2); if (vap->va_gid != old_va.va_gid) return err; accmode |= VADMIN; } else accmode |= VADMIN; } else accmode |= VADMIN; } if (vap->va_size != VNOVAL) { switch (vp->v_type) { case VDIR: return (EISDIR); case VLNK: case VREG: if (vfs_isrdonly(mp)) return (EROFS); break; default: /* * According to POSIX, the result is unspecified * for file types other than regular files, * directories and shared memory objects. We * don't support shared memory objects in the file * system, and have dubious support for truncating * symlinks. Just ignore the request in other cases. */ return (0); } /* Don't set accmode. Permission to trunc is checked upstack */ } if (vap->va_atime.tv_sec != VNOVAL || vap->va_mtime.tv_sec != VNOVAL) { if (vap->va_vaflags & VA_UTIMES_NULL) accmode |= VWRITE; else accmode |= VADMIN; } if (drop_suid) { if (vap->va_mode != (mode_t)VNOVAL) vap->va_mode &= ~(S_ISUID | S_ISGID); else { err = fuse_internal_getattr(vp, &old_va, cred, td); if (err) return (err); vap->va_mode = old_va.va_mode & ~(S_ISUID | S_ISGID); } } if (vap->va_mode != (mode_t)VNOVAL) { /* Only root may set the sticky bit on non-directories */ if (checkperm && vp->v_type != VDIR && (vap->va_mode & S_ISTXT) && priv_check_cred(cred, PRIV_VFS_STICKYFILE)) return EFTYPE; if (checkperm && (vap->va_mode & S_ISGID)) { err = fuse_internal_getattr(vp, &old_va, cred, td); if (err) return (err); if (!groupmember(old_va.va_gid, cred)) { err = priv_check_cred(cred, PRIV_VFS_SETGID); if (err) return (err); } } accmode |= VADMIN; } if (vfs_isrdonly(mp)) return EROFS; if (checkperm) { err = fuse_internal_access(vp, accmode, td, cred); } else { err = 0; } if (err) return err; else return fuse_internal_setattr(vp, vap, td, cred); } /* struct vnop_strategy_args { struct vnode *a_vp; struct buf *a_bp; }; */ static int fuse_vnop_strategy(struct vop_strategy_args *ap) { struct vnode *vp = ap->a_vp; struct buf *bp = ap->a_bp; if (!vp || fuse_isdeadfs(vp)) { bp->b_ioflags |= BIO_ERROR; bp->b_error = ENXIO; bufdone(bp); return 0; } /* * VOP_STRATEGY always returns zero and signals error via bp->b_ioflags. * fuse_io_strategy sets bp's error fields */ (void)fuse_io_strategy(vp, bp); return 0; } /* struct vnop_symlink_args { struct vnode *a_dvp; struct vnode **a_vpp; struct componentname *a_cnp; struct vattr *a_vap; char *a_target; }; */ static int fuse_vnop_symlink(struct vop_symlink_args *ap) { struct vnode *dvp = ap->a_dvp; struct vnode **vpp = ap->a_vpp; struct componentname *cnp = ap->a_cnp; const char *target = ap->a_target; struct fuse_dispatcher fdi; int err; size_t len; if (fuse_isdeadfs(dvp)) { return ENXIO; } /* * Unlike the other creator type calls, here we have to create a message * where the name of the new entry comes first, and the data describing * the entry comes second. * Hence we can't rely on our handy fuse_internal_newentry() routine, * but put together the message manually and just call the core part. */ len = strlen(target) + 1; fdisp_init(&fdi, len + cnp->cn_namelen + 1); fdisp_make_vp(&fdi, FUSE_SYMLINK, dvp, curthread, NULL); memcpy(fdi.indata, cnp->cn_nameptr, cnp->cn_namelen); ((char *)fdi.indata)[cnp->cn_namelen] = '\0'; memcpy((char *)fdi.indata + cnp->cn_namelen + 1, target, len); err = fuse_internal_newentry_core(dvp, vpp, cnp, VLNK, &fdi); fdisp_destroy(&fdi); return err; } /* struct vnop_write_args { struct vnode *a_vp; struct uio *a_uio; int a_ioflag; struct ucred *a_cred; }; */ static int fuse_vnop_write(struct vop_write_args *ap) { struct vnode *vp = ap->a_vp; struct uio *uio = ap->a_uio; int ioflag = ap->a_ioflag; struct ucred *cred = ap->a_cred; pid_t pid = curthread->td_proc->p_pid; if (fuse_isdeadfs(vp)) { return ENXIO; } if (VTOFUD(vp)->flag & FN_DIRECTIO) { ioflag |= IO_DIRECT; } return fuse_io_dispatch(vp, uio, ioflag, cred, pid); } static daddr_t fuse_gbp_getblkno(struct vnode *vp, vm_ooffset_t off) { const int biosize = fuse_iosize(vp); return (off / biosize); } static int fuse_gbp_getblksz(struct vnode *vp, daddr_t lbn) { off_t filesize; int blksz, err; const int biosize = fuse_iosize(vp); err = fuse_vnode_size(vp, &filesize, NULL, NULL); KASSERT(err == 0, ("vfs_bio_getpages can't handle errors here")); if (err) return biosize; if ((off_t)lbn * biosize >= filesize) { blksz = 0; } else if ((off_t)(lbn + 1) * biosize > filesize) { blksz = filesize - (off_t)lbn *biosize; } else { blksz = biosize; } return (blksz); } /* struct vnop_getpages_args { struct vnode *a_vp; vm_page_t *a_m; int a_count; int a_reqpage; }; */ static int fuse_vnop_getpages(struct vop_getpages_args *ap) { struct vnode *vp = ap->a_vp; if (!fsess_opt_mmap(vnode_mount(vp))) { SDT_PROBE2(fusefs, , vnops, trace, 1, "called on non-cacheable vnode??\n"); return (VM_PAGER_ERROR); } return (vfs_bio_getpages(vp, ap->a_m, ap->a_count, ap->a_rbehind, ap->a_rahead, fuse_gbp_getblkno, fuse_gbp_getblksz)); } static const char extattr_namespace_separator = '.'; /* struct vop_getextattr_args { struct vop_generic_args a_gen; struct vnode *a_vp; int a_attrnamespace; const char *a_name; struct uio *a_uio; size_t *a_size; struct ucred *a_cred; struct thread *a_td; }; */ static int fuse_vnop_getextattr(struct vop_getextattr_args *ap) { struct vnode *vp = ap->a_vp; struct uio *uio = ap->a_uio; struct fuse_dispatcher fdi; struct fuse_getxattr_in *get_xattr_in; struct fuse_getxattr_out *get_xattr_out; struct mount *mp = vnode_mount(vp); struct thread *td = ap->a_td; struct ucred *cred = ap->a_cred; char *prefix; char *attr_str; size_t len; int err; if (fuse_isdeadfs(vp)) return (ENXIO); if (fsess_not_impl(mp, FUSE_GETXATTR)) return EOPNOTSUPP; err = fuse_extattr_check_cred(vp, ap->a_attrnamespace, cred, td, VREAD); if (err) return err; /* Default to looking for user attributes. */ if (ap->a_attrnamespace == EXTATTR_NAMESPACE_SYSTEM) prefix = EXTATTR_NAMESPACE_SYSTEM_STRING; else prefix = EXTATTR_NAMESPACE_USER_STRING; len = strlen(prefix) + sizeof(extattr_namespace_separator) + strlen(ap->a_name) + 1; fdisp_init(&fdi, len + sizeof(*get_xattr_in)); fdisp_make_vp(&fdi, FUSE_GETXATTR, vp, td, cred); get_xattr_in = fdi.indata; /* * Check to see whether we're querying the available size or * issuing the actual request. If we pass in 0, we get back struct * fuse_getxattr_out. If we pass in a non-zero size, we get back * that much data, without the struct fuse_getxattr_out header. */ if (uio == NULL) get_xattr_in->size = 0; else get_xattr_in->size = uio->uio_resid; attr_str = (char *)fdi.indata + sizeof(*get_xattr_in); snprintf(attr_str, len, "%s%c%s", prefix, extattr_namespace_separator, ap->a_name); err = fdisp_wait_answ(&fdi); if (err != 0) { if (err == ENOSYS) { fsess_set_notimpl(mp, FUSE_GETXATTR); err = EOPNOTSUPP; } goto out; } get_xattr_out = fdi.answ; if (ap->a_size != NULL) *ap->a_size = get_xattr_out->size; if (uio != NULL) err = uiomove(fdi.answ, fdi.iosize, uio); out: fdisp_destroy(&fdi); return (err); } /* struct vop_setextattr_args { struct vop_generic_args a_gen; struct vnode *a_vp; int a_attrnamespace; const char *a_name; struct uio *a_uio; struct ucred *a_cred; struct thread *a_td; }; */ static int fuse_vnop_setextattr(struct vop_setextattr_args *ap) { struct vnode *vp = ap->a_vp; struct uio *uio = ap->a_uio; struct fuse_dispatcher fdi; struct fuse_setxattr_in *set_xattr_in; struct mount *mp = vnode_mount(vp); struct thread *td = ap->a_td; struct ucred *cred = ap->a_cred; char *prefix; size_t len; char *attr_str; int err; if (fuse_isdeadfs(vp)) return (ENXIO); if (fsess_not_impl(mp, FUSE_SETXATTR)) return EOPNOTSUPP; if (vfs_isrdonly(mp)) return EROFS; /* Deleting xattrs must use VOP_DELETEEXTATTR instead */ if (ap->a_uio == NULL) { /* * If we got here as fallback from VOP_DELETEEXTATTR, then * return EOPNOTSUPP. */ if (fsess_not_impl(mp, FUSE_REMOVEXATTR)) return (EOPNOTSUPP); else return (EINVAL); } err = fuse_extattr_check_cred(vp, ap->a_attrnamespace, cred, td, VWRITE); if (err) return err; /* Default to looking for user attributes. */ if (ap->a_attrnamespace == EXTATTR_NAMESPACE_SYSTEM) prefix = EXTATTR_NAMESPACE_SYSTEM_STRING; else prefix = EXTATTR_NAMESPACE_USER_STRING; len = strlen(prefix) + sizeof(extattr_namespace_separator) + strlen(ap->a_name) + 1; fdisp_init(&fdi, len + sizeof(*set_xattr_in) + uio->uio_resid); fdisp_make_vp(&fdi, FUSE_SETXATTR, vp, td, cred); set_xattr_in = fdi.indata; set_xattr_in->size = uio->uio_resid; attr_str = (char *)fdi.indata + sizeof(*set_xattr_in); snprintf(attr_str, len, "%s%c%s", prefix, extattr_namespace_separator, ap->a_name); err = uiomove((char *)fdi.indata + sizeof(*set_xattr_in) + len, uio->uio_resid, uio); if (err != 0) { goto out; } err = fdisp_wait_answ(&fdi); if (err == ENOSYS) { fsess_set_notimpl(mp, FUSE_SETXATTR); err = EOPNOTSUPP; } if (err == ERESTART) { /* Can't restart after calling uiomove */ err = EINTR; } out: fdisp_destroy(&fdi); return (err); } /* * The Linux / FUSE extended attribute list is simply a collection of * NUL-terminated strings. The FreeBSD extended attribute list is a single * byte length followed by a non-NUL terminated string. So, this allows * conversion of the Linux / FUSE format to the FreeBSD format in place. * Linux attribute names are reported with the namespace as a prefix (e.g. * "user.attribute_name"), but in FreeBSD they are reported without the * namespace prefix (e.g. "attribute_name"). So, we're going from: * * user.attr_name1\0user.attr_name2\0 * * to: * * attr_name1attr_name2 * * Where "" is a single byte number of characters in the attribute name. * * Args: * prefix - exattr namespace prefix string * list, list_len - input list with namespace prefixes * bsd_list, bsd_list_len - output list compatible with bsd vfs */ static int fuse_xattrlist_convert(char *prefix, const char *list, int list_len, char *bsd_list, int *bsd_list_len) { int len, pos, dist_to_next, prefix_len; pos = 0; *bsd_list_len = 0; prefix_len = strlen(prefix); while (pos < list_len && list[pos] != '\0') { dist_to_next = strlen(&list[pos]) + 1; if (bcmp(&list[pos], prefix, prefix_len) == 0 && list[pos + prefix_len] == extattr_namespace_separator) { len = dist_to_next - (prefix_len + sizeof(extattr_namespace_separator)) - 1; if (len >= EXTATTR_MAXNAMELEN) return (ENAMETOOLONG); bsd_list[*bsd_list_len] = len; memcpy(&bsd_list[*bsd_list_len + 1], &list[pos + prefix_len + sizeof(extattr_namespace_separator)], len); *bsd_list_len += len + 1; } pos += dist_to_next; } return (0); } /* * List extended attributes * * The FUSE_LISTXATTR operation is based on Linux's listxattr(2) syscall, which * has a number of differences compared to its FreeBSD equivalent, * extattr_list_file: * * - FUSE_LISTXATTR returns all extended attributes across all namespaces, * whereas listxattr(2) only returns attributes for a single namespace * - FUSE_LISTXATTR prepends each attribute name with "namespace." * - If the provided buffer is not large enough to hold the result, * FUSE_LISTXATTR should return ERANGE, whereas listxattr is expected to * return as many results as will fit. */ /* struct vop_listextattr_args { struct vop_generic_args a_gen; struct vnode *a_vp; int a_attrnamespace; struct uio *a_uio; size_t *a_size; struct ucred *a_cred; struct thread *a_td; }; */ static int fuse_vnop_listextattr(struct vop_listextattr_args *ap) { struct vnode *vp = ap->a_vp; struct uio *uio = ap->a_uio; struct fuse_dispatcher fdi; struct fuse_listxattr_in *list_xattr_in; struct fuse_listxattr_out *list_xattr_out; struct mount *mp = vnode_mount(vp); struct thread *td = ap->a_td; struct ucred *cred = ap->a_cred; char *prefix; char *bsd_list = NULL; char *linux_list; int bsd_list_len; int linux_list_len; int err; if (fuse_isdeadfs(vp)) return (ENXIO); if (fsess_not_impl(mp, FUSE_LISTXATTR)) return EOPNOTSUPP; err = fuse_extattr_check_cred(vp, ap->a_attrnamespace, cred, td, VREAD); if (err) return err; /* * Add space for a NUL and the period separator if enabled. * Default to looking for user attributes. */ if (ap->a_attrnamespace == EXTATTR_NAMESPACE_SYSTEM) prefix = EXTATTR_NAMESPACE_SYSTEM_STRING; else prefix = EXTATTR_NAMESPACE_USER_STRING; fdisp_init(&fdi, sizeof(*list_xattr_in)); fdisp_make_vp(&fdi, FUSE_LISTXATTR, vp, td, cred); /* * Retrieve Linux / FUSE compatible list size. */ list_xattr_in = fdi.indata; list_xattr_in->size = 0; err = fdisp_wait_answ(&fdi); if (err != 0) { if (err == ENOSYS) { fsess_set_notimpl(mp, FUSE_LISTXATTR); err = EOPNOTSUPP; } goto out; } list_xattr_out = fdi.answ; linux_list_len = list_xattr_out->size; if (linux_list_len == 0) { if (ap->a_size != NULL) *ap->a_size = linux_list_len; goto out; } /* * Retrieve Linux / FUSE compatible list values. */ fdisp_refresh_vp(&fdi, FUSE_LISTXATTR, vp, td, cred); list_xattr_in = fdi.indata; list_xattr_in->size = linux_list_len; err = fdisp_wait_answ(&fdi); if (err == ERANGE) { /* * Race detected. The attribute list must've grown since the * first FUSE_LISTXATTR call. Start over. Go all the way back * to userland so we can process signals, if necessary, before * restarting. */ err = ERESTART; goto out; } else if (err != 0) goto out; linux_list = fdi.answ; /* FUSE doesn't allow the server to return more data than requested */ if (fdi.iosize > linux_list_len) { printf("WARNING: FUSE protocol violation. Server returned " "more extended attribute data than requested; " "should've returned ERANGE instead"); } else { /* But returning less data is fine */ linux_list_len = fdi.iosize; } /* * Retrieve the BSD compatible list values. * The Linux / FUSE attribute list format isn't the same * as FreeBSD's format. So we need to transform it into * FreeBSD's format before giving it to the user. */ bsd_list = malloc(linux_list_len, M_TEMP, M_WAITOK); err = fuse_xattrlist_convert(prefix, linux_list, linux_list_len, bsd_list, &bsd_list_len); if (err != 0) goto out; if (ap->a_size != NULL) *ap->a_size = bsd_list_len; if (uio != NULL) err = uiomove(bsd_list, bsd_list_len, uio); out: free(bsd_list, M_TEMP); fdisp_destroy(&fdi); return (err); } /* struct vop_deleteextattr_args { struct vop_generic_args a_gen; struct vnode *a_vp; int a_attrnamespace; const char *a_name; struct ucred *a_cred; struct thread *a_td; }; */ static int fuse_vnop_deleteextattr(struct vop_deleteextattr_args *ap) { struct vnode *vp = ap->a_vp; struct fuse_dispatcher fdi; struct mount *mp = vnode_mount(vp); struct thread *td = ap->a_td; struct ucred *cred = ap->a_cred; char *prefix; size_t len; char *attr_str; int err; if (fuse_isdeadfs(vp)) return (ENXIO); if (fsess_not_impl(mp, FUSE_REMOVEXATTR)) return EOPNOTSUPP; if (vfs_isrdonly(mp)) return EROFS; err = fuse_extattr_check_cred(vp, ap->a_attrnamespace, cred, td, VWRITE); if (err) return err; /* Default to looking for user attributes. */ if (ap->a_attrnamespace == EXTATTR_NAMESPACE_SYSTEM) prefix = EXTATTR_NAMESPACE_SYSTEM_STRING; else prefix = EXTATTR_NAMESPACE_USER_STRING; len = strlen(prefix) + sizeof(extattr_namespace_separator) + strlen(ap->a_name) + 1; fdisp_init(&fdi, len); fdisp_make_vp(&fdi, FUSE_REMOVEXATTR, vp, td, cred); attr_str = fdi.indata; snprintf(attr_str, len, "%s%c%s", prefix, extattr_namespace_separator, ap->a_name); err = fdisp_wait_answ(&fdi); if (err == ENOSYS) { fsess_set_notimpl(mp, FUSE_REMOVEXATTR); err = EOPNOTSUPP; } fdisp_destroy(&fdi); return (err); } /* struct vnop_print_args { struct vnode *a_vp; }; */ static int fuse_vnop_print(struct vop_print_args *ap) { struct fuse_vnode_data *fvdat = VTOFUD(ap->a_vp); printf("nodeid: %ju, parent nodeid: %ju, nlookup: %ju, flag: %#x\n", (uintmax_t)VTOILLU(ap->a_vp), (uintmax_t)fvdat->parent_nid, (uintmax_t)fvdat->nlookup, fvdat->flag); return 0; } /* * Get an NFS filehandle for a FUSE file. * * This will only work for FUSE file systems that guarantee the uniqueness of * nodeid:generation, which most don't. */ /* vop_vptofh { IN struct vnode *a_vp; IN struct fid *a_fhp; }; */ static int fuse_vnop_vptofh(struct vop_vptofh_args *ap) { struct vnode *vp = ap->a_vp; struct fuse_vnode_data *fvdat = VTOFUD(vp); struct fuse_fid *fhp = (struct fuse_fid *)(ap->a_fhp); _Static_assert(sizeof(struct fuse_fid) <= sizeof(struct fid), "FUSE fid type is too big"); struct mount *mp = vnode_mount(vp); struct fuse_data *data = fuse_get_mpdata(mp); struct vattr va; int err; if (!(data->dataflags & FSESS_EXPORT_SUPPORT)) return EOPNOTSUPP; err = fuse_internal_getattr(vp, &va, curthread->td_ucred, curthread); if (err) return err; /*ip = VTOI(ap->a_vp);*/ /*ufhp = (struct ufid *)ap->a_fhp;*/ fhp->len = sizeof(struct fuse_fid); fhp->nid = fvdat->nid; if (fvdat->generation <= UINT32_MAX) fhp->gen = fvdat->generation; else return EOVERFLOW; return (0); } diff --git a/tests/sys/fs/fusefs/Makefile b/tests/sys/fs/fusefs/Makefile index 8d199a53c074..2c858ff42dd1 100644 --- a/tests/sys/fs/fusefs/Makefile +++ b/tests/sys/fs/fusefs/Makefile @@ -1,86 +1,87 @@ # $FreeBSD$ .include PACKAGE= tests TESTSDIR= ${TESTSBASE}/sys/fs/fusefs # We could simply link all of these files into a single executable. But since # Kyua treats googletest programs as plain tests, it's better to separate them # out, so we get more granular reporting. GTESTS+= access GTESTS+= allow_other GTESTS+= bmap GTESTS+= cache +GTESTS+= copy_file_range GTESTS+= create GTESTS+= default_permissions GTESTS+= default_permissions_privileged GTESTS+= destroy GTESTS+= dev_fuse_poll GTESTS+= fifo GTESTS+= flush GTESTS+= forget GTESTS+= fsync GTESTS+= fsyncdir GTESTS+= getattr GTESTS+= interrupt GTESTS+= io GTESTS+= link GTESTS+= locks GTESTS+= lookup GTESTS+= lseek GTESTS+= mkdir GTESTS+= mknod GTESTS+= mount GTESTS+= nfs GTESTS+= notify GTESTS+= open GTESTS+= opendir GTESTS+= read GTESTS+= readdir GTESTS+= readlink GTESTS+= release GTESTS+= releasedir GTESTS+= rename GTESTS+= rmdir GTESTS+= setattr GTESTS+= statfs GTESTS+= symlink GTESTS+= unlink GTESTS+= write GTESTS+= xattr .for p in ${GTESTS} SRCS.$p+= ${p}.cc SRCS.$p+= getmntopts.c SRCS.$p+= mockfs.cc SRCS.$p+= utils.cc .endfor TEST_METADATA.default_permissions+= required_user="unprivileged" TEST_METADATA.default_permissions_privileged+= required_user="root" TEST_METADATA.mknod+= required_user="root" TEST_METADATA.nfs+= required_user="root" # TODO: drastically increase timeout after test development is mostly complete TEST_METADATA+= timeout=10 FUSEFS= ${SRCTOP}/sys/fs/fuse MOUNT= ${SRCTOP}/sbin/mount # Suppress warnings that GCC generates for the libc++ and gtest headers. CXXWARNFLAGS.gcc+= -Wno-placement-new -Wno-attributes .if ${COMPILER_TYPE} == "gcc" && ${COMPILER_VERSION} >= 80000 CXXWARNFLAGS+= -Wno-class-memaccess .endif CXXFLAGS+= -I${SRCTOP}/tests CXXFLAGS+= -I${FUSEFS} CXXFLAGS+= -I${MOUNT} .PATH: ${MOUNT} CXXSTD= c++14 LIBADD+= pthread LIBADD+= gmock gtest LIBADD+= util .include diff --git a/tests/sys/fs/fusefs/copy_file_range.cc b/tests/sys/fs/fusefs/copy_file_range.cc new file mode 100644 index 000000000000..bb8eecf8b862 --- /dev/null +++ b/tests/sys/fs/fusefs/copy_file_range.cc @@ -0,0 +1,401 @@ +/*- + * SPDX-License-Identifier: BSD-2-Clause-FreeBSD + * + * Copyright (c) 2020 Alan Somers + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $FreeBSD$ + */ + +extern "C" { +#include +#include +#include + +#include +#include +#include +} + +#include "mockfs.hh" +#include "utils.hh" + +using namespace testing; + +class CopyFileRange: public FuseTest { +public: +static sig_atomic_t s_sigxfsz; + +void SetUp() { + s_sigxfsz = 0; + FuseTest::SetUp(); +} + +void TearDown() { + struct sigaction sa; + + bzero(&sa, sizeof(sa)); + sa.sa_handler = SIG_DFL; + sigaction(SIGXFSZ, &sa, NULL); + + FuseTest::TearDown(); +} + +void expect_maybe_lseek(uint64_t ino) +{ + EXPECT_CALL(*m_mock, process( + ResultOf([=](auto in) { + return (in.header.opcode == FUSE_LSEEK && + in.header.nodeid == ino); + }, Eq(true)), + _) + ).Times(AtMost(1)) + .WillRepeatedly(Invoke(ReturnErrno(ENOSYS))); +} + +void expect_open(uint64_t ino, uint32_t flags, int times, uint64_t fh) +{ + EXPECT_CALL(*m_mock, process( + ResultOf([=](auto in) { + return (in.header.opcode == FUSE_OPEN && + in.header.nodeid == ino); + }, Eq(true)), + _) + ).Times(times) + .WillRepeatedly(Invoke( + ReturnImmediate([=](auto in __unused, auto& out) { + out.header.len = sizeof(out.header); + SET_OUT_HEADER_LEN(out, open); + out.body.open.fh = fh; + out.body.open.open_flags = flags; + }))); +} + +void expect_write(uint64_t ino, uint64_t offset, uint64_t isize, + uint64_t osize, const void *contents) +{ + EXPECT_CALL(*m_mock, process( + ResultOf([=](auto in) { + const char *buf = (const char*)in.body.bytes + + sizeof(struct fuse_write_in); + + return (in.header.opcode == FUSE_WRITE && + in.header.nodeid == ino && + in.body.write.offset == offset && + in.body.write.size == isize && + 0 == bcmp(buf, contents, isize)); + }, Eq(true)), + _) + ).WillOnce(Invoke(ReturnImmediate([=](auto in __unused, auto& out) { + SET_OUT_HEADER_LEN(out, write); + out.body.write.size = osize; + }))); +} + +}; + +sig_atomic_t CopyFileRange::s_sigxfsz = 0; + +void sigxfsz_handler(int __unused sig) { + CopyFileRange::s_sigxfsz = 1; +} + + +class CopyFileRange_7_27: public CopyFileRange { +public: +virtual void SetUp() { + m_kernel_minor_version = 27; + CopyFileRange::SetUp(); +} +}; + +TEST_F(CopyFileRange, eio) +{ + const char FULLPATH1[] = "mountpoint/src.txt"; + const char RELPATH1[] = "src.txt"; + const char FULLPATH2[] = "mountpoint/dst.txt"; + const char RELPATH2[] = "dst.txt"; + const uint64_t ino1 = 42; + const uint64_t ino2 = 43; + const uint64_t fh1 = 0xdeadbeef1a7ebabe; + const uint64_t fh2 = 0xdeadc0de88c0ffee; + off_t fsize1 = 1 << 20; /* 1 MiB */ + off_t fsize2 = 1 << 19; /* 512 KiB */ + off_t start1 = 1 << 18; + off_t start2 = 3 << 17; + ssize_t len = 65536; + int fd1, fd2; + + expect_lookup(RELPATH1, ino1, S_IFREG | 0644, fsize1, 1); + expect_lookup(RELPATH2, ino2, S_IFREG | 0644, fsize2, 1); + expect_open(ino1, 0, 1, fh1); + expect_open(ino2, 0, 1, fh2); + EXPECT_CALL(*m_mock, process( + ResultOf([=](auto in) { + return (in.header.opcode == FUSE_COPY_FILE_RANGE && + in.header.nodeid == ino1 && + in.body.copy_file_range.fh_in == fh1 && + (off_t)in.body.copy_file_range.off_in == start1 && + in.body.copy_file_range.nodeid_out == ino2 && + in.body.copy_file_range.fh_out == fh2 && + (off_t)in.body.copy_file_range.off_out == start2 && + in.body.copy_file_range.len == (size_t)len && + in.body.copy_file_range.flags == 0); + }, Eq(true)), + _) + ).WillOnce(Invoke(ReturnErrno(EIO))); + + fd1 = open(FULLPATH1, O_RDONLY); + fd2 = open(FULLPATH2, O_WRONLY); + ASSERT_EQ(-1, copy_file_range(fd1, &start1, fd2, &start2, len, 0)); + EXPECT_EQ(EIO, errno); +} + +/* + * If the server doesn't support FUSE_COPY_FILE_RANGE, the kernel should + * fallback to a read/write based implementation. + */ +TEST_F(CopyFileRange, fallback) +{ + const char FULLPATH1[] = "mountpoint/src.txt"; + const char RELPATH1[] = "src.txt"; + const char FULLPATH2[] = "mountpoint/dst.txt"; + const char RELPATH2[] = "dst.txt"; + const uint64_t ino1 = 42; + const uint64_t ino2 = 43; + const uint64_t fh1 = 0xdeadbeef1a7ebabe; + const uint64_t fh2 = 0xdeadc0de88c0ffee; + off_t fsize2 = 0; + off_t start1 = 0; + off_t start2 = 0; + const char *contents = "Hello, world!"; + ssize_t len; + int fd1, fd2; + + len = strlen(contents); + + /* + * Ensure that we read to EOF, just so the buffer cache's read size is + * predictable. + */ + expect_lookup(RELPATH1, ino1, S_IFREG | 0644, start1 + len, 1); + expect_lookup(RELPATH2, ino2, S_IFREG | 0644, fsize2, 1); + expect_open(ino1, 0, 1, fh1); + expect_open(ino2, 0, 1, fh2); + EXPECT_CALL(*m_mock, process( + ResultOf([=](auto in) { + return (in.header.opcode == FUSE_COPY_FILE_RANGE && + in.header.nodeid == ino1 && + in.body.copy_file_range.fh_in == fh1 && + (off_t)in.body.copy_file_range.off_in == start1 && + in.body.copy_file_range.nodeid_out == ino2 && + in.body.copy_file_range.fh_out == fh2 && + (off_t)in.body.copy_file_range.off_out == start2 && + in.body.copy_file_range.len == (size_t)len && + in.body.copy_file_range.flags == 0); + }, Eq(true)), + _) + ).WillOnce(Invoke(ReturnErrno(ENOSYS))); + expect_maybe_lseek(ino1); + expect_read(ino1, start1, len, len, contents, 0); + expect_write(ino2, start2, len, len, contents); + + fd1 = open(FULLPATH1, O_RDONLY); + ASSERT_GE(fd1, 0); + fd2 = open(FULLPATH2, O_WRONLY); + ASSERT_GE(fd2, 0); + ASSERT_EQ(len, copy_file_range(fd1, &start1, fd2, &start2, len, 0)); +} + +/* fusefs should respect RLIMIT_FSIZE */ +TEST_F(CopyFileRange, rlimit_fsize) +{ + const char FULLPATH1[] = "mountpoint/src.txt"; + const char RELPATH1[] = "src.txt"; + const char FULLPATH2[] = "mountpoint/dst.txt"; + const char RELPATH2[] = "dst.txt"; + struct rlimit rl; + const uint64_t ino1 = 42; + const uint64_t ino2 = 43; + const uint64_t fh1 = 0xdeadbeef1a7ebabe; + const uint64_t fh2 = 0xdeadc0de88c0ffee; + off_t fsize1 = 1 << 20; /* 1 MiB */ + off_t fsize2 = 1 << 19; /* 512 KiB */ + off_t start1 = 1 << 18; + off_t start2 = fsize2; + ssize_t len = 65536; + int fd1, fd2; + + expect_lookup(RELPATH1, ino1, S_IFREG | 0644, fsize1, 1); + expect_lookup(RELPATH2, ino2, S_IFREG | 0644, fsize2, 1); + expect_open(ino1, 0, 1, fh1); + expect_open(ino2, 0, 1, fh2); + EXPECT_CALL(*m_mock, process( + ResultOf([=](auto in) { + return (in.header.opcode == FUSE_COPY_FILE_RANGE); + }, Eq(true)), + _) + ).Times(0); + + rl.rlim_cur = fsize2; + rl.rlim_max = 10 * fsize2; + ASSERT_EQ(0, setrlimit(RLIMIT_FSIZE, &rl)) << strerror(errno); + ASSERT_NE(SIG_ERR, signal(SIGXFSZ, sigxfsz_handler)) << strerror(errno); + + fd1 = open(FULLPATH1, O_RDONLY); + fd2 = open(FULLPATH2, O_WRONLY); + ASSERT_EQ(-1, copy_file_range(fd1, &start1, fd2, &start2, len, 0)); + EXPECT_EQ(EFBIG, errno); + EXPECT_EQ(1, s_sigxfsz); +} + +TEST_F(CopyFileRange, ok) +{ + const char FULLPATH1[] = "mountpoint/src.txt"; + const char RELPATH1[] = "src.txt"; + const char FULLPATH2[] = "mountpoint/dst.txt"; + const char RELPATH2[] = "dst.txt"; + const uint64_t ino1 = 42; + const uint64_t ino2 = 43; + const uint64_t fh1 = 0xdeadbeef1a7ebabe; + const uint64_t fh2 = 0xdeadc0de88c0ffee; + off_t fsize1 = 1 << 20; /* 1 MiB */ + off_t fsize2 = 1 << 19; /* 512 KiB */ + off_t start1 = 1 << 18; + off_t start2 = 3 << 17; + ssize_t len = 65536; + int fd1, fd2; + + expect_lookup(RELPATH1, ino1, S_IFREG | 0644, fsize1, 1); + expect_lookup(RELPATH2, ino2, S_IFREG | 0644, fsize2, 1); + expect_open(ino1, 0, 1, fh1); + expect_open(ino2, 0, 1, fh2); + EXPECT_CALL(*m_mock, process( + ResultOf([=](auto in) { + return (in.header.opcode == FUSE_COPY_FILE_RANGE && + in.header.nodeid == ino1 && + in.body.copy_file_range.fh_in == fh1 && + (off_t)in.body.copy_file_range.off_in == start1 && + in.body.copy_file_range.nodeid_out == ino2 && + in.body.copy_file_range.fh_out == fh2 && + (off_t)in.body.copy_file_range.off_out == start2 && + in.body.copy_file_range.len == (size_t)len && + in.body.copy_file_range.flags == 0); + }, Eq(true)), + _) + ).WillOnce(Invoke(ReturnImmediate([=](auto in __unused, auto& out) { + SET_OUT_HEADER_LEN(out, write); + out.body.write.size = len; + }))); + + fd1 = open(FULLPATH1, O_RDONLY); + fd2 = open(FULLPATH2, O_WRONLY); + ASSERT_EQ(len, copy_file_range(fd1, &start1, fd2, &start2, len, 0)); +} + +/* + * copy_file_range can make copies within a single file, as long as the ranges + * don't overlap. + * */ +TEST_F(CopyFileRange, same_file) +{ + const char FULLPATH[] = "mountpoint/src.txt"; + const char RELPATH[] = "src.txt"; + const uint64_t ino = 4; + const uint64_t fh = 0xdeadbeefa7ebabe; + off_t fsize = 1 << 20; /* 1 MiB */ + off_t off_in = 1 << 18; + off_t off_out = 3 << 17; + ssize_t len = 65536; + int fd; + + expect_lookup(RELPATH, ino, S_IFREG | 0644, fsize, 1); + expect_open(ino, 0, 1, fh); + EXPECT_CALL(*m_mock, process( + ResultOf([=](auto in) { + return (in.header.opcode == FUSE_COPY_FILE_RANGE && + in.header.nodeid == ino && + in.body.copy_file_range.fh_in == fh && + (off_t)in.body.copy_file_range.off_in == off_in && + in.body.copy_file_range.nodeid_out == ino && + in.body.copy_file_range.fh_out == fh && + (off_t)in.body.copy_file_range.off_out == off_out && + in.body.copy_file_range.len == (size_t)len && + in.body.copy_file_range.flags == 0); + }, Eq(true)), + _) + ).WillOnce(Invoke(ReturnImmediate([=](auto in __unused, auto& out) { + SET_OUT_HEADER_LEN(out, write); + out.body.write.size = len; + }))); + + fd = open(FULLPATH, O_RDWR); + ASSERT_EQ(len, copy_file_range(fd, &off_in, fd, &off_out, len, 0)); +} + +/* With older protocol versions, no FUSE_COPY_FILE_RANGE should be attempted */ +TEST_F(CopyFileRange_7_27, fallback) +{ + const char FULLPATH1[] = "mountpoint/src.txt"; + const char RELPATH1[] = "src.txt"; + const char FULLPATH2[] = "mountpoint/dst.txt"; + const char RELPATH2[] = "dst.txt"; + const uint64_t ino1 = 42; + const uint64_t ino2 = 43; + const uint64_t fh1 = 0xdeadbeef1a7ebabe; + const uint64_t fh2 = 0xdeadc0de88c0ffee; + off_t fsize2 = 0; + off_t start1 = 0; + off_t start2 = 0; + const char *contents = "Hello, world!"; + ssize_t len; + int fd1, fd2; + + len = strlen(contents); + + /* + * Ensure that we read to EOF, just so the buffer cache's read size is + * predictable. + */ + expect_lookup(RELPATH1, ino1, S_IFREG | 0644, start1 + len, 1); + expect_lookup(RELPATH2, ino2, S_IFREG | 0644, fsize2, 1); + expect_open(ino1, 0, 1, fh1); + expect_open(ino2, 0, 1, fh2); + EXPECT_CALL(*m_mock, process( + ResultOf([=](auto in) { + return (in.header.opcode == FUSE_COPY_FILE_RANGE); + }, Eq(true)), + _) + ).Times(0); + expect_maybe_lseek(ino1); + expect_read(ino1, start1, len, len, contents, 0); + expect_write(ino2, start2, len, len, contents); + + fd1 = open(FULLPATH1, O_RDONLY); + ASSERT_GE(fd1, 0); + fd2 = open(FULLPATH2, O_WRONLY); + ASSERT_GE(fd2, 0); + ASSERT_EQ(len, copy_file_range(fd1, &start1, fd2, &start2, len, 0)); +} + + diff --git a/tests/sys/fs/fusefs/default_permissions.cc b/tests/sys/fs/fusefs/default_permissions.cc index 368c28bbcb3f..6401f926bb49 100644 --- a/tests/sys/fs/fusefs/default_permissions.cc +++ b/tests/sys/fs/fusefs/default_permissions.cc @@ -1,1315 +1,1421 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2019 The FreeBSD Foundation * * This software was developed by BFF Storage Systems, LLC under sponsorship * from the FreeBSD Foundation. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ /* * Tests for the "default_permissions" mount option. They must be in their own * file so they can be run as an unprivileged user */ extern "C" { #include #include #include #include #include } #include "mockfs.hh" #include "utils.hh" using namespace testing; class DefaultPermissions: public FuseTest { virtual void SetUp() { m_default_permissions = true; FuseTest::SetUp(); if (HasFatalFailure() || IsSkipped()) return; if (geteuid() == 0) { GTEST_SKIP() << "This test requires an unprivileged user"; } /* With -o default_permissions, FUSE_ACCESS should never be called */ EXPECT_CALL(*m_mock, process( ResultOf([=](auto in) { return (in.header.opcode == FUSE_ACCESS); }, Eq(true)), _) ).Times(0); } public: void expect_chmod(uint64_t ino, mode_t mode, uint64_t size = 0) { EXPECT_CALL(*m_mock, process( ResultOf([=](auto in) { return (in.header.opcode == FUSE_SETATTR && in.header.nodeid == ino && in.body.setattr.valid == FATTR_MODE && in.body.setattr.mode == mode); }, Eq(true)), _) ).WillOnce(Invoke(ReturnImmediate([=](auto in __unused, auto& out) { SET_OUT_HEADER_LEN(out, attr); out.body.attr.attr.ino = ino; // Must match nodeid out.body.attr.attr.mode = S_IFREG | mode; out.body.attr.attr.size = size; out.body.attr.attr_valid = UINT64_MAX; }))); } void expect_create(const char *relpath, uint64_t ino) { EXPECT_CALL(*m_mock, process( ResultOf([=](auto in) { const char *name = (const char*)in.body.bytes + sizeof(fuse_create_in); return (in.header.opcode == FUSE_CREATE && (0 == strcmp(relpath, name))); }, Eq(true)), _) ).WillOnce(Invoke(ReturnImmediate([=](auto in __unused, auto& out) { SET_OUT_HEADER_LEN(out, create); out.body.create.entry.attr.mode = S_IFREG | 0644; out.body.create.entry.nodeid = ino; out.body.create.entry.entry_valid = UINT64_MAX; out.body.create.entry.attr_valid = UINT64_MAX; }))); } +void expect_copy_file_range(uint64_t ino_in, uint64_t off_in, uint64_t ino_out, + uint64_t off_out, uint64_t len) +{ + EXPECT_CALL(*m_mock, process( + ResultOf([=](auto in) { + return (in.header.opcode == FUSE_COPY_FILE_RANGE && + in.header.nodeid == ino_in && + in.body.copy_file_range.off_in == off_in && + in.body.copy_file_range.nodeid_out == ino_out && + in.body.copy_file_range.off_out == off_out && + in.body.copy_file_range.len == len); + }, Eq(true)), + _) + ).WillOnce(Invoke(ReturnImmediate([=](auto in __unused, auto& out) { + SET_OUT_HEADER_LEN(out, write); + out.body.write.size = len; + }))); +} + void expect_getattr(uint64_t ino, mode_t mode, uint64_t attr_valid, int times, uid_t uid = 0, gid_t gid = 0) { EXPECT_CALL(*m_mock, process( ResultOf([=](auto in) { return (in.header.opcode == FUSE_GETATTR && in.header.nodeid == ino); }, Eq(true)), _) ).Times(times) .WillRepeatedly(Invoke(ReturnImmediate([=](auto i __unused, auto& out) { SET_OUT_HEADER_LEN(out, attr); out.body.attr.attr.ino = ino; // Must match nodeid out.body.attr.attr.mode = mode; out.body.attr.attr.size = 0; out.body.attr.attr.uid = uid; out.body.attr.attr.gid = gid; out.body.attr.attr_valid = attr_valid; }))); } void expect_lookup(const char *relpath, uint64_t ino, mode_t mode, uint64_t attr_valid, uid_t uid = 0, gid_t gid = 0) { FuseTest::expect_lookup(relpath, ino, mode, 0, 1, attr_valid, uid, gid); } }; class Access: public DefaultPermissions {}; class Chown: public DefaultPermissions {}; class Chgrp: public DefaultPermissions {}; +class CopyFileRange: public DefaultPermissions {}; class Lookup: public DefaultPermissions {}; class Open: public DefaultPermissions {}; class Setattr: public DefaultPermissions {}; class Unlink: public DefaultPermissions {}; class Utimensat: public DefaultPermissions {}; class Write: public DefaultPermissions {}; /* * Test permission handling during create, mkdir, mknod, link, symlink, and * rename vops (they all share a common path for permission checks in * VOP_LOOKUP) */ class Create: public DefaultPermissions {}; class Deleteextattr: public DefaultPermissions { public: void expect_removexattr() { EXPECT_CALL(*m_mock, process( ResultOf([=](auto in) { return (in.header.opcode == FUSE_REMOVEXATTR); }, Eq(true)), _) ).WillOnce(Invoke(ReturnErrno(0))); } }; class Getextattr: public DefaultPermissions { public: void expect_getxattr(ProcessMockerT r) { EXPECT_CALL(*m_mock, process( ResultOf([=](auto in) { return (in.header.opcode == FUSE_GETXATTR); }, Eq(true)), _) ).WillOnce(Invoke(r)); } }; class Listextattr: public DefaultPermissions { public: void expect_listxattr() { EXPECT_CALL(*m_mock, process( ResultOf([=](auto in) { return (in.header.opcode == FUSE_LISTXATTR); }, Eq(true)), _) ).WillOnce(Invoke(ReturnImmediate([](auto i __unused, auto& out) { out.body.listxattr.size = 0; SET_OUT_HEADER_LEN(out, listxattr); }))); } }; class Rename: public DefaultPermissions { public: /* * Expect a rename and respond with the given error. Don't both to * validate arguments; the tests in rename.cc do that. */ void expect_rename(int error) { EXPECT_CALL(*m_mock, process( ResultOf([=](auto in) { return (in.header.opcode == FUSE_RENAME); }, Eq(true)), _) ).WillOnce(Invoke(ReturnErrno(error))); } }; class Setextattr: public DefaultPermissions { public: void expect_setxattr(int error) { EXPECT_CALL(*m_mock, process( ResultOf([=](auto in) { return (in.header.opcode == FUSE_SETXATTR); }, Eq(true)), _) ).WillOnce(Invoke(ReturnErrno(error))); } }; /* Return a group to which this user does not belong */ static gid_t excluded_group() { int i, ngroups = 64; gid_t newgid, groups[ngroups]; getgrouplist(getlogin(), getegid(), groups, &ngroups); for (newgid = 0; ; newgid++) { bool belongs = false; for (i = 0; i < ngroups; i++) { if (groups[i] == newgid) belongs = true; } if (!belongs) break; } /* newgid is now a group to which the current user does not belong */ return newgid; } TEST_F(Access, eacces) { const char FULLPATH[] = "mountpoint/some_file.txt"; const char RELPATH[] = "some_file.txt"; uint64_t ino = 42; mode_t access_mode = X_OK; expect_getattr(FUSE_ROOT_ID, S_IFDIR | 0755, UINT64_MAX, 1); expect_lookup(RELPATH, ino, S_IFREG | 0644, UINT64_MAX); ASSERT_NE(0, access(FULLPATH, access_mode)); ASSERT_EQ(EACCES, errno); } TEST_F(Access, eacces_no_cached_attrs) { const char FULLPATH[] = "mountpoint/some_file.txt"; const char RELPATH[] = "some_file.txt"; uint64_t ino = 42; mode_t access_mode = X_OK; expect_getattr(FUSE_ROOT_ID, S_IFDIR | 0755, 0, 1); expect_lookup(RELPATH, ino, S_IFREG | 0644, 0); expect_getattr(ino, S_IFREG | 0644, 0, 1); /* * Once default_permissions is properly implemented, there might be * another FUSE_GETATTR or something in here. But there should not be * a FUSE_ACCESS */ ASSERT_NE(0, access(FULLPATH, access_mode)); ASSERT_EQ(EACCES, errno); } TEST_F(Access, ok) { const char FULLPATH[] = "mountpoint/some_file.txt"; const char RELPATH[] = "some_file.txt"; uint64_t ino = 42; mode_t access_mode = R_OK; expect_getattr(FUSE_ROOT_ID, S_IFDIR | 0755, UINT64_MAX, 1); expect_lookup(RELPATH, ino, S_IFREG | 0644, UINT64_MAX); /* * Once default_permissions is properly implemented, there might be * another FUSE_GETATTR or something in here. */ ASSERT_EQ(0, access(FULLPATH, access_mode)) << strerror(errno); } /* Unprivileged users may chown a file to their own uid */ TEST_F(Chown, chown_to_self) { const char FULLPATH[] = "mountpoint/some_file.txt"; const char RELPATH[] = "some_file.txt"; const uint64_t ino = 42; const mode_t mode = 0755; uid_t uid; uid = geteuid(); expect_getattr(FUSE_ROOT_ID, S_IFDIR | 0755, UINT64_MAX, 1, uid); expect_lookup(RELPATH, ino, S_IFREG | mode, UINT64_MAX, uid); /* The OS may optimize chown by omitting the redundant setattr */ EXPECT_CALL(*m_mock, process( ResultOf([](auto in) { return (in.header.opcode == FUSE_SETATTR); }, Eq(true)), _) ).WillRepeatedly(Invoke(ReturnImmediate([=](auto in __unused, auto& out){ SET_OUT_HEADER_LEN(out, attr); out.body.attr.attr.mode = S_IFREG | mode; out.body.attr.attr.uid = uid; }))); EXPECT_EQ(0, chown(FULLPATH, uid, -1)) << strerror(errno); } /* * A successful chown by a non-privileged non-owner should clear a file's SUID * bit */ TEST_F(Chown, clear_suid) { const char FULLPATH[] = "mountpoint/some_file.txt"; const char RELPATH[] = "some_file.txt"; uint64_t ino = 42; const mode_t oldmode = 06755; const mode_t newmode = 0755; uid_t uid = geteuid(); uint32_t valid = FATTR_UID | FATTR_MODE; expect_getattr(FUSE_ROOT_ID, S_IFDIR | 0755, UINT64_MAX, 1, uid); expect_lookup(RELPATH, ino, S_IFREG | oldmode, UINT64_MAX, uid); EXPECT_CALL(*m_mock, process( ResultOf([=](auto in) { return (in.header.opcode == FUSE_SETATTR && in.header.nodeid == ino && in.body.setattr.valid == valid && in.body.setattr.mode == newmode); }, Eq(true)), _) ).WillOnce(Invoke(ReturnImmediate([=](auto in __unused, auto& out) { SET_OUT_HEADER_LEN(out, attr); out.body.attr.attr.ino = ino; // Must match nodeid out.body.attr.attr.mode = S_IFREG | newmode; out.body.attr.attr_valid = UINT64_MAX; }))); EXPECT_EQ(0, chown(FULLPATH, uid, -1)) << strerror(errno); } /* Only root may change a file's owner */ TEST_F(Chown, eperm) { const char FULLPATH[] = "mountpoint/some_file.txt"; const char RELPATH[] = "some_file.txt"; const uint64_t ino = 42; const mode_t mode = 0755; expect_getattr(FUSE_ROOT_ID, S_IFDIR | 0755, UINT64_MAX, 1, geteuid()); expect_lookup(RELPATH, ino, S_IFREG | mode, UINT64_MAX, geteuid()); EXPECT_CALL(*m_mock, process( ResultOf([](auto in) { return (in.header.opcode == FUSE_SETATTR); }, Eq(true)), _) ).Times(0); EXPECT_NE(0, chown(FULLPATH, 0, -1)); EXPECT_EQ(EPERM, errno); } /* * A successful chgrp by a non-privileged non-owner should clear a file's SUID * bit */ TEST_F(Chgrp, clear_suid) { const char FULLPATH[] = "mountpoint/some_file.txt"; const char RELPATH[] = "some_file.txt"; uint64_t ino = 42; const mode_t oldmode = 06755; const mode_t newmode = 0755; uid_t uid = geteuid(); gid_t gid = getegid(); uint32_t valid = FATTR_GID | FATTR_MODE; expect_getattr(FUSE_ROOT_ID, S_IFDIR | 0755, UINT64_MAX, 1, uid); expect_lookup(RELPATH, ino, S_IFREG | oldmode, UINT64_MAX, uid, gid); EXPECT_CALL(*m_mock, process( ResultOf([=](auto in) { return (in.header.opcode == FUSE_SETATTR && in.header.nodeid == ino && in.body.setattr.valid == valid && in.body.setattr.mode == newmode); }, Eq(true)), _) ).WillOnce(Invoke(ReturnImmediate([=](auto in __unused, auto& out) { SET_OUT_HEADER_LEN(out, attr); out.body.attr.attr.ino = ino; // Must match nodeid out.body.attr.attr.mode = S_IFREG | newmode; out.body.attr.attr_valid = UINT64_MAX; }))); EXPECT_EQ(0, chown(FULLPATH, -1, gid)) << strerror(errno); } /* non-root users may only chgrp a file to a group they belong to */ TEST_F(Chgrp, eperm) { const char FULLPATH[] = "mountpoint/some_file.txt"; const char RELPATH[] = "some_file.txt"; const uint64_t ino = 42; const mode_t mode = 0755; uid_t uid; gid_t gid, newgid; uid = geteuid(); gid = getegid(); newgid = excluded_group(); expect_getattr(FUSE_ROOT_ID, S_IFDIR | 0755, UINT64_MAX, 1, uid, gid); expect_lookup(RELPATH, ino, S_IFREG | mode, UINT64_MAX, uid, gid); EXPECT_CALL(*m_mock, process( ResultOf([](auto in) { return (in.header.opcode == FUSE_SETATTR); }, Eq(true)), _) ).Times(0); EXPECT_NE(0, chown(FULLPATH, -1, newgid)); EXPECT_EQ(EPERM, errno); } TEST_F(Chgrp, ok) { const char FULLPATH[] = "mountpoint/some_file.txt"; const char RELPATH[] = "some_file.txt"; const uint64_t ino = 42; const mode_t mode = 0755; uid_t uid; gid_t gid, newgid; uid = geteuid(); gid = 0; newgid = getegid(); expect_getattr(FUSE_ROOT_ID, S_IFDIR | 0755, UINT64_MAX, 1, uid, gid); expect_lookup(RELPATH, ino, S_IFREG | mode, UINT64_MAX, uid, gid); /* The OS may optimize chgrp by omitting the redundant setattr */ EXPECT_CALL(*m_mock, process( ResultOf([](auto in) { return (in.header.opcode == FUSE_SETATTR && in.header.nodeid == ino); }, Eq(true)), _) ).WillRepeatedly(Invoke(ReturnImmediate([=](auto in __unused, auto& out){ SET_OUT_HEADER_LEN(out, attr); out.body.attr.attr.mode = S_IFREG | mode; out.body.attr.attr.uid = uid; out.body.attr.attr.gid = newgid; }))); EXPECT_EQ(0, chown(FULLPATH, -1, newgid)) << strerror(errno); } +/* A write by a non-owner should clear a file's SGID bit */ +TEST_F(CopyFileRange, clear_guid) +{ + const char FULLPATH_IN[] = "mountpoint/in.txt"; + const char RELPATH_IN[] = "in.txt"; + const char FULLPATH_OUT[] = "mountpoint/out.txt"; + const char RELPATH_OUT[] = "out.txt"; + struct stat sb; + uint64_t ino_in = 42; + uint64_t ino_out = 43; + mode_t oldmode = 02777; + mode_t newmode = 0777; + off_t fsize = 16; + off_t off_in = 0; + off_t off_out = 8; + off_t len = 8; + int fd_in, fd_out; + + expect_getattr(FUSE_ROOT_ID, S_IFDIR | 0755, UINT64_MAX, 1); + FuseTest::expect_lookup(RELPATH_IN, ino_in, S_IFREG | oldmode, fsize, 1, + UINT64_MAX, 0, 0); + expect_open(ino_in, 0, 1); + FuseTest::expect_lookup(RELPATH_OUT, ino_out, S_IFREG | oldmode, fsize, + 1, UINT64_MAX, 0, 0); + expect_open(ino_out, 0, 1); + expect_copy_file_range(ino_in, off_in, ino_out, off_out, len); + expect_chmod(ino_out, newmode, fsize); + + fd_in = open(FULLPATH_IN, O_RDONLY); + ASSERT_LE(0, fd_in) << strerror(errno); + fd_out = open(FULLPATH_OUT, O_WRONLY); + ASSERT_LE(0, fd_out) << strerror(errno); + ASSERT_EQ(len, + copy_file_range(fd_in, &off_in, fd_out, &off_out, len, 0)) + << strerror(errno); + ASSERT_EQ(0, fstat(fd_out, &sb)) << strerror(errno); + EXPECT_EQ(S_IFREG | newmode, sb.st_mode); + ASSERT_EQ(0, fstat(fd_in, &sb)) << strerror(errno); + EXPECT_EQ(S_IFREG | oldmode, sb.st_mode); + + leak(fd_in); + leak(fd_out); +} + +/* A write by a non-owner should clear a file's SUID bit */ +TEST_F(CopyFileRange, clear_suid) +{ + const char FULLPATH_IN[] = "mountpoint/in.txt"; + const char RELPATH_IN[] = "in.txt"; + const char FULLPATH_OUT[] = "mountpoint/out.txt"; + const char RELPATH_OUT[] = "out.txt"; + struct stat sb; + uint64_t ino_in = 42; + uint64_t ino_out = 43; + mode_t oldmode = 04777; + mode_t newmode = 0777; + off_t fsize = 16; + off_t off_in = 0; + off_t off_out = 8; + off_t len = 8; + int fd_in, fd_out; + + expect_getattr(FUSE_ROOT_ID, S_IFDIR | 0755, UINT64_MAX, 1); + FuseTest::expect_lookup(RELPATH_IN, ino_in, S_IFREG | oldmode, fsize, 1, + UINT64_MAX, 0, 0); + expect_open(ino_in, 0, 1); + FuseTest::expect_lookup(RELPATH_OUT, ino_out, S_IFREG | oldmode, fsize, + 1, UINT64_MAX, 0, 0); + expect_open(ino_out, 0, 1); + expect_copy_file_range(ino_in, off_in, ino_out, off_out, len); + expect_chmod(ino_out, newmode, fsize); + + fd_in = open(FULLPATH_IN, O_RDONLY); + ASSERT_LE(0, fd_in) << strerror(errno); + fd_out = open(FULLPATH_OUT, O_WRONLY); + ASSERT_LE(0, fd_out) << strerror(errno); + ASSERT_EQ(len, + copy_file_range(fd_in, &off_in, fd_out, &off_out, len, 0)) + << strerror(errno); + ASSERT_EQ(0, fstat(fd_out, &sb)) << strerror(errno); + EXPECT_EQ(S_IFREG | newmode, sb.st_mode); + ASSERT_EQ(0, fstat(fd_in, &sb)) << strerror(errno); + EXPECT_EQ(S_IFREG | oldmode, sb.st_mode); + + leak(fd_in); + leak(fd_out); +} + TEST_F(Create, ok) { const char FULLPATH[] = "mountpoint/some_file.txt"; const char RELPATH[] = "some_file.txt"; uint64_t ino = 42; int fd; expect_getattr(FUSE_ROOT_ID, S_IFDIR | 0777, UINT64_MAX, 1); EXPECT_LOOKUP(FUSE_ROOT_ID, RELPATH) .WillOnce(Invoke(ReturnErrno(ENOENT))); expect_create(RELPATH, ino); fd = open(FULLPATH, O_CREAT | O_EXCL, 0644); ASSERT_LE(0, fd) << strerror(errno); leak(fd); } TEST_F(Create, eacces) { const char FULLPATH[] = "mountpoint/some_file.txt"; const char RELPATH[] = "some_file.txt"; expect_getattr(FUSE_ROOT_ID, S_IFDIR | 0755, UINT64_MAX, 1); EXPECT_LOOKUP(FUSE_ROOT_ID, RELPATH) .WillOnce(Invoke(ReturnErrno(ENOENT))); ASSERT_EQ(-1, open(FULLPATH, O_CREAT | O_EXCL, 0644)); EXPECT_EQ(EACCES, errno); } TEST_F(Deleteextattr, eacces) { const char FULLPATH[] = "mountpoint/some_file.txt"; const char RELPATH[] = "some_file.txt"; uint64_t ino = 42; int ns = EXTATTR_NAMESPACE_USER; expect_getattr(FUSE_ROOT_ID, S_IFDIR | 0755, UINT64_MAX, 1); expect_lookup(RELPATH, ino, S_IFREG | 0644, UINT64_MAX, 0); ASSERT_EQ(-1, extattr_delete_file(FULLPATH, ns, "foo")); ASSERT_EQ(EACCES, errno); } TEST_F(Deleteextattr, ok) { const char FULLPATH[] = "mountpoint/some_file.txt"; const char RELPATH[] = "some_file.txt"; uint64_t ino = 42; int ns = EXTATTR_NAMESPACE_USER; expect_getattr(FUSE_ROOT_ID, S_IFDIR | 0755, UINT64_MAX, 1); expect_lookup(RELPATH, ino, S_IFREG | 0644, UINT64_MAX, geteuid()); expect_removexattr(); ASSERT_EQ(0, extattr_delete_file(FULLPATH, ns, "foo")) << strerror(errno); } /* Delete system attributes requires superuser privilege */ TEST_F(Deleteextattr, system) { const char FULLPATH[] = "mountpoint/some_file.txt"; const char RELPATH[] = "some_file.txt"; uint64_t ino = 42; int ns = EXTATTR_NAMESPACE_SYSTEM; expect_getattr(FUSE_ROOT_ID, S_IFDIR | 0755, UINT64_MAX, 1); expect_lookup(RELPATH, ino, S_IFREG | 0666, UINT64_MAX, geteuid()); ASSERT_EQ(-1, extattr_delete_file(FULLPATH, ns, "foo")); ASSERT_EQ(EPERM, errno); } /* Anybody with write permission can set both timestamps to UTIME_NOW */ TEST_F(Utimensat, utime_now) { const char FULLPATH[] = "mountpoint/some_file.txt"; const char RELPATH[] = "some_file.txt"; const uint64_t ino = 42; /* Write permissions for everybody */ const mode_t mode = 0666; uid_t owner = 0; const timespec times[2] = { {.tv_sec = 0, .tv_nsec = UTIME_NOW}, {.tv_sec = 0, .tv_nsec = UTIME_NOW}, }; expect_getattr(FUSE_ROOT_ID, S_IFDIR | 0755, UINT64_MAX, 1); expect_lookup(RELPATH, ino, S_IFREG | mode, UINT64_MAX, owner); EXPECT_CALL(*m_mock, process( ResultOf([](auto in) { return (in.header.opcode == FUSE_SETATTR && in.header.nodeid == ino && in.body.setattr.valid & FATTR_ATIME && in.body.setattr.valid & FATTR_MTIME); }, Eq(true)), _) ).WillOnce(Invoke(ReturnImmediate([](auto in __unused, auto& out) { SET_OUT_HEADER_LEN(out, attr); out.body.attr.attr.mode = S_IFREG | mode; }))); ASSERT_EQ(0, utimensat(AT_FDCWD, FULLPATH, ×[0], 0)) << strerror(errno); } /* Anybody can set both timestamps to UTIME_OMIT */ TEST_F(Utimensat, utime_omit) { const char FULLPATH[] = "mountpoint/some_file.txt"; const char RELPATH[] = "some_file.txt"; const uint64_t ino = 42; /* Write permissions for no one */ const mode_t mode = 0444; uid_t owner = 0; const timespec times[2] = { {.tv_sec = 0, .tv_nsec = UTIME_OMIT}, {.tv_sec = 0, .tv_nsec = UTIME_OMIT}, }; expect_getattr(FUSE_ROOT_ID, S_IFDIR | 0755, UINT64_MAX, 1); expect_lookup(RELPATH, ino, S_IFREG | mode, UINT64_MAX, owner); ASSERT_EQ(0, utimensat(AT_FDCWD, FULLPATH, ×[0], 0)) << strerror(errno); } /* Deleting user attributes merely requires WRITE privilege */ TEST_F(Deleteextattr, user) { const char FULLPATH[] = "mountpoint/some_file.txt"; const char RELPATH[] = "some_file.txt"; uint64_t ino = 42; int ns = EXTATTR_NAMESPACE_USER; expect_getattr(FUSE_ROOT_ID, S_IFDIR | 0755, UINT64_MAX, 1); expect_lookup(RELPATH, ino, S_IFREG | 0666, UINT64_MAX, 0); expect_removexattr(); ASSERT_EQ(0, extattr_delete_file(FULLPATH, ns, "foo")) << strerror(errno); } TEST_F(Getextattr, eacces) { const char FULLPATH[] = "mountpoint/some_file.txt"; const char RELPATH[] = "some_file.txt"; uint64_t ino = 42; char data[80]; int ns = EXTATTR_NAMESPACE_USER; expect_getattr(FUSE_ROOT_ID, S_IFDIR | 0755, UINT64_MAX, 1); expect_lookup(RELPATH, ino, S_IFREG | 0600, UINT64_MAX, 0); ASSERT_EQ(-1, extattr_get_file(FULLPATH, ns, "foo", data, sizeof(data))); ASSERT_EQ(EACCES, errno); } TEST_F(Getextattr, ok) { const char FULLPATH[] = "mountpoint/some_file.txt"; const char RELPATH[] = "some_file.txt"; uint64_t ino = 42; char data[80]; const char value[] = "whatever"; ssize_t value_len = strlen(value) + 1; int ns = EXTATTR_NAMESPACE_USER; ssize_t r; expect_getattr(FUSE_ROOT_ID, S_IFDIR | 0755, UINT64_MAX, 1); /* Getting user attributes only requires read access */ expect_lookup(RELPATH, ino, S_IFREG | 0444, UINT64_MAX, 0); expect_getxattr( ReturnImmediate([&](auto in __unused, auto& out) { memcpy((void*)out.body.bytes, value, value_len); out.header.len = sizeof(out.header) + value_len; }) ); r = extattr_get_file(FULLPATH, ns, "foo", data, sizeof(data)); ASSERT_EQ(value_len, r) << strerror(errno); EXPECT_STREQ(value, data); } /* Getting system attributes requires superuser privileges */ TEST_F(Getextattr, system) { const char FULLPATH[] = "mountpoint/some_file.txt"; const char RELPATH[] = "some_file.txt"; uint64_t ino = 42; char data[80]; int ns = EXTATTR_NAMESPACE_SYSTEM; expect_getattr(FUSE_ROOT_ID, S_IFDIR | 0755, UINT64_MAX, 1); expect_lookup(RELPATH, ino, S_IFREG | 0666, UINT64_MAX, geteuid()); ASSERT_EQ(-1, extattr_get_file(FULLPATH, ns, "foo", data, sizeof(data))); ASSERT_EQ(EPERM, errno); } TEST_F(Listextattr, eacces) { const char FULLPATH[] = "mountpoint/some_file.txt"; const char RELPATH[] = "some_file.txt"; uint64_t ino = 42; int ns = EXTATTR_NAMESPACE_USER; expect_getattr(FUSE_ROOT_ID, S_IFDIR | 0777, UINT64_MAX, 1); expect_lookup(RELPATH, ino, S_IFREG | 0600, UINT64_MAX, 0); ASSERT_EQ(-1, extattr_list_file(FULLPATH, ns, NULL, 0)); ASSERT_EQ(EACCES, errno); } TEST_F(Listextattr, ok) { const char FULLPATH[] = "mountpoint/some_file.txt"; const char RELPATH[] = "some_file.txt"; uint64_t ino = 42; int ns = EXTATTR_NAMESPACE_USER; expect_getattr(FUSE_ROOT_ID, S_IFDIR | 0777, UINT64_MAX, 1); /* Listing user extended attributes merely requires read access */ expect_lookup(RELPATH, ino, S_IFREG | 0644, UINT64_MAX, 0); expect_listxattr(); ASSERT_EQ(0, extattr_list_file(FULLPATH, ns, NULL, 0)) << strerror(errno); } /* Listing system xattrs requires superuser privileges */ TEST_F(Listextattr, system) { const char FULLPATH[] = "mountpoint/some_file.txt"; const char RELPATH[] = "some_file.txt"; uint64_t ino = 42; int ns = EXTATTR_NAMESPACE_SYSTEM; expect_getattr(FUSE_ROOT_ID, S_IFDIR | 0777, UINT64_MAX, 1); /* Listing user extended attributes merely requires read access */ expect_lookup(RELPATH, ino, S_IFREG | 0644, UINT64_MAX, geteuid()); ASSERT_EQ(-1, extattr_list_file(FULLPATH, ns, NULL, 0)); ASSERT_EQ(EPERM, errno); } /* A component of the search path lacks execute permissions */ TEST_F(Lookup, eacces) { const char FULLPATH[] = "mountpoint/some_dir/some_file.txt"; const char RELDIRPATH[] = "some_dir"; uint64_t dir_ino = 42; expect_getattr(FUSE_ROOT_ID, S_IFDIR | 0755, UINT64_MAX, 1); expect_lookup(RELDIRPATH, dir_ino, S_IFDIR | 0700, UINT64_MAX, 0); EXPECT_EQ(-1, access(FULLPATH, F_OK)); EXPECT_EQ(EACCES, errno); } TEST_F(Open, eacces) { const char FULLPATH[] = "mountpoint/some_file.txt"; const char RELPATH[] = "some_file.txt"; uint64_t ino = 42; expect_getattr(FUSE_ROOT_ID, S_IFDIR | 0755, UINT64_MAX, 1); expect_lookup(RELPATH, ino, S_IFREG | 0644, UINT64_MAX); EXPECT_EQ(-1, open(FULLPATH, O_RDWR)); EXPECT_EQ(EACCES, errno); } TEST_F(Open, ok) { const char FULLPATH[] = "mountpoint/some_file.txt"; const char RELPATH[] = "some_file.txt"; uint64_t ino = 42; int fd; expect_getattr(FUSE_ROOT_ID, S_IFDIR | 0755, UINT64_MAX, 1); expect_lookup(RELPATH, ino, S_IFREG | 0644, UINT64_MAX); expect_open(ino, 0, 1); fd = open(FULLPATH, O_RDONLY); ASSERT_LE(0, fd) << strerror(errno); leak(fd); } TEST_F(Rename, eacces_on_srcdir) { const char FULLDST[] = "mountpoint/d/dst"; const char RELDST[] = "d/dst"; const char FULLSRC[] = "mountpoint/src"; const char RELSRC[] = "src"; uint64_t ino = 42; expect_getattr(FUSE_ROOT_ID, S_IFDIR | 0755, UINT64_MAX, 1, 0); expect_lookup(RELSRC, ino, S_IFREG | 0644, UINT64_MAX); EXPECT_LOOKUP(FUSE_ROOT_ID, RELDST) .Times(AnyNumber()) .WillRepeatedly(Invoke(ReturnErrno(ENOENT))); ASSERT_EQ(-1, rename(FULLSRC, FULLDST)); ASSERT_EQ(EACCES, errno); } TEST_F(Rename, eacces_on_dstdir_for_creating) { const char FULLDST[] = "mountpoint/d/dst"; const char RELDSTDIR[] = "d"; const char RELDST[] = "dst"; const char FULLSRC[] = "mountpoint/src"; const char RELSRC[] = "src"; uint64_t src_ino = 42; uint64_t dstdir_ino = 43; expect_getattr(FUSE_ROOT_ID, S_IFDIR | 0777, UINT64_MAX, 1, 0); expect_lookup(RELSRC, src_ino, S_IFREG | 0644, UINT64_MAX); expect_lookup(RELDSTDIR, dstdir_ino, S_IFDIR | 0755, UINT64_MAX); EXPECT_LOOKUP(dstdir_ino, RELDST).WillOnce(Invoke(ReturnErrno(ENOENT))); ASSERT_EQ(-1, rename(FULLSRC, FULLDST)); ASSERT_EQ(EACCES, errno); } TEST_F(Rename, eacces_on_dstdir_for_removing) { const char FULLDST[] = "mountpoint/d/dst"; const char RELDSTDIR[] = "d"; const char RELDST[] = "dst"; const char FULLSRC[] = "mountpoint/src"; const char RELSRC[] = "src"; uint64_t src_ino = 42; uint64_t dstdir_ino = 43; expect_getattr(FUSE_ROOT_ID, S_IFDIR | 0777, UINT64_MAX, 1, 0); expect_lookup(RELSRC, src_ino, S_IFREG | 0644, UINT64_MAX); expect_lookup(RELDSTDIR, dstdir_ino, S_IFDIR | 0755, UINT64_MAX); EXPECT_LOOKUP(dstdir_ino, RELDST).WillOnce(Invoke(ReturnErrno(ENOENT))); ASSERT_EQ(-1, rename(FULLSRC, FULLDST)); ASSERT_EQ(EACCES, errno); } TEST_F(Rename, eperm_on_sticky_srcdir) { const char FULLDST[] = "mountpoint/d/dst"; const char FULLSRC[] = "mountpoint/src"; const char RELSRC[] = "src"; uint64_t ino = 42; expect_getattr(FUSE_ROOT_ID, S_IFDIR | 01777, UINT64_MAX, 1, 0); expect_lookup(RELSRC, ino, S_IFREG | 0644, UINT64_MAX); ASSERT_EQ(-1, rename(FULLSRC, FULLDST)); ASSERT_EQ(EPERM, errno); } /* * A user cannot move out a subdirectory that he does not own, because that * would require changing the subdirectory's ".." dirent */ TEST_F(Rename, eperm_for_subdirectory) { const char FULLDST[] = "mountpoint/d/dst"; const char FULLSRC[] = "mountpoint/src"; const char RELDSTDIR[] = "d"; const char RELDST[] = "dst"; const char RELSRC[] = "src"; uint64_t ino = 42; uint64_t dstdir_ino = 43; expect_getattr(FUSE_ROOT_ID, S_IFDIR | 0777, UINT64_MAX, 1, 0); expect_lookup(RELSRC, ino, S_IFDIR | 0755, UINT64_MAX, 0); expect_lookup(RELDSTDIR, dstdir_ino, S_IFDIR | 0777, UINT64_MAX, 0); EXPECT_LOOKUP(dstdir_ino, RELDST).WillOnce(Invoke(ReturnErrno(ENOENT))); ASSERT_EQ(-1, rename(FULLSRC, FULLDST)); ASSERT_EQ(EACCES, errno); } /* * A user _can_ rename a subdirectory to which he lacks write permissions, if * it will keep the same parent */ TEST_F(Rename, subdirectory_to_same_dir) { const char FULLDST[] = "mountpoint/dst"; const char FULLSRC[] = "mountpoint/src"; const char RELDST[] = "dst"; const char RELSRC[] = "src"; uint64_t ino = 42; expect_getattr(FUSE_ROOT_ID, S_IFDIR | 0777, UINT64_MAX, 1, 0); expect_lookup(RELSRC, ino, S_IFDIR | 0755, UINT64_MAX, 0); EXPECT_LOOKUP(FUSE_ROOT_ID, RELDST) .WillOnce(Invoke(ReturnErrno(ENOENT))); expect_rename(0); ASSERT_EQ(0, rename(FULLSRC, FULLDST)) << strerror(errno); } TEST_F(Rename, eperm_on_sticky_dstdir) { const char FULLDST[] = "mountpoint/d/dst"; const char RELDSTDIR[] = "d"; const char RELDST[] = "dst"; const char FULLSRC[] = "mountpoint/src"; const char RELSRC[] = "src"; uint64_t src_ino = 42; uint64_t dstdir_ino = 43; uint64_t dst_ino = 44; expect_getattr(FUSE_ROOT_ID, S_IFDIR | 0777, UINT64_MAX, 1, 0); expect_lookup(RELSRC, src_ino, S_IFREG | 0644, UINT64_MAX); expect_lookup(RELDSTDIR, dstdir_ino, S_IFDIR | 01777, UINT64_MAX); EXPECT_LOOKUP(dstdir_ino, RELDST) .WillOnce(Invoke(ReturnImmediate([=](auto in __unused, auto& out) { SET_OUT_HEADER_LEN(out, entry); out.body.entry.attr.mode = S_IFREG | 0644; out.body.entry.nodeid = dst_ino; out.body.entry.attr_valid = UINT64_MAX; out.body.entry.entry_valid = UINT64_MAX; out.body.entry.attr.uid = 0; }))); ASSERT_EQ(-1, rename(FULLSRC, FULLDST)); ASSERT_EQ(EPERM, errno); } /* Successfully rename a file, overwriting the destination */ TEST_F(Rename, ok) { const char FULLDST[] = "mountpoint/dst"; const char RELDST[] = "dst"; const char FULLSRC[] = "mountpoint/src"; const char RELSRC[] = "src"; // The inode of the already-existing destination file uint64_t dst_ino = 2; uint64_t ino = 42; expect_getattr(FUSE_ROOT_ID, S_IFDIR | 0777, UINT64_MAX, 1, geteuid()); expect_lookup(RELSRC, ino, S_IFREG | 0644, UINT64_MAX); expect_lookup(RELDST, dst_ino, S_IFREG | 0644, UINT64_MAX); expect_rename(0); ASSERT_EQ(0, rename(FULLSRC, FULLDST)) << strerror(errno); } TEST_F(Rename, ok_to_remove_src_because_of_stickiness) { const char FULLDST[] = "mountpoint/dst"; const char RELDST[] = "dst"; const char FULLSRC[] = "mountpoint/src"; const char RELSRC[] = "src"; uint64_t ino = 42; expect_getattr(FUSE_ROOT_ID, S_IFDIR | 01777, UINT64_MAX, 1, 0); expect_lookup(RELSRC, ino, S_IFREG | 0644, UINT64_MAX, geteuid()); EXPECT_LOOKUP(FUSE_ROOT_ID, RELDST) .WillOnce(Invoke(ReturnErrno(ENOENT))); expect_rename(0); ASSERT_EQ(0, rename(FULLSRC, FULLDST)) << strerror(errno); } TEST_F(Setattr, ok) { const char FULLPATH[] = "mountpoint/some_file.txt"; const char RELPATH[] = "some_file.txt"; const uint64_t ino = 42; const mode_t oldmode = 0755; const mode_t newmode = 0644; expect_getattr(FUSE_ROOT_ID, S_IFDIR | 0755, UINT64_MAX, 1); expect_lookup(RELPATH, ino, S_IFREG | oldmode, UINT64_MAX, geteuid()); EXPECT_CALL(*m_mock, process( ResultOf([](auto in) { return (in.header.opcode == FUSE_SETATTR && in.header.nodeid == ino && in.body.setattr.mode == newmode); }, Eq(true)), _) ).WillOnce(Invoke(ReturnImmediate([](auto in __unused, auto& out) { SET_OUT_HEADER_LEN(out, attr); out.body.attr.attr.mode = S_IFREG | newmode; }))); EXPECT_EQ(0, chmod(FULLPATH, newmode)) << strerror(errno); } TEST_F(Setattr, eacces) { const char FULLPATH[] = "mountpoint/some_file.txt"; const char RELPATH[] = "some_file.txt"; const uint64_t ino = 42; const mode_t oldmode = 0755; const mode_t newmode = 0644; expect_getattr(FUSE_ROOT_ID, S_IFDIR | 0755, UINT64_MAX, 1); expect_lookup(RELPATH, ino, S_IFREG | oldmode, UINT64_MAX, 0); EXPECT_CALL(*m_mock, process( ResultOf([](auto in) { return (in.header.opcode == FUSE_SETATTR); }, Eq(true)), _) ).Times(0); EXPECT_NE(0, chmod(FULLPATH, newmode)); EXPECT_EQ(EPERM, errno); } /* * ftruncate() of a file without writable permissions should succeed as long as * the file descriptor is writable. This is important when combined with * O_CREAT */ TEST_F(Setattr, ftruncate_of_newly_created_file) { const char FULLPATH[] = "mountpoint/some_file.txt"; const char RELPATH[] = "some_file.txt"; const uint64_t ino = 42; const mode_t mode = 0000; int fd; expect_getattr(FUSE_ROOT_ID, S_IFDIR | 0777, UINT64_MAX, 1); EXPECT_LOOKUP(FUSE_ROOT_ID, RELPATH) .WillOnce(Invoke(ReturnErrno(ENOENT))); expect_create(RELPATH, ino); EXPECT_CALL(*m_mock, process( ResultOf([](auto in) { return (in.header.opcode == FUSE_SETATTR && in.header.nodeid == ino && (in.body.setattr.valid & FATTR_SIZE)); }, Eq(true)), _) ).WillOnce(Invoke(ReturnImmediate([=](auto in __unused, auto& out) { SET_OUT_HEADER_LEN(out, attr); out.body.attr.attr.ino = ino; out.body.attr.attr.mode = S_IFREG | mode; out.body.attr.attr_valid = UINT64_MAX; }))); fd = open(FULLPATH, O_CREAT | O_RDWR, 0); ASSERT_LE(0, fd) << strerror(errno); ASSERT_EQ(0, ftruncate(fd, 100)) << strerror(errno); leak(fd); } /* * Setting the sgid bit should fail for an unprivileged user who doesn't belong * to the file's group */ TEST_F(Setattr, sgid_by_non_group_member) { const char FULLPATH[] = "mountpoint/some_file.txt"; const char RELPATH[] = "some_file.txt"; const uint64_t ino = 42; const mode_t oldmode = 0755; const mode_t newmode = 02755; uid_t uid = geteuid(); gid_t gid = excluded_group(); expect_getattr(FUSE_ROOT_ID, S_IFDIR | 0755, UINT64_MAX, 1); expect_lookup(RELPATH, ino, S_IFREG | oldmode, UINT64_MAX, uid, gid); EXPECT_CALL(*m_mock, process( ResultOf([](auto in) { return (in.header.opcode == FUSE_SETATTR); }, Eq(true)), _) ).Times(0); EXPECT_NE(0, chmod(FULLPATH, newmode)); EXPECT_EQ(EPERM, errno); } /* Only the superuser may set the sticky bit on a non-directory */ TEST_F(Setattr, sticky_regular_file) { const char FULLPATH[] = "mountpoint/some_file.txt"; const char RELPATH[] = "some_file.txt"; const uint64_t ino = 42; const mode_t oldmode = 0644; const mode_t newmode = 01644; expect_getattr(FUSE_ROOT_ID, S_IFDIR | 0755, UINT64_MAX, 1); expect_lookup(RELPATH, ino, S_IFREG | oldmode, UINT64_MAX, geteuid()); EXPECT_CALL(*m_mock, process( ResultOf([](auto in) { return (in.header.opcode == FUSE_SETATTR); }, Eq(true)), _) ).Times(0); EXPECT_NE(0, chmod(FULLPATH, newmode)); EXPECT_EQ(EFTYPE, errno); } TEST_F(Setextattr, ok) { const char FULLPATH[] = "mountpoint/some_file.txt"; const char RELPATH[] = "some_file.txt"; uint64_t ino = 42; const char value[] = "whatever"; ssize_t value_len = strlen(value) + 1; int ns = EXTATTR_NAMESPACE_USER; ssize_t r; expect_getattr(FUSE_ROOT_ID, S_IFDIR | 0755, UINT64_MAX, 1); expect_lookup(RELPATH, ino, S_IFREG | 0644, UINT64_MAX, geteuid()); expect_setxattr(0); r = extattr_set_file(FULLPATH, ns, "foo", (const void*)value, value_len); ASSERT_EQ(value_len, r) << strerror(errno); } TEST_F(Setextattr, eacces) { const char FULLPATH[] = "mountpoint/some_file.txt"; const char RELPATH[] = "some_file.txt"; uint64_t ino = 42; const char value[] = "whatever"; ssize_t value_len = strlen(value) + 1; int ns = EXTATTR_NAMESPACE_USER; expect_getattr(FUSE_ROOT_ID, S_IFDIR | 0755, UINT64_MAX, 1); expect_lookup(RELPATH, ino, S_IFREG | 0644, UINT64_MAX, 0); ASSERT_EQ(-1, extattr_set_file(FULLPATH, ns, "foo", (const void*)value, value_len)); ASSERT_EQ(EACCES, errno); } // Setting system attributes requires superuser privileges TEST_F(Setextattr, system) { const char FULLPATH[] = "mountpoint/some_file.txt"; const char RELPATH[] = "some_file.txt"; uint64_t ino = 42; const char value[] = "whatever"; ssize_t value_len = strlen(value) + 1; int ns = EXTATTR_NAMESPACE_SYSTEM; expect_getattr(FUSE_ROOT_ID, S_IFDIR | 0755, UINT64_MAX, 1); expect_lookup(RELPATH, ino, S_IFREG | 0666, UINT64_MAX, geteuid()); ASSERT_EQ(-1, extattr_set_file(FULLPATH, ns, "foo", (const void*)value, value_len)); ASSERT_EQ(EPERM, errno); } // Setting user attributes merely requires write privileges TEST_F(Setextattr, user) { const char FULLPATH[] = "mountpoint/some_file.txt"; const char RELPATH[] = "some_file.txt"; uint64_t ino = 42; const char value[] = "whatever"; ssize_t value_len = strlen(value) + 1; int ns = EXTATTR_NAMESPACE_USER; ssize_t r; expect_getattr(FUSE_ROOT_ID, S_IFDIR | 0755, UINT64_MAX, 1); expect_lookup(RELPATH, ino, S_IFREG | 0666, UINT64_MAX, 0); expect_setxattr(0); r = extattr_set_file(FULLPATH, ns, "foo", (const void*)value, value_len); ASSERT_EQ(value_len, r) << strerror(errno); } TEST_F(Unlink, ok) { const char FULLPATH[] = "mountpoint/some_file.txt"; const char RELPATH[] = "some_file.txt"; uint64_t ino = 42; sem_t sem; ASSERT_EQ(0, sem_init(&sem, 0, 0)) << strerror(errno); expect_getattr(FUSE_ROOT_ID, S_IFDIR | 0777, UINT64_MAX, 1); expect_lookup(RELPATH, ino, S_IFREG | 0644, UINT64_MAX, geteuid()); expect_unlink(FUSE_ROOT_ID, RELPATH, 0); expect_forget(ino, 1, &sem); ASSERT_EQ(0, unlink(FULLPATH)) << strerror(errno); sem_wait(&sem); sem_destroy(&sem); } /* * Ensure that a cached name doesn't cause unlink to bypass permission checks * in VOP_LOOKUP. * * This test should pass because lookup(9) purges the namecache entry by doing * a vfs_cache_lookup with ~MAKEENTRY when nameiop == DELETE. */ TEST_F(Unlink, cached_unwritable_directory) { const char FULLPATH[] = "mountpoint/some_file.txt"; const char RELPATH[] = "some_file.txt"; uint64_t ino = 42; expect_getattr(FUSE_ROOT_ID, S_IFDIR | 0755, UINT64_MAX, 1); EXPECT_LOOKUP(FUSE_ROOT_ID, RELPATH) .Times(AnyNumber()) .WillRepeatedly(Invoke( ReturnImmediate([=](auto i __unused, auto& out) { SET_OUT_HEADER_LEN(out, entry); out.body.entry.attr.mode = S_IFREG | 0644; out.body.entry.nodeid = ino; out.body.entry.entry_valid = UINT64_MAX; })) ); /* Fill name cache */ ASSERT_EQ(0, access(FULLPATH, F_OK)) << strerror(errno); /* Despite cached name , unlink should fail */ ASSERT_EQ(-1, unlink(FULLPATH)); ASSERT_EQ(EACCES, errno); } TEST_F(Unlink, unwritable_directory) { const char FULLPATH[] = "mountpoint/some_file.txt"; const char RELPATH[] = "some_file.txt"; uint64_t ino = 42; expect_getattr(FUSE_ROOT_ID, S_IFDIR | 0755, UINT64_MAX, 1); expect_lookup(RELPATH, ino, S_IFREG | 0644, UINT64_MAX, geteuid()); ASSERT_EQ(-1, unlink(FULLPATH)); ASSERT_EQ(EACCES, errno); } TEST_F(Unlink, sticky_directory) { const char FULLPATH[] = "mountpoint/some_file.txt"; const char RELPATH[] = "some_file.txt"; uint64_t ino = 42; expect_getattr(FUSE_ROOT_ID, S_IFDIR | 01777, UINT64_MAX, 1); expect_lookup(RELPATH, ino, S_IFREG | 0644, UINT64_MAX, 0); ASSERT_EQ(-1, unlink(FULLPATH)); ASSERT_EQ(EPERM, errno); } /* A write by a non-owner should clear a file's SUID bit */ TEST_F(Write, clear_suid) { const char FULLPATH[] = "mountpoint/some_file.txt"; const char RELPATH[] = "some_file.txt"; struct stat sb; uint64_t ino = 42; mode_t oldmode = 04777; mode_t newmode = 0777; char wbuf[1] = {'x'}; int fd; expect_getattr(FUSE_ROOT_ID, S_IFDIR | 0755, UINT64_MAX, 1); expect_lookup(RELPATH, ino, S_IFREG | oldmode, UINT64_MAX); expect_open(ino, 0, 1); expect_write(ino, 0, sizeof(wbuf), sizeof(wbuf), 0, 0, wbuf); expect_chmod(ino, newmode, sizeof(wbuf)); fd = open(FULLPATH, O_WRONLY); ASSERT_LE(0, fd) << strerror(errno); ASSERT_EQ(1, write(fd, wbuf, sizeof(wbuf))) << strerror(errno); ASSERT_EQ(0, fstat(fd, &sb)) << strerror(errno); EXPECT_EQ(S_IFREG | newmode, sb.st_mode); leak(fd); } /* A write by a non-owner should clear a file's SGID bit */ TEST_F(Write, clear_sgid) { const char FULLPATH[] = "mountpoint/some_file.txt"; const char RELPATH[] = "some_file.txt"; struct stat sb; uint64_t ino = 42; mode_t oldmode = 02777; mode_t newmode = 0777; char wbuf[1] = {'x'}; int fd; expect_getattr(FUSE_ROOT_ID, S_IFDIR | 0755, UINT64_MAX, 1); expect_lookup(RELPATH, ino, S_IFREG | oldmode, UINT64_MAX); expect_open(ino, 0, 1); expect_write(ino, 0, sizeof(wbuf), sizeof(wbuf), 0, 0, wbuf); expect_chmod(ino, newmode, sizeof(wbuf)); fd = open(FULLPATH, O_WRONLY); ASSERT_LE(0, fd) << strerror(errno); ASSERT_EQ(1, write(fd, wbuf, sizeof(wbuf))) << strerror(errno); ASSERT_EQ(0, fstat(fd, &sb)) << strerror(errno); EXPECT_EQ(S_IFREG | newmode, sb.st_mode); leak(fd); } /* Regression test for a specific recurse-of-nonrecursive-lock panic * * With writeback caching, we can't call vtruncbuf from fuse_io_strategy, or it * may panic. That happens if the FUSE_SETATTR response indicates that the * file's size has changed since the write. */ TEST_F(Write, recursion_panic_while_clearing_suid) { const char FULLPATH[] = "mountpoint/some_file.txt"; const char RELPATH[] = "some_file.txt"; uint64_t ino = 42; mode_t oldmode = 04777; mode_t newmode = 0777; char wbuf[1] = {'x'}; int fd; expect_getattr(FUSE_ROOT_ID, S_IFDIR | 0755, UINT64_MAX, 1); expect_lookup(RELPATH, ino, S_IFREG | oldmode, UINT64_MAX); expect_open(ino, 0, 1); expect_write(ino, 0, sizeof(wbuf), sizeof(wbuf), 0, 0, wbuf); /* XXX Return a smaller file size than what we just wrote! */ expect_chmod(ino, newmode, 0); fd = open(FULLPATH, O_WRONLY); ASSERT_LE(0, fd) << strerror(errno); ASSERT_EQ(1, write(fd, wbuf, sizeof(wbuf))) << strerror(errno); leak(fd); } - - diff --git a/tests/sys/fs/fusefs/mockfs.cc b/tests/sys/fs/fusefs/mockfs.cc index 0124d8765eb1..7e4991fb7bb9 100644 --- a/tests/sys/fs/fusefs/mockfs.cc +++ b/tests/sys/fs/fusefs/mockfs.cc @@ -1,954 +1,974 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2019 The FreeBSD Foundation * * This software was developed by BFF Storage Systems, LLC under sponsorship * from the FreeBSD Foundation. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ extern "C" { #include #include #include #include #include #include #include #include #include #include #include #include #include #include "mntopts.h" // for build_iovec } #include #include #include "mockfs.hh" using namespace testing; int verbosity = 0; const char* opcode2opname(uint32_t opcode) { const char* table[] = { "Unknown (opcode 0)", "LOOKUP", "FORGET", "GETATTR", "SETATTR", "READLINK", "SYMLINK", "Unknown (opcode 7)", "MKNOD", "MKDIR", "UNLINK", "RMDIR", "RENAME", "LINK", "OPEN", "READ", "WRITE", "STATFS", "RELEASE", "Unknown (opcode 19)", "FSYNC", "SETXATTR", "GETXATTR", "LISTXATTR", "REMOVEXATTR", "FLUSH", "INIT", "OPENDIR", "READDIR", "RELEASEDIR", "FSYNCDIR", "GETLK", "SETLK", "SETLKW", "ACCESS", "CREATE", "INTERRUPT", "BMAP", "DESTROY", "IOCTL", "POLL", "NOTIFY_REPLY", "BATCH_FORGET", "FALLOCATE", "READDIRPLUS", "RENAME2", "LSEEK", + "COPY_FILE_RANGE", }; if (opcode >= nitems(table)) return ("Unknown (opcode > max)"); else return (table[opcode]); } ProcessMockerT ReturnErrno(int error) { return([=](auto in, auto &out) { std::unique_ptr out0(new mockfs_buf_out); out0->header.unique = in.header.unique; out0->header.error = -error; out0->header.len = sizeof(out0->header); out.push_back(std::move(out0)); }); } /* Helper function used for returning negative cache entries for LOOKUP */ ProcessMockerT ReturnNegativeCache(const struct timespec *entry_valid) { return([=](auto in, auto &out) { /* nodeid means ENOENT and cache it */ std::unique_ptr out0(new mockfs_buf_out); out0->body.entry.nodeid = 0; out0->header.unique = in.header.unique; out0->header.error = 0; out0->body.entry.entry_valid = entry_valid->tv_sec; out0->body.entry.entry_valid_nsec = entry_valid->tv_nsec; SET_OUT_HEADER_LEN(*out0, entry); out.push_back(std::move(out0)); }); } ProcessMockerT ReturnImmediate(std::function f) { return([=](auto& in, auto &out) { std::unique_ptr out0(new mockfs_buf_out); out0->header.unique = in.header.unique; f(in, *out0); out.push_back(std::move(out0)); }); } void sigint_handler(int __unused sig) { // Don't do anything except interrupt the daemon's read(2) call } void MockFS::debug_request(const mockfs_buf_in &in, ssize_t buflen) { printf("%-11s ino=%2" PRIu64, opcode2opname(in.header.opcode), in.header.nodeid); if (verbosity > 1) { printf(" uid=%5u gid=%5u pid=%5u unique=%" PRIu64 " len=%u" " buflen=%zd", in.header.uid, in.header.gid, in.header.pid, in.header.unique, in.header.len, buflen); } switch (in.header.opcode) { const char *name, *value; case FUSE_ACCESS: printf(" mask=%#x", in.body.access.mask); break; case FUSE_BMAP: printf(" block=%" PRIx64 " blocksize=%#x", in.body.bmap.block, in.body.bmap.blocksize); break; + case FUSE_COPY_FILE_RANGE: + printf(" off_in=%" PRIu64 " ino_out=%" PRIu64 + " off_out=%" PRIu64 " size=%" PRIu64, + in.body.copy_file_range.off_in, + in.body.copy_file_range.nodeid_out, + in.body.copy_file_range.off_out, + in.body.copy_file_range.len); + if (verbosity > 1) + printf(" fh_in=%" PRIu64 " fh_out=%" PRIu64 + " flags=%" PRIx64, + in.body.copy_file_range.fh_in, + in.body.copy_file_range.fh_out, + in.body.copy_file_range.flags); + break; case FUSE_CREATE: if (m_kernel_minor_version >= 12) name = (const char*)in.body.bytes + sizeof(fuse_create_in); else name = (const char*)in.body.bytes + sizeof(fuse_open_in); printf(" flags=%#x name=%s", in.body.open.flags, name); break; case FUSE_FLUSH: printf(" fh=%#" PRIx64 " lock_owner=%" PRIu64, in.body.flush.fh, in.body.flush.lock_owner); break; case FUSE_FORGET: printf(" nlookup=%" PRIu64, in.body.forget.nlookup); break; case FUSE_FSYNC: printf(" flags=%#x", in.body.fsync.fsync_flags); break; case FUSE_FSYNCDIR: printf(" flags=%#x", in.body.fsyncdir.fsync_flags); break; case FUSE_INTERRUPT: printf(" unique=%" PRIu64, in.body.interrupt.unique); break; case FUSE_LINK: printf(" oldnodeid=%" PRIu64, in.body.link.oldnodeid); break; case FUSE_LISTXATTR: printf(" size=%" PRIu32, in.body.listxattr.size); break; case FUSE_LOOKUP: printf(" %s", in.body.lookup); break; case FUSE_LSEEK: switch (in.body.lseek.whence) { case SEEK_HOLE: printf(" SEEK_HOLE offset=%jd", in.body.lseek.offset); break; case SEEK_DATA: printf(" SEEK_DATA offset=%jd", in.body.lseek.offset); break; default: printf(" whence=%u offset=%jd", in.body.lseek.whence, in.body.lseek.offset); break; } break; case FUSE_MKDIR: name = (const char*)in.body.bytes + sizeof(fuse_mkdir_in); printf(" name=%s mode=%#o umask=%#o", name, in.body.mkdir.mode, in.body.mkdir.umask); break; case FUSE_MKNOD: if (m_kernel_minor_version >= 12) name = (const char*)in.body.bytes + sizeof(fuse_mknod_in); else name = (const char*)in.body.bytes + FUSE_COMPAT_MKNOD_IN_SIZE; printf(" mode=%#o rdev=%x umask=%#o name=%s", in.body.mknod.mode, in.body.mknod.rdev, in.body.mknod.umask, name); break; case FUSE_OPEN: printf(" flags=%#x", in.body.open.flags); break; case FUSE_OPENDIR: printf(" flags=%#x", in.body.opendir.flags); break; case FUSE_READ: printf(" offset=%" PRIu64 " size=%u", in.body.read.offset, in.body.read.size); if (verbosity > 1) printf(" flags=%#x", in.body.read.flags); break; case FUSE_READDIR: printf(" fh=%#" PRIx64 " offset=%" PRIu64 " size=%u", in.body.readdir.fh, in.body.readdir.offset, in.body.readdir.size); break; case FUSE_RELEASE: printf(" fh=%#" PRIx64 " flags=%#x lock_owner=%" PRIu64, in.body.release.fh, in.body.release.flags, in.body.release.lock_owner); break; case FUSE_SETATTR: if (verbosity <= 1) { printf(" valid=%#x", in.body.setattr.valid); break; } if (in.body.setattr.valid & FATTR_MODE) printf(" mode=%#o", in.body.setattr.mode); if (in.body.setattr.valid & FATTR_UID) printf(" uid=%u", in.body.setattr.uid); if (in.body.setattr.valid & FATTR_GID) printf(" gid=%u", in.body.setattr.gid); if (in.body.setattr.valid & FATTR_SIZE) printf(" size=%" PRIu64, in.body.setattr.size); if (in.body.setattr.valid & FATTR_ATIME) printf(" atime=%" PRIu64 ".%u", in.body.setattr.atime, in.body.setattr.atimensec); if (in.body.setattr.valid & FATTR_MTIME) printf(" mtime=%" PRIu64 ".%u", in.body.setattr.mtime, in.body.setattr.mtimensec); if (in.body.setattr.valid & FATTR_FH) printf(" fh=%" PRIu64 "", in.body.setattr.fh); break; case FUSE_SETLK: printf(" fh=%#" PRIx64 " owner=%" PRIu64 " type=%u pid=%u", in.body.setlk.fh, in.body.setlk.owner, in.body.setlk.lk.type, in.body.setlk.lk.pid); if (verbosity >= 2) { printf(" range=[%" PRIu64 "-%" PRIu64 "]", in.body.setlk.lk.start, in.body.setlk.lk.end); } break; case FUSE_SETXATTR: /* * In theory neither the xattr name and value need be * ASCII, but in this test suite they always are. */ name = (const char*)in.body.bytes + sizeof(fuse_setxattr_in); value = name + strlen(name) + 1; printf(" %s=%s", name, value); break; case FUSE_WRITE: printf(" fh=%#" PRIx64 " offset=%" PRIu64 " size=%u write_flags=%u", in.body.write.fh, in.body.write.offset, in.body.write.size, in.body.write.write_flags); if (verbosity > 1) printf(" flags=%#x", in.body.write.flags); break; default: break; } printf("\n"); } /* * Debug a FUSE response. * * This is mostly useful for asynchronous notifications, which don't correspond * to any request */ void MockFS::debug_response(const mockfs_buf_out &out) { const char *name; if (verbosity == 0) return; switch (out.header.error) { case FUSE_NOTIFY_INVAL_ENTRY: name = (const char*)out.body.bytes + sizeof(fuse_notify_inval_entry_out); printf("<- INVAL_ENTRY parent=%" PRIu64 " %s\n", out.body.inval_entry.parent, name); break; case FUSE_NOTIFY_INVAL_INODE: printf("<- INVAL_INODE ino=%" PRIu64 " off=%" PRIi64 " len=%" PRIi64 "\n", out.body.inval_inode.ino, out.body.inval_inode.off, out.body.inval_inode.len); break; case FUSE_NOTIFY_STORE: printf("<- STORE ino=%" PRIu64 " off=%" PRIu64 " size=%" PRIu32 "\n", out.body.store.nodeid, out.body.store.offset, out.body.store.size); break; default: break; } } MockFS::MockFS(int max_readahead, bool allow_other, bool default_permissions, bool push_symlinks_in, bool ro, enum poll_method pm, uint32_t flags, uint32_t kernel_minor_version, uint32_t max_write, bool async, bool noclusterr, unsigned time_gran, bool nointr) { struct sigaction sa; struct iovec *iov = NULL; int iovlen = 0; char fdstr[15]; const bool trueval = true; m_daemon_id = NULL; m_kernel_minor_version = kernel_minor_version; m_maxreadahead = max_readahead; m_maxwrite = MIN(max_write, max_max_write); m_nready = -1; m_pm = pm; m_time_gran = time_gran; m_quit = false; if (m_pm == KQ) m_kq = kqueue(); else m_kq = -1; /* * Kyua sets pwd to a testcase-unique tempdir; no need to use * mkdtemp */ /* * googletest doesn't allow ASSERT_ in constructors, so we must throw * instead. */ if (mkdir("mountpoint" , 0755) && errno != EEXIST) throw(std::system_error(errno, std::system_category(), "Couldn't make mountpoint directory")); switch (m_pm) { case BLOCKING: m_fuse_fd = open("/dev/fuse", O_CLOEXEC | O_RDWR); break; default: m_fuse_fd = open("/dev/fuse", O_CLOEXEC | O_RDWR | O_NONBLOCK); break; } if (m_fuse_fd < 0) throw(std::system_error(errno, std::system_category(), "Couldn't open /dev/fuse")); m_pid = getpid(); m_child_pid = -1; build_iovec(&iov, &iovlen, "fstype", __DECONST(void *, "fusefs"), -1); build_iovec(&iov, &iovlen, "fspath", __DECONST(void *, "mountpoint"), -1); build_iovec(&iov, &iovlen, "from", __DECONST(void *, "/dev/fuse"), -1); sprintf(fdstr, "%d", m_fuse_fd); build_iovec(&iov, &iovlen, "fd", fdstr, -1); if (allow_other) { build_iovec(&iov, &iovlen, "allow_other", __DECONST(void*, &trueval), sizeof(bool)); } if (default_permissions) { build_iovec(&iov, &iovlen, "default_permissions", __DECONST(void*, &trueval), sizeof(bool)); } if (push_symlinks_in) { build_iovec(&iov, &iovlen, "push_symlinks_in", __DECONST(void*, &trueval), sizeof(bool)); } if (ro) { build_iovec(&iov, &iovlen, "ro", __DECONST(void*, &trueval), sizeof(bool)); } if (async) { build_iovec(&iov, &iovlen, "async", __DECONST(void*, &trueval), sizeof(bool)); } if (noclusterr) { build_iovec(&iov, &iovlen, "noclusterr", __DECONST(void*, &trueval), sizeof(bool)); } if (nointr) { build_iovec(&iov, &iovlen, "nointr", __DECONST(void*, &trueval), sizeof(bool)); } else { build_iovec(&iov, &iovlen, "intr", __DECONST(void*, &trueval), sizeof(bool)); } if (nmount(iov, iovlen, 0)) throw(std::system_error(errno, std::system_category(), "Couldn't mount filesystem")); // Setup default handler ON_CALL(*this, process(_, _)) .WillByDefault(Invoke(this, &MockFS::process_default)); init(flags); bzero(&sa, sizeof(sa)); sa.sa_handler = sigint_handler; sa.sa_flags = 0; /* Don't set SA_RESTART! */ if (0 != sigaction(SIGUSR1, &sa, NULL)) throw(std::system_error(errno, std::system_category(), "Couldn't handle SIGUSR1")); if (pthread_create(&m_daemon_id, NULL, service, (void*)this)) throw(std::system_error(errno, std::system_category(), "Couldn't Couldn't start fuse thread")); } MockFS::~MockFS() { kill_daemon(); if (m_daemon_id != NULL) { pthread_join(m_daemon_id, NULL); m_daemon_id = NULL; } ::unmount("mountpoint", MNT_FORCE); rmdir("mountpoint"); if (m_kq >= 0) close(m_kq); } void MockFS::audit_request(const mockfs_buf_in &in, ssize_t buflen) { uint32_t inlen = in.header.len; size_t fih = sizeof(in.header); switch (in.header.opcode) { case FUSE_LOOKUP: case FUSE_RMDIR: case FUSE_SYMLINK: case FUSE_UNLINK: EXPECT_GT(inlen, fih) << "Missing request filename"; // No redundant information for checking buflen break; case FUSE_FORGET: EXPECT_EQ(inlen, fih + sizeof(in.body.forget)); EXPECT_EQ((size_t)buflen, inlen); break; case FUSE_GETATTR: EXPECT_EQ(inlen, fih + sizeof(in.body.getattr)); EXPECT_EQ((size_t)buflen, inlen); break; case FUSE_SETATTR: EXPECT_EQ(inlen, fih + sizeof(in.body.setattr)); EXPECT_EQ((size_t)buflen, inlen); break; case FUSE_READLINK: EXPECT_EQ(inlen, fih) << "Unexpected request body"; EXPECT_EQ((size_t)buflen, inlen); break; case FUSE_MKNOD: { size_t s; if (m_kernel_minor_version >= 12) s = sizeof(in.body.mknod); else s = FUSE_COMPAT_MKNOD_IN_SIZE; EXPECT_GE(inlen, fih + s) << "Missing request body"; EXPECT_GT(inlen, fih + s) << "Missing request filename"; // No redundant information for checking buflen break; } case FUSE_MKDIR: EXPECT_GE(inlen, fih + sizeof(in.body.mkdir)) << "Missing request body"; EXPECT_GT(inlen, fih + sizeof(in.body.mkdir)) << "Missing request filename"; // No redundant information for checking buflen break; case FUSE_RENAME: EXPECT_GE(inlen, fih + sizeof(in.body.rename)) << "Missing request body"; EXPECT_GT(inlen, fih + sizeof(in.body.rename)) << "Missing request filename"; // No redundant information for checking buflen break; case FUSE_LINK: EXPECT_GE(inlen, fih + sizeof(in.body.link)) << "Missing request body"; EXPECT_GT(inlen, fih + sizeof(in.body.link)) << "Missing request filename"; // No redundant information for checking buflen break; case FUSE_OPEN: EXPECT_EQ(inlen, fih + sizeof(in.body.open)); EXPECT_EQ((size_t)buflen, inlen); break; case FUSE_READ: EXPECT_EQ(inlen, fih + sizeof(in.body.read)); EXPECT_EQ((size_t)buflen, inlen); break; case FUSE_WRITE: { size_t s; if (m_kernel_minor_version >= 9) s = sizeof(in.body.write); else s = FUSE_COMPAT_WRITE_IN_SIZE; // I suppose a 0-byte write should be allowed EXPECT_GE(inlen, fih + s) << "Missing request body"; EXPECT_EQ((size_t)buflen, fih + s + in.body.write.size); break; } case FUSE_DESTROY: case FUSE_STATFS: EXPECT_EQ(inlen, fih); EXPECT_EQ((size_t)buflen, inlen); break; case FUSE_RELEASE: EXPECT_EQ(inlen, fih + sizeof(in.body.release)); EXPECT_EQ((size_t)buflen, inlen); break; case FUSE_FSYNC: case FUSE_FSYNCDIR: EXPECT_EQ(inlen, fih + sizeof(in.body.fsync)); EXPECT_EQ((size_t)buflen, inlen); break; case FUSE_SETXATTR: EXPECT_GE(inlen, fih + sizeof(in.body.setxattr)) << "Missing request body"; EXPECT_GT(inlen, fih + sizeof(in.body.setxattr)) << "Missing request attribute name"; // No redundant information for checking buflen break; case FUSE_GETXATTR: EXPECT_GE(inlen, fih + sizeof(in.body.getxattr)) << "Missing request body"; EXPECT_GT(inlen, fih + sizeof(in.body.getxattr)) << "Missing request attribute name"; // No redundant information for checking buflen break; case FUSE_LISTXATTR: EXPECT_EQ(inlen, fih + sizeof(in.body.listxattr)); EXPECT_EQ((size_t)buflen, inlen); break; case FUSE_REMOVEXATTR: EXPECT_GT(inlen, fih) << "Missing request attribute name"; // No redundant information for checking buflen break; case FUSE_FLUSH: EXPECT_EQ(inlen, fih + sizeof(in.body.flush)); EXPECT_EQ((size_t)buflen, inlen); break; case FUSE_INIT: EXPECT_EQ(inlen, fih + sizeof(in.body.init)); EXPECT_EQ((size_t)buflen, inlen); break; case FUSE_OPENDIR: EXPECT_EQ(inlen, fih + sizeof(in.body.opendir)); EXPECT_EQ((size_t)buflen, inlen); break; case FUSE_READDIR: EXPECT_EQ(inlen, fih + sizeof(in.body.readdir)); EXPECT_EQ((size_t)buflen, inlen); break; case FUSE_RELEASEDIR: EXPECT_EQ(inlen, fih + sizeof(in.body.releasedir)); EXPECT_EQ((size_t)buflen, inlen); break; case FUSE_GETLK: EXPECT_EQ(inlen, fih + sizeof(in.body.getlk)); EXPECT_EQ((size_t)buflen, inlen); break; case FUSE_SETLK: case FUSE_SETLKW: EXPECT_EQ(inlen, fih + sizeof(in.body.setlk)); EXPECT_EQ((size_t)buflen, inlen); break; case FUSE_ACCESS: EXPECT_EQ(inlen, fih + sizeof(in.body.access)); EXPECT_EQ((size_t)buflen, inlen); break; case FUSE_CREATE: EXPECT_GE(inlen, fih + sizeof(in.body.create)) << "Missing request body"; EXPECT_GT(inlen, fih + sizeof(in.body.create)) << "Missing request filename"; // No redundant information for checking buflen break; case FUSE_INTERRUPT: EXPECT_EQ(inlen, fih + sizeof(in.body.interrupt)); EXPECT_EQ((size_t)buflen, inlen); break; case FUSE_BMAP: EXPECT_EQ(inlen, fih + sizeof(in.body.bmap)); EXPECT_EQ((size_t)buflen, inlen); break; case FUSE_LSEEK: EXPECT_EQ(inlen, fih + sizeof(in.body.lseek)); EXPECT_EQ((size_t)buflen, inlen); break; + case FUSE_COPY_FILE_RANGE: + EXPECT_EQ(inlen, fih + sizeof(in.body.copy_file_range)); + EXPECT_EQ(0ul, in.body.copy_file_range.flags); + EXPECT_EQ((size_t)buflen, inlen); + break; case FUSE_NOTIFY_REPLY: case FUSE_BATCH_FORGET: case FUSE_FALLOCATE: case FUSE_IOCTL: case FUSE_POLL: case FUSE_READDIRPLUS: FAIL() << "Unsupported opcode?"; default: FAIL() << "Unknown opcode " << in.header.opcode; } } void MockFS::init(uint32_t flags) { ssize_t buflen; std::unique_ptr in(new mockfs_buf_in); std::unique_ptr out(new mockfs_buf_out); read_request(*in, buflen); audit_request(*in, buflen); ASSERT_EQ(FUSE_INIT, in->header.opcode); out->header.unique = in->header.unique; out->header.error = 0; out->body.init.major = FUSE_KERNEL_VERSION; out->body.init.minor = m_kernel_minor_version;; out->body.init.flags = in->body.init.flags & flags; out->body.init.max_write = m_maxwrite; out->body.init.max_readahead = m_maxreadahead; if (m_kernel_minor_version < 23) { SET_OUT_HEADER_LEN(*out, init_7_22); } else { out->body.init.time_gran = m_time_gran; SET_OUT_HEADER_LEN(*out, init); } write(m_fuse_fd, out.get(), out->header.len); } void MockFS::kill_daemon() { m_quit = true; if (m_daemon_id != NULL) pthread_kill(m_daemon_id, SIGUSR1); // Closing the /dev/fuse file descriptor first allows unmount to // succeed even if the daemon doesn't correctly respond to commands // during the unmount sequence. close(m_fuse_fd); m_fuse_fd = -1; } void MockFS::loop() { std::vector> out; std::unique_ptr in(new mockfs_buf_in); ASSERT_TRUE(in != NULL); while (!m_quit) { ssize_t buflen; bzero(in.get(), sizeof(*in)); read_request(*in, buflen); if (m_quit) break; if (verbosity > 0) debug_request(*in, buflen); audit_request(*in, buflen); if (pid_ok((pid_t)in->header.pid)) { process(*in, out); } else { /* * Reject any requests from unknown processes. Because * we actually do mount a filesystem, plenty of * unrelated system daemons may try to access it. */ if (verbosity > 1) printf("\tREJECTED (wrong pid %d)\n", in->header.pid); process_default(*in, out); } for (auto &it: out) write_response(*it); out.clear(); } } int MockFS::notify_inval_entry(ino_t parent, const char *name, size_t namelen) { std::unique_ptr out(new mockfs_buf_out); out->header.unique = 0; /* 0 means asynchronous notification */ out->header.error = FUSE_NOTIFY_INVAL_ENTRY; out->body.inval_entry.parent = parent; out->body.inval_entry.namelen = namelen; strlcpy((char*)&out->body.bytes + sizeof(out->body.inval_entry), name, sizeof(out->body.bytes) - sizeof(out->body.inval_entry)); out->header.len = sizeof(out->header) + sizeof(out->body.inval_entry) + namelen; debug_response(*out); write_response(*out); return 0; } int MockFS::notify_inval_inode(ino_t ino, off_t off, ssize_t len) { std::unique_ptr out(new mockfs_buf_out); out->header.unique = 0; /* 0 means asynchronous notification */ out->header.error = FUSE_NOTIFY_INVAL_INODE; out->body.inval_inode.ino = ino; out->body.inval_inode.off = off; out->body.inval_inode.len = len; out->header.len = sizeof(out->header) + sizeof(out->body.inval_inode); debug_response(*out); write_response(*out); return 0; } int MockFS::notify_store(ino_t ino, off_t off, const void* data, ssize_t size) { std::unique_ptr out(new mockfs_buf_out); out->header.unique = 0; /* 0 means asynchronous notification */ out->header.error = FUSE_NOTIFY_STORE; out->body.store.nodeid = ino; out->body.store.offset = off; out->body.store.size = size; bcopy(data, (char*)&out->body.bytes + sizeof(out->body.store), size); out->header.len = sizeof(out->header) + sizeof(out->body.store) + size; debug_response(*out); write_response(*out); return 0; } bool MockFS::pid_ok(pid_t pid) { if (pid == m_pid) { return (true); } else if (pid == m_child_pid) { return (true); } else { struct kinfo_proc *ki; bool ok = false; ki = kinfo_getproc(pid); if (ki == NULL) return (false); /* * Allow access by the aio daemon processes so that our tests * can use aio functions */ if (0 == strncmp("aiod", ki->ki_comm, 4)) ok = true; free(ki); return (ok); } } void MockFS::process_default(const mockfs_buf_in& in, std::vector> &out) { std::unique_ptr out0(new mockfs_buf_out); out0->header.unique = in.header.unique; out0->header.error = -EOPNOTSUPP; out0->header.len = sizeof(out0->header); out.push_back(std::move(out0)); } void MockFS::read_request(mockfs_buf_in &in, ssize_t &res) { int nready = 0; fd_set readfds; pollfd fds[1]; struct kevent changes[1]; struct kevent events[1]; struct timespec timeout_ts; struct timeval timeout_tv; const int timeout_ms = 999; int timeout_int, nfds; switch (m_pm) { case BLOCKING: break; case KQ: timeout_ts.tv_sec = 0; timeout_ts.tv_nsec = timeout_ms * 1'000'000; while (nready == 0) { EV_SET(&changes[0], m_fuse_fd, EVFILT_READ, EV_ADD, 0, 0, 0); nready = kevent(m_kq, &changes[0], 1, &events[0], 1, &timeout_ts); if (m_quit) return; } ASSERT_LE(0, nready) << strerror(errno); ASSERT_EQ(events[0].ident, (uintptr_t)m_fuse_fd); if (events[0].flags & EV_ERROR) FAIL() << strerror(events[0].data); else if (events[0].flags & EV_EOF) FAIL() << strerror(events[0].fflags); m_nready = events[0].data; break; case POLL: timeout_int = timeout_ms; fds[0].fd = m_fuse_fd; fds[0].events = POLLIN; while (nready == 0) { nready = poll(fds, 1, timeout_int); if (m_quit) return; } ASSERT_LE(0, nready) << strerror(errno); ASSERT_TRUE(fds[0].revents & POLLIN); break; case SELECT: timeout_tv.tv_sec = 0; timeout_tv.tv_usec = timeout_ms * 1'000; nfds = m_fuse_fd + 1; while (nready == 0) { FD_ZERO(&readfds); FD_SET(m_fuse_fd, &readfds); nready = select(nfds, &readfds, NULL, NULL, &timeout_tv); if (m_quit) return; } ASSERT_LE(0, nready) << strerror(errno); ASSERT_TRUE(FD_ISSET(m_fuse_fd, &readfds)); break; default: FAIL() << "not yet implemented"; } res = read(m_fuse_fd, &in, sizeof(in)); if (res < 0 && !m_quit) { m_quit = true; FAIL() << "read: " << strerror(errno); } ASSERT_TRUE(res >= static_cast(sizeof(in.header)) || m_quit); /* * Inconsistently, fuse_in_header.len is the size of the entire * request,including header, even though fuse_out_header.len excludes * the size of the header. */ ASSERT_TRUE(res == static_cast(in.header.len) || m_quit); } void MockFS::write_response(const mockfs_buf_out &out) { fd_set writefds; pollfd fds[1]; int nready, nfds; ssize_t r; switch (m_pm) { case BLOCKING: case KQ: /* EVFILT_WRITE is not supported */ break; case POLL: fds[0].fd = m_fuse_fd; fds[0].events = POLLOUT; nready = poll(fds, 1, INFTIM); ASSERT_LE(0, nready) << strerror(errno); ASSERT_EQ(1, nready) << "NULL timeout expired?"; ASSERT_TRUE(fds[0].revents & POLLOUT); break; case SELECT: FD_ZERO(&writefds); FD_SET(m_fuse_fd, &writefds); nfds = m_fuse_fd + 1; nready = select(nfds, NULL, &writefds, NULL, NULL); ASSERT_LE(0, nready) << strerror(errno); ASSERT_EQ(1, nready) << "NULL timeout expired?"; ASSERT_TRUE(FD_ISSET(m_fuse_fd, &writefds)); break; default: FAIL() << "not yet implemented"; } r = write(m_fuse_fd, &out, out.header.len); ASSERT_TRUE(r > 0 || errno == EAGAIN) << strerror(errno); } void* MockFS::service(void *pthr_data) { MockFS *mock_fs = (MockFS*)pthr_data; mock_fs->loop(); return (NULL); } void MockFS::unmount() { ::unmount("mountpoint", 0); } diff --git a/tests/sys/fs/fusefs/mockfs.hh b/tests/sys/fs/fusefs/mockfs.hh index 6fb15089bc23..a7a6a55922c7 100644 --- a/tests/sys/fs/fusefs/mockfs.hh +++ b/tests/sys/fs/fusefs/mockfs.hh @@ -1,423 +1,424 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2019 The FreeBSD Foundation * * This software was developed by BFF Storage Systems, LLC under sponsorship * from the FreeBSD Foundation. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ extern "C" { #include #include #include "fuse_kernel.h" } #include #define TIME_T_MAX (std::numeric_limits::max()) /* * A pseudo-fuse errno used indicate that a fuse operation should have no * response, at least not immediately */ #define FUSE_NORESPONSE 9999 #define SET_OUT_HEADER_LEN(out, variant) { \ (out).header.len = (sizeof((out).header) + \ sizeof((out).body.variant)); \ } /* * Create an expectation on FUSE_LOOKUP and return it so the caller can set * actions. * * This must be a macro instead of a method because EXPECT_CALL returns a type * with a deleted constructor. */ #define EXPECT_LOOKUP(parent, path) \ EXPECT_CALL(*m_mock, process( \ ResultOf([=](auto in) { \ return (in.header.opcode == FUSE_LOOKUP && \ in.header.nodeid == (parent) && \ strcmp(in.body.lookup, (path)) == 0); \ }, Eq(true)), \ _) \ ) extern int verbosity; /* * The maximum that a test case can set max_write, limited by the buffer * supplied when reading from /dev/fuse. This limitation is imposed by * fusefs-libs, but not by the FUSE protocol. */ const uint32_t max_max_write = 0x20000; /* This struct isn't defined by fuse_kernel.h or libfuse, but it should be */ struct fuse_create_out { struct fuse_entry_out entry; struct fuse_open_out open; }; /* Protocol 7.8 version of struct fuse_attr */ struct fuse_attr_7_8 { uint64_t ino; uint64_t size; uint64_t blocks; uint64_t atime; uint64_t mtime; uint64_t ctime; uint32_t atimensec; uint32_t mtimensec; uint32_t ctimensec; uint32_t mode; uint32_t nlink; uint32_t uid; uint32_t gid; uint32_t rdev; }; /* Protocol 7.8 version of struct fuse_attr_out */ struct fuse_attr_out_7_8 { uint64_t attr_valid; uint32_t attr_valid_nsec; uint32_t dummy; struct fuse_attr_7_8 attr; }; /* Protocol 7.8 version of struct fuse_entry_out */ struct fuse_entry_out_7_8 { uint64_t nodeid; /* Inode ID */ uint64_t generation; /* Inode generation: nodeid:gen must be unique for the fs's lifetime */ uint64_t entry_valid; /* Cache timeout for the name */ uint64_t attr_valid; /* Cache timeout for the attributes */ uint32_t entry_valid_nsec; uint32_t attr_valid_nsec; struct fuse_attr_7_8 attr; }; /* Output struct for FUSE_CREATE for protocol 7.8 servers */ struct fuse_create_out_7_8 { struct fuse_entry_out_7_8 entry; struct fuse_open_out open; }; /* Output struct for FUSE_INIT for protocol 7.22 and earlier servers */ struct fuse_init_out_7_22 { uint32_t major; uint32_t minor; uint32_t max_readahead; uint32_t flags; uint16_t max_background; uint16_t congestion_threshold; uint32_t max_write; }; union fuse_payloads_in { fuse_access_in access; fuse_bmap_in bmap; /* * In fusefs-libs 3.4.2 and below the buffer size is fixed at 0x21000 * minus the header sizes. fusefs-libs 3.4.3 (and FUSE Protocol 7.29) * add a FUSE_MAX_PAGES option that allows it to be greater. * * See fuse_kern_chan.c in fusefs-libs 2.9.9 and below, or * FUSE_DEFAULT_MAX_PAGES_PER_REQ in fusefs-libs 3.4.3 and above. */ uint8_t bytes[ max_max_write + 0x1000 - sizeof(struct fuse_in_header) ]; + fuse_copy_file_range_in copy_file_range; fuse_create_in create; fuse_flush_in flush; fuse_fsync_in fsync; fuse_fsync_in fsyncdir; fuse_forget_in forget; fuse_getattr_in getattr; fuse_interrupt_in interrupt; fuse_lk_in getlk; fuse_getxattr_in getxattr; fuse_init_in init; fuse_link_in link; fuse_listxattr_in listxattr; char lookup[0]; fuse_lseek_in lseek; fuse_mkdir_in mkdir; fuse_mknod_in mknod; fuse_open_in open; fuse_open_in opendir; fuse_read_in read; fuse_read_in readdir; fuse_release_in release; fuse_release_in releasedir; fuse_rename_in rename; char rmdir[0]; fuse_setattr_in setattr; fuse_setxattr_in setxattr; fuse_lk_in setlk; fuse_lk_in setlkw; char unlink[0]; fuse_write_in write; }; struct mockfs_buf_in { fuse_in_header header; union fuse_payloads_in body; }; union fuse_payloads_out { fuse_attr_out attr; fuse_attr_out_7_8 attr_7_8; fuse_bmap_out bmap; fuse_create_out create; fuse_create_out_7_8 create_7_8; /* * The protocol places no limits on the size of bytes. Choose * a size big enough for anything we'll test. */ uint8_t bytes[0x20000]; fuse_entry_out entry; fuse_entry_out_7_8 entry_7_8; fuse_lk_out getlk; fuse_getxattr_out getxattr; fuse_init_out init; fuse_init_out_7_22 init_7_22; fuse_lseek_out lseek; /* The inval_entry structure should be followed by the entry's name */ fuse_notify_inval_entry_out inval_entry; fuse_notify_inval_inode_out inval_inode; /* The store structure should be followed by the data to store */ fuse_notify_store_out store; fuse_listxattr_out listxattr; fuse_open_out open; fuse_statfs_out statfs; /* * The protocol places no limits on the length of the string. This is * merely convenient for testing. */ char str[80]; fuse_write_out write; }; struct mockfs_buf_out { fuse_out_header header; union fuse_payloads_out body; /* Default constructor: zero everything */ mockfs_buf_out() { memset(this, 0, sizeof(*this)); } }; /* A function that can be invoked in place of MockFS::process */ typedef std::function> &out)> ProcessMockerT; /* * Helper function used for setting an error expectation for any fuse operation. * The operation will return the supplied error */ ProcessMockerT ReturnErrno(int error); /* Helper function used for returning negative cache entries for LOOKUP */ ProcessMockerT ReturnNegativeCache(const struct timespec *entry_valid); /* Helper function used for returning a single immediate response */ ProcessMockerT ReturnImmediate( std::function f); /* How the daemon should check /dev/fuse for readiness */ enum poll_method { BLOCKING, SELECT, POLL, KQ }; /* * Fake FUSE filesystem * * "Mounts" a filesystem to a temporary directory and services requests * according to the programmed expectations. * * Operates directly on the fusefs(4) kernel API, not the libfuse(3) user api. */ class MockFS { /* * thread id of the fuse daemon thread * * It must run in a separate thread so it doesn't deadlock with the * client test code. */ pthread_t m_daemon_id; /* file descriptor of /dev/fuse control device */ int m_fuse_fd; /* The minor version of the kernel API that this mock daemon targets */ uint32_t m_kernel_minor_version; int m_kq; /* The max_readahead file system option */ uint32_t m_maxreadahead; /* pid of the test process */ pid_t m_pid; /* Method the daemon should use for I/O to and from /dev/fuse */ enum poll_method m_pm; /* Timestamp granularity in nanoseconds */ unsigned m_time_gran; void audit_request(const mockfs_buf_in &in, ssize_t buflen); void debug_request(const mockfs_buf_in&, ssize_t buflen); void debug_response(const mockfs_buf_out&); /* Initialize a session after mounting */ void init(uint32_t flags); /* Is pid from a process that might be involved in the test? */ bool pid_ok(pid_t pid); /* Default request handler */ void process_default(const mockfs_buf_in&, std::vector>&); /* Entry point for the daemon thread */ static void* service(void*); /* * Read, but do not process, a single request from the kernel * * @param in Return storage for the FUSE request * @param res Return value of read(2). If positive, the amount of * data read from the fuse device. */ void read_request(mockfs_buf_in& in, ssize_t& res); /* Write a single response back to the kernel */ void write_response(const mockfs_buf_out &out); public: /* pid of child process, for two-process test cases */ pid_t m_child_pid; /* Maximum size of a FUSE_WRITE write */ uint32_t m_maxwrite; /* * Number of events that were available from /dev/fuse after the last * kevent call. Only valid when m_pm = KQ. */ int m_nready; /* Tell the daemon to shut down ASAP */ bool m_quit; /* Create a new mockfs and mount it to a tempdir */ MockFS(int max_readahead, bool allow_other, bool default_permissions, bool push_symlinks_in, bool ro, enum poll_method pm, uint32_t flags, uint32_t kernel_minor_version, uint32_t max_write, bool async, bool no_clusterr, unsigned time_gran, bool nointr); virtual ~MockFS(); /* Kill the filesystem daemon without unmounting the filesystem */ void kill_daemon(); /* Process FUSE requests endlessly */ void loop(); /* * Send an asynchronous notification to invalidate a directory entry. * Similar to libfuse's fuse_lowlevel_notify_inval_entry * * This method will block until the client has responded, so it should * generally be run in a separate thread from request processing. * * @param parent Parent directory's inode number * @param name name of dirent to invalidate * @param namelen size of name, including the NUL */ int notify_inval_entry(ino_t parent, const char *name, size_t namelen); /* * Send an asynchronous notification to invalidate an inode's cached * data and/or attributes. Similar to libfuse's * fuse_lowlevel_notify_inval_inode. * * This method will block until the client has responded, so it should * generally be run in a separate thread from request processing. * * @param ino File's inode number * @param off offset at which to begin invalidation. A * negative offset means to invalidate attributes * only. * @param len Size of region of data to invalidate. 0 means * to invalidate all cached data. */ int notify_inval_inode(ino_t ino, off_t off, ssize_t len); /* * Send an asynchronous notification to store data directly into an * inode's cache. Similar to libfuse's fuse_lowlevel_notify_store. * * This method will block until the client has responded, so it should * generally be run in a separate thread from request processing. * * @param ino File's inode number * @param off Offset at which to store data * @param data Pointer to the data to cache * @param len Size of data */ int notify_store(ino_t ino, off_t off, const void* data, ssize_t size); /* * Request handler * * This method is expected to provide the responses to each FUSE * operation. For an immediate response, push one buffer into out. * For a delayed response, push nothing. For an immediate response * plus a delayed response to an earlier operation, push two bufs. * Test cases must define each response using Googlemock expectations */ MOCK_METHOD2(process, void(const mockfs_buf_in&, std::vector>&)); /* Gracefully unmount */ void unmount(); }; diff --git a/tests/sys/fs/fusefs/write.cc b/tests/sys/fs/fusefs/write.cc index 7529b76847d7..f0f9685000fb 100644 --- a/tests/sys/fs/fusefs/write.cc +++ b/tests/sys/fs/fusefs/write.cc @@ -1,1403 +1,1402 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2019 The FreeBSD Foundation * * This software was developed by BFF Storage Systems, LLC under sponsorship * from the FreeBSD Foundation. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ extern "C" { #include #include #include #include #include #include #include #include #include #include } #include "mockfs.hh" #include "utils.hh" using namespace testing; class Write: public FuseTest { public: static sig_atomic_t s_sigxfsz; void SetUp() { s_sigxfsz = 0; FuseTest::SetUp(); } void TearDown() { struct sigaction sa; bzero(&sa, sizeof(sa)); sa.sa_handler = SIG_DFL; sigaction(SIGXFSZ, &sa, NULL); FuseTest::TearDown(); } void expect_lookup(const char *relpath, uint64_t ino, uint64_t size) { FuseTest::expect_lookup(relpath, ino, S_IFREG | 0644, size, 1); } void expect_release(uint64_t ino, ProcessMockerT r) { EXPECT_CALL(*m_mock, process( ResultOf([=](auto in) { return (in.header.opcode == FUSE_RELEASE && in.header.nodeid == ino); }, Eq(true)), _) ).WillRepeatedly(Invoke(r)); } void expect_write(uint64_t ino, uint64_t offset, uint64_t isize, uint64_t osize, const void *contents) { FuseTest::expect_write(ino, offset, isize, osize, 0, 0, contents); } /* Expect a write that may or may not come, depending on the cache mode */ void maybe_expect_write(uint64_t ino, uint64_t offset, uint64_t size, const void *contents) { EXPECT_CALL(*m_mock, process( ResultOf([=](auto in) { const char *buf = (const char*)in.body.bytes + sizeof(struct fuse_write_in); return (in.header.opcode == FUSE_WRITE && in.header.nodeid == ino && in.body.write.offset == offset && in.body.write.size == size && 0 == bcmp(buf, contents, size)); }, Eq(true)), _) ).Times(AtMost(1)) .WillRepeatedly(Invoke( ReturnImmediate([=](auto in __unused, auto& out) { SET_OUT_HEADER_LEN(out, write); out.body.write.size = size; }) )); } }; sig_atomic_t Write::s_sigxfsz = 0; class Write_7_8: public FuseTest { public: virtual void SetUp() { m_kernel_minor_version = 8; FuseTest::SetUp(); } void expect_lookup(const char *relpath, uint64_t ino, uint64_t size) { FuseTest::expect_lookup_7_8(relpath, ino, S_IFREG | 0644, size, 1); } }; class AioWrite: public Write { virtual void SetUp() { if (!is_unsafe_aio_enabled()) GTEST_SKIP() << "vfs.aio.enable_unsafe must be set for this test"; FuseTest::SetUp(); } }; /* Tests for the writeback cache mode */ class WriteBack: public Write { public: virtual void SetUp() { m_init_flags |= FUSE_WRITEBACK_CACHE; FuseTest::SetUp(); if (IsSkipped()) return; } void expect_write(uint64_t ino, uint64_t offset, uint64_t isize, uint64_t osize, const void *contents) { FuseTest::expect_write(ino, offset, isize, osize, FUSE_WRITE_CACHE, 0, contents); } }; class WriteBackAsync: public WriteBack { public: virtual void SetUp() { m_async = true; m_maxwrite = 65536; WriteBack::SetUp(); } }; class TimeGran: public WriteBackAsync, public WithParamInterface { public: virtual void SetUp() { m_time_gran = 1 << GetParam(); WriteBackAsync::SetUp(); } }; /* Tests for clustered writes with WriteBack cacheing */ class WriteCluster: public WriteBack { public: virtual void SetUp() { m_async = true; m_maxwrite = 1 << 25; // Anything larger than MAXPHYS will suffice WriteBack::SetUp(); if (m_maxphys < 2 * DFLTPHYS) GTEST_SKIP() << "MAXPHYS must be at least twice DFLTPHYS" << " for this test"; if (m_maxphys < 2 * m_maxbcachebuf) GTEST_SKIP() << "MAXPHYS must be at least twice maxbcachebuf" << " for this test"; } }; /* Tests relating to the server's max_write property */ class WriteMaxWrite: public Write { public: virtual void SetUp() { /* * For this test, m_maxwrite must be less than either m_maxbcachebuf or * maxphys. */ m_maxwrite = 32768; Write::SetUp(); } }; void sigxfsz_handler(int __unused sig) { Write::s_sigxfsz = 1; } /* AIO writes need to set the header's pid field correctly */ /* https://bugs.freebsd.org/bugzilla/show_bug.cgi?id=236379 */ TEST_F(AioWrite, DISABLED_aio_write) { const char FULLPATH[] = "mountpoint/some_file.txt"; const char RELPATH[] = "some_file.txt"; const char *CONTENTS = "abcdefgh"; uint64_t ino = 42; uint64_t offset = 4096; int fd; ssize_t bufsize = strlen(CONTENTS); struct aiocb iocb, *piocb; expect_lookup(RELPATH, ino, 0); expect_open(ino, 0, 1); expect_write(ino, offset, bufsize, bufsize, CONTENTS); fd = open(FULLPATH, O_WRONLY); ASSERT_LE(0, fd) << strerror(errno); iocb.aio_nbytes = bufsize; iocb.aio_fildes = fd; iocb.aio_buf = __DECONST(void *, CONTENTS); iocb.aio_offset = offset; iocb.aio_sigevent.sigev_notify = SIGEV_NONE; ASSERT_EQ(0, aio_write(&iocb)) << strerror(errno); ASSERT_EQ(bufsize, aio_waitcomplete(&piocb, NULL)) << strerror(errno); leak(fd); } /* * When a file is opened with O_APPEND, we should forward that flag to * FUSE_OPEN (tested by Open.o_append) but still attempt to calculate the * offset internally. That way we'll work both with filesystems that * understand O_APPEND (and ignore the offset) and filesystems that don't (and * simply use the offset). * * Note that verifying the O_APPEND flag in FUSE_OPEN is done in the * Open.o_append test. */ TEST_F(Write, append) { const ssize_t BUFSIZE = 9; const char FULLPATH[] = "mountpoint/some_file.txt"; const char RELPATH[] = "some_file.txt"; const char CONTENTS[BUFSIZE] = "abcdefgh"; uint64_t ino = 42; /* * Set offset to a maxbcachebuf boundary so we don't need to RMW when * using writeback caching */ uint64_t initial_offset = m_maxbcachebuf; int fd; expect_lookup(RELPATH, ino, initial_offset); expect_open(ino, 0, 1); expect_write(ino, initial_offset, BUFSIZE, BUFSIZE, CONTENTS); /* Must open O_RDWR or fuse(4) implicitly sets direct_io */ fd = open(FULLPATH, O_RDWR | O_APPEND); ASSERT_LE(0, fd) << strerror(errno); ASSERT_EQ(BUFSIZE, write(fd, CONTENTS, BUFSIZE)) << strerror(errno); leak(fd); } /* If a file is cached, then appending to the end should not cause a read */ TEST_F(Write, append_to_cached) { const ssize_t BUFSIZE = 9; const char FULLPATH[] = "mountpoint/some_file.txt"; const char RELPATH[] = "some_file.txt"; char *oldcontents, *oldbuf; const char CONTENTS[BUFSIZE] = "abcdefgh"; uint64_t ino = 42; /* * Set offset in between maxbcachebuf boundary to test buffer handling */ uint64_t oldsize = m_maxbcachebuf / 2; int fd; oldcontents = (char*)calloc(1, oldsize); ASSERT_NE(nullptr, oldcontents) << strerror(errno); oldbuf = (char*)malloc(oldsize); ASSERT_NE(nullptr, oldbuf) << strerror(errno); expect_lookup(RELPATH, ino, oldsize); expect_open(ino, 0, 1); expect_read(ino, 0, oldsize, oldsize, oldcontents); maybe_expect_write(ino, oldsize, BUFSIZE, CONTENTS); /* Must open O_RDWR or fuse(4) implicitly sets direct_io */ fd = open(FULLPATH, O_RDWR | O_APPEND); ASSERT_LE(0, fd) << strerror(errno); /* Read the old data into the cache */ ASSERT_EQ((ssize_t)oldsize, read(fd, oldbuf, oldsize)) << strerror(errno); /* Write the new data. There should be no more read operations */ ASSERT_EQ(BUFSIZE, write(fd, CONTENTS, BUFSIZE)) << strerror(errno); leak(fd); free(oldbuf); free(oldcontents); } TEST_F(Write, append_direct_io) { const ssize_t BUFSIZE = 9; const char FULLPATH[] = "mountpoint/some_file.txt"; const char RELPATH[] = "some_file.txt"; const char CONTENTS[BUFSIZE] = "abcdefgh"; uint64_t ino = 42; uint64_t initial_offset = 4096; int fd; expect_lookup(RELPATH, ino, initial_offset); expect_open(ino, FOPEN_DIRECT_IO, 1); expect_write(ino, initial_offset, BUFSIZE, BUFSIZE, CONTENTS); fd = open(FULLPATH, O_WRONLY | O_APPEND); ASSERT_LE(0, fd) << strerror(errno); ASSERT_EQ(BUFSIZE, write(fd, CONTENTS, BUFSIZE)) << strerror(errno); leak(fd); } /* A direct write should evict any overlapping cached data */ TEST_F(Write, direct_io_evicts_cache) { const char FULLPATH[] = "mountpoint/some_file.txt"; const char RELPATH[] = "some_file.txt"; const char CONTENTS0[] = "abcdefgh"; const char CONTENTS1[] = "ijklmnop"; uint64_t ino = 42; int fd; ssize_t bufsize = strlen(CONTENTS0) + 1; char readbuf[bufsize]; expect_lookup(RELPATH, ino, bufsize); expect_open(ino, 0, 1); expect_read(ino, 0, bufsize, bufsize, CONTENTS0); expect_write(ino, 0, bufsize, bufsize, CONTENTS1); fd = open(FULLPATH, O_RDWR); ASSERT_LE(0, fd) << strerror(errno); // Prime cache ASSERT_EQ(bufsize, read(fd, readbuf, bufsize)) << strerror(errno); // Write directly, evicting cache ASSERT_EQ(0, fcntl(fd, F_SETFL, O_DIRECT)) << strerror(errno); ASSERT_EQ(0, lseek(fd, 0, SEEK_SET)) << strerror(errno); ASSERT_EQ(bufsize, write(fd, CONTENTS1, bufsize)) << strerror(errno); // Read again. Cache should be bypassed expect_read(ino, 0, bufsize, bufsize, CONTENTS1); ASSERT_EQ(0, fcntl(fd, F_SETFL, 0)) << strerror(errno); ASSERT_EQ(0, lseek(fd, 0, SEEK_SET)) << strerror(errno); ASSERT_EQ(bufsize, read(fd, readbuf, bufsize)) << strerror(errno); ASSERT_STREQ(readbuf, CONTENTS1); leak(fd); } /* * If the server doesn't return FOPEN_DIRECT_IO during FUSE_OPEN, then it's not * allowed to return a short write for that file handle. However, if it does * then we should still do our darndest to handle it by resending the unwritten * portion. */ TEST_F(Write, indirect_io_short_write) { const char FULLPATH[] = "mountpoint/some_file.txt"; const char RELPATH[] = "some_file.txt"; const char *CONTENTS = "abcdefghijklmnop"; uint64_t ino = 42; int fd; ssize_t bufsize = strlen(CONTENTS); ssize_t bufsize0 = 11; ssize_t bufsize1 = strlen(CONTENTS) - bufsize0; const char *contents1 = CONTENTS + bufsize0; expect_lookup(RELPATH, ino, 0); expect_open(ino, 0, 1); expect_write(ino, 0, bufsize, bufsize0, CONTENTS); expect_write(ino, bufsize0, bufsize1, bufsize1, contents1); fd = open(FULLPATH, O_WRONLY); ASSERT_LE(0, fd) << strerror(errno); ASSERT_EQ(bufsize, write(fd, CONTENTS, bufsize)) << strerror(errno); leak(fd); } /* * When the direct_io option is used, filesystems are allowed to write less * data than requested. We should return the short write to userland. */ TEST_F(Write, direct_io_short_write) { const char FULLPATH[] = "mountpoint/some_file.txt"; const char RELPATH[] = "some_file.txt"; const char *CONTENTS = "abcdefghijklmnop"; uint64_t ino = 42; int fd; ssize_t bufsize = strlen(CONTENTS); ssize_t halfbufsize = bufsize / 2; expect_lookup(RELPATH, ino, 0); expect_open(ino, FOPEN_DIRECT_IO, 1); expect_write(ino, 0, bufsize, halfbufsize, CONTENTS); fd = open(FULLPATH, O_WRONLY); ASSERT_LE(0, fd) << strerror(errno); ASSERT_EQ(halfbufsize, write(fd, CONTENTS, bufsize)) << strerror(errno); leak(fd); } /* * An insidious edge case: the filesystem returns a short write, and the * difference between what we requested and what it actually wrote crosses an * iov element boundary */ TEST_F(Write, direct_io_short_write_iov) { const char FULLPATH[] = "mountpoint/some_file.txt"; const char RELPATH[] = "some_file.txt"; const char *CONTENTS0 = "abcdefgh"; const char *CONTENTS1 = "ijklmnop"; const char *EXPECTED0 = "abcdefghijklmnop"; uint64_t ino = 42; int fd; ssize_t size0 = strlen(CONTENTS0) - 1; ssize_t size1 = strlen(CONTENTS1) + 1; ssize_t totalsize = size0 + size1; struct iovec iov[2]; expect_lookup(RELPATH, ino, 0); expect_open(ino, FOPEN_DIRECT_IO, 1); expect_write(ino, 0, totalsize, size0, EXPECTED0); fd = open(FULLPATH, O_WRONLY); ASSERT_LE(0, fd) << strerror(errno); iov[0].iov_base = __DECONST(void*, CONTENTS0); iov[0].iov_len = strlen(CONTENTS0); iov[1].iov_base = __DECONST(void*, CONTENTS1); iov[1].iov_len = strlen(CONTENTS1); ASSERT_EQ(size0, writev(fd, iov, 2)) << strerror(errno); leak(fd); } /* fusefs should respect RLIMIT_FSIZE */ TEST_F(Write, rlimit_fsize) { const char FULLPATH[] = "mountpoint/some_file.txt"; const char RELPATH[] = "some_file.txt"; const char *CONTENTS = "abcdefgh"; struct rlimit rl; ssize_t bufsize = strlen(CONTENTS); off_t offset = 1'000'000'000; uint64_t ino = 42; int fd; expect_lookup(RELPATH, ino, 0); expect_open(ino, 0, 1); rl.rlim_cur = offset; rl.rlim_max = 10 * offset; ASSERT_EQ(0, setrlimit(RLIMIT_FSIZE, &rl)) << strerror(errno); ASSERT_NE(SIG_ERR, signal(SIGXFSZ, sigxfsz_handler)) << strerror(errno); fd = open(FULLPATH, O_WRONLY); ASSERT_LE(0, fd) << strerror(errno); ASSERT_EQ(-1, pwrite(fd, CONTENTS, bufsize, offset)); EXPECT_EQ(EFBIG, errno); EXPECT_EQ(1, s_sigxfsz); leak(fd); } /* * A short read indicates EOF. Test that nothing bad happens if we get EOF * during the R of a RMW operation. */ TEST_F(Write, eof_during_rmw) { const char FULLPATH[] = "mountpoint/some_file.txt"; const char RELPATH[] = "some_file.txt"; const char *CONTENTS = "abcdefgh"; const char *INITIAL = "XXXXXXXXXX"; uint64_t ino = 42; uint64_t offset = 1; ssize_t bufsize = strlen(CONTENTS) + 1; off_t orig_fsize = 10; off_t truncated_fsize = 5; - off_t final_fsize = bufsize; int fd; FuseTest::expect_lookup(RELPATH, ino, S_IFREG | 0644, orig_fsize, 1); expect_open(ino, 0, 1); expect_read(ino, 0, orig_fsize, truncated_fsize, INITIAL, O_RDWR); maybe_expect_write(ino, offset, bufsize, CONTENTS); fd = open(FULLPATH, O_RDWR); ASSERT_LE(0, fd) << strerror(errno); ASSERT_EQ(bufsize, pwrite(fd, CONTENTS, bufsize, offset)) << strerror(errno); leak(fd); } /* * If the kernel cannot be sure which uid, gid, or pid was responsible for a * write, then it must set the FUSE_WRITE_CACHE bit */ /* https://bugs.freebsd.org/bugzilla/show_bug.cgi?id=236378 */ TEST_F(Write, mmap) { const char FULLPATH[] = "mountpoint/some_file.txt"; const char RELPATH[] = "some_file.txt"; const char *CONTENTS = "abcdefgh"; uint64_t ino = 42; int fd; ssize_t bufsize = strlen(CONTENTS); void *p; uint64_t offset = 10; size_t len; void *zeros, *expected; len = getpagesize(); zeros = calloc(1, len); ASSERT_NE(nullptr, zeros); expected = calloc(1, len); ASSERT_NE(nullptr, expected); memmove((uint8_t*)expected + offset, CONTENTS, bufsize); expect_lookup(RELPATH, ino, len); expect_open(ino, 0, 1); expect_read(ino, 0, len, len, zeros); /* * Writes from the pager may or may not be associated with the correct * pid, so they must set FUSE_WRITE_CACHE. */ FuseTest::expect_write(ino, 0, len, len, FUSE_WRITE_CACHE, 0, expected); expect_flush(ino, 1, ReturnErrno(0)); expect_release(ino, ReturnErrno(0)); fd = open(FULLPATH, O_RDWR); ASSERT_LE(0, fd) << strerror(errno); p = mmap(NULL, len, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0); ASSERT_NE(MAP_FAILED, p) << strerror(errno); memmove((uint8_t*)p + offset, CONTENTS, bufsize); ASSERT_EQ(0, munmap(p, len)) << strerror(errno); close(fd); // Write mmap'd data on close free(expected); free(zeros); leak(fd); } TEST_F(Write, pwrite) { const char FULLPATH[] = "mountpoint/some_file.txt"; const char RELPATH[] = "some_file.txt"; const char *CONTENTS = "abcdefgh"; uint64_t ino = 42; uint64_t offset = m_maxbcachebuf; int fd; ssize_t bufsize = strlen(CONTENTS); expect_lookup(RELPATH, ino, 0); expect_open(ino, 0, 1); expect_write(ino, offset, bufsize, bufsize, CONTENTS); fd = open(FULLPATH, O_WRONLY); ASSERT_LE(0, fd) << strerror(errno); ASSERT_EQ(bufsize, pwrite(fd, CONTENTS, bufsize, offset)) << strerror(errno); leak(fd); } /* Writing a file should update its cached mtime and ctime */ TEST_F(Write, timestamps) { const char FULLPATH[] = "mountpoint/some_file.txt"; const char RELPATH[] = "some_file.txt"; const char *CONTENTS = "abcdefgh"; ssize_t bufsize = strlen(CONTENTS); uint64_t ino = 42; struct stat sb0, sb1; int fd; expect_lookup(RELPATH, ino, 0); expect_open(ino, 0, 1); maybe_expect_write(ino, 0, bufsize, CONTENTS); fd = open(FULLPATH, O_RDWR); ASSERT_LE(0, fd) << strerror(errno); ASSERT_EQ(0, fstat(fd, &sb0)) << strerror(errno); ASSERT_EQ(bufsize, write(fd, CONTENTS, bufsize)) << strerror(errno); nap(); ASSERT_EQ(0, fstat(fd, &sb1)) << strerror(errno); EXPECT_EQ(sb0.st_atime, sb1.st_atime); EXPECT_NE(sb0.st_mtime, sb1.st_mtime); EXPECT_NE(sb0.st_ctime, sb1.st_ctime); leak(fd); } TEST_F(Write, write) { const char FULLPATH[] = "mountpoint/some_file.txt"; const char RELPATH[] = "some_file.txt"; const char *CONTENTS = "abcdefgh"; uint64_t ino = 42; int fd; ssize_t bufsize = strlen(CONTENTS); expect_lookup(RELPATH, ino, 0); expect_open(ino, 0, 1); expect_write(ino, 0, bufsize, bufsize, CONTENTS); fd = open(FULLPATH, O_WRONLY); ASSERT_LE(0, fd) << strerror(errno); ASSERT_EQ(bufsize, write(fd, CONTENTS, bufsize)) << strerror(errno); leak(fd); } /* fuse(4) should not issue writes of greater size than the daemon requests */ TEST_F(WriteMaxWrite, write) { const char FULLPATH[] = "mountpoint/some_file.txt"; const char RELPATH[] = "some_file.txt"; int *contents; uint64_t ino = 42; int fd; ssize_t halfbufsize, bufsize; halfbufsize = m_mock->m_maxwrite; if (halfbufsize >= m_maxbcachebuf || halfbufsize >= m_maxphys) GTEST_SKIP() << "Must lower m_maxwrite for this test"; bufsize = halfbufsize * 2; contents = (int*)malloc(bufsize); ASSERT_NE(nullptr, contents); for (int i = 0; i < (int)bufsize / (int)sizeof(i); i++) { contents[i] = i; } expect_lookup(RELPATH, ino, 0); expect_open(ino, 0, 1); maybe_expect_write(ino, 0, halfbufsize, contents); maybe_expect_write(ino, halfbufsize, halfbufsize, &contents[halfbufsize / sizeof(int)]); fd = open(FULLPATH, O_WRONLY); ASSERT_LE(0, fd) << strerror(errno); ASSERT_EQ(bufsize, write(fd, contents, bufsize)) << strerror(errno); leak(fd); free(contents); } TEST_F(Write, write_nothing) { const char FULLPATH[] = "mountpoint/some_file.txt"; const char RELPATH[] = "some_file.txt"; const char *CONTENTS = ""; uint64_t ino = 42; int fd; ssize_t bufsize = 0; expect_lookup(RELPATH, ino, 0); expect_open(ino, 0, 1); fd = open(FULLPATH, O_WRONLY); ASSERT_LE(0, fd) << strerror(errno); ASSERT_EQ(bufsize, write(fd, CONTENTS, bufsize)) << strerror(errno); leak(fd); } TEST_F(Write_7_8, write) { const char FULLPATH[] = "mountpoint/some_file.txt"; const char RELPATH[] = "some_file.txt"; const char *CONTENTS = "abcdefgh"; uint64_t ino = 42; int fd; ssize_t bufsize = strlen(CONTENTS); expect_lookup(RELPATH, ino, 0); expect_open(ino, 0, 1); expect_write_7_8(ino, 0, bufsize, bufsize, CONTENTS); fd = open(FULLPATH, O_WRONLY); ASSERT_LE(0, fd) << strerror(errno); ASSERT_EQ(bufsize, write(fd, CONTENTS, bufsize)) << strerror(errno); leak(fd); } /* In writeback mode, dirty data should be written on close */ TEST_F(WriteBackAsync, close) { const char FULLPATH[] = "mountpoint/some_file.txt"; const char RELPATH[] = "some_file.txt"; const char *CONTENTS = "abcdefgh"; uint64_t ino = 42; int fd; ssize_t bufsize = strlen(CONTENTS); expect_lookup(RELPATH, ino, 0); expect_open(ino, 0, 1); expect_write(ino, 0, bufsize, bufsize, CONTENTS); EXPECT_CALL(*m_mock, process( ResultOf([=](auto in) { return (in.header.opcode == FUSE_SETATTR); }, Eq(true)), _) ).WillRepeatedly(Invoke(ReturnImmediate([=](auto i __unused, auto& out) { SET_OUT_HEADER_LEN(out, attr); out.body.attr.attr.ino = ino; // Must match nodeid }))); expect_flush(ino, 1, ReturnErrno(0)); expect_release(ino, ReturnErrno(0)); fd = open(FULLPATH, O_RDWR); ASSERT_LE(0, fd) << strerror(errno); ASSERT_EQ(bufsize, write(fd, CONTENTS, bufsize)) << strerror(errno); close(fd); } /* In writeback mode, adjacent writes will be clustered together */ TEST_F(WriteCluster, clustering) { const char FULLPATH[] = "mountpoint/some_file.txt"; const char RELPATH[] = "some_file.txt"; uint64_t ino = 42; int i, fd; void *wbuf, *wbuf2x; ssize_t bufsize = m_maxbcachebuf; off_t filesize = 5 * bufsize; wbuf = malloc(bufsize); ASSERT_NE(nullptr, wbuf) << strerror(errno); memset(wbuf, 'X', bufsize); wbuf2x = malloc(2 * bufsize); ASSERT_NE(nullptr, wbuf2x) << strerror(errno); memset(wbuf2x, 'X', 2 * bufsize); expect_lookup(RELPATH, ino, filesize); expect_open(ino, 0, 1); /* * Writes of bufsize-bytes each should be clustered into greater sizes. * The amount of clustering is adaptive, so the first write actually * issued will be 2x bufsize and subsequent writes may be larger */ expect_write(ino, 0, 2 * bufsize, 2 * bufsize, wbuf2x); expect_write(ino, 2 * bufsize, 2 * bufsize, 2 * bufsize, wbuf2x); expect_flush(ino, 1, ReturnErrno(0)); expect_release(ino, ReturnErrno(0)); fd = open(FULLPATH, O_RDWR); ASSERT_LE(0, fd) << strerror(errno); for (i = 0; i < 4; i++) { ASSERT_EQ(bufsize, write(fd, wbuf, bufsize)) << strerror(errno); } close(fd); free(wbuf2x); free(wbuf); } /* * When clustering writes, an I/O error to any of the cluster's children should * not panic the system on unmount */ /* * Disabled because it panics. * https://bugs.freebsd.org/bugzilla/show_bug.cgi?id=238565 */ TEST_F(WriteCluster, DISABLED_cluster_write_err) { const char FULLPATH[] = "mountpoint/some_file.txt"; const char RELPATH[] = "some_file.txt"; uint64_t ino = 42; int i, fd; void *wbuf; ssize_t bufsize = m_maxbcachebuf; off_t filesize = 4 * bufsize; wbuf = malloc(bufsize); ASSERT_NE(nullptr, wbuf) << strerror(errno); memset(wbuf, 'X', bufsize); expect_lookup(RELPATH, ino, filesize); expect_open(ino, 0, 1); EXPECT_CALL(*m_mock, process( ResultOf([=](auto in) { return (in.header.opcode == FUSE_WRITE); }, Eq(true)), _) ).WillRepeatedly(Invoke(ReturnErrno(EIO))); expect_flush(ino, 1, ReturnErrno(0)); expect_release(ino, ReturnErrno(0)); fd = open(FULLPATH, O_RDWR); ASSERT_LE(0, fd) << strerror(errno); for (i = 0; i < 3; i++) { ASSERT_EQ(bufsize, write(fd, wbuf, bufsize)) << strerror(errno); } close(fd); free(wbuf); } /* * In writeback mode, writes to an O_WRONLY file could trigger reads from the * server. The FUSE protocol explicitly allows that. */ TEST_F(WriteBack, rmw) { const char FULLPATH[] = "mountpoint/some_file.txt"; const char RELPATH[] = "some_file.txt"; const char *CONTENTS = "abcdefgh"; const char *INITIAL = "XXXXXXXXXX"; uint64_t ino = 42; uint64_t offset = 1; off_t fsize = 10; int fd; ssize_t bufsize = strlen(CONTENTS); FuseTest::expect_lookup(RELPATH, ino, S_IFREG | 0644, fsize, 1); expect_open(ino, 0, 1); expect_read(ino, 0, fsize, fsize, INITIAL, O_WRONLY); maybe_expect_write(ino, offset, bufsize, CONTENTS); fd = open(FULLPATH, O_WRONLY); ASSERT_LE(0, fd) << strerror(errno); ASSERT_EQ(bufsize, pwrite(fd, CONTENTS, bufsize, offset)) << strerror(errno); leak(fd); } /* * Without direct_io, writes should be committed to cache */ TEST_F(WriteBack, cache) { const char FULLPATH[] = "mountpoint/some_file.txt"; const char RELPATH[] = "some_file.txt"; const char *CONTENTS = "abcdefgh"; uint64_t ino = 42; int fd; ssize_t bufsize = strlen(CONTENTS); uint8_t readbuf[bufsize]; expect_lookup(RELPATH, ino, 0); expect_open(ino, 0, 1); expect_write(ino, 0, bufsize, bufsize, CONTENTS); fd = open(FULLPATH, O_RDWR); ASSERT_LE(0, fd) << strerror(errno); ASSERT_EQ(bufsize, write(fd, CONTENTS, bufsize)) << strerror(errno); /* * A subsequent read should be serviced by cache, without querying the * filesystem daemon */ ASSERT_EQ(0, lseek(fd, 0, SEEK_SET)) << strerror(errno); ASSERT_EQ(bufsize, read(fd, readbuf, bufsize)) << strerror(errno); leak(fd); } /* * With O_DIRECT, writes should be not committed to cache. Admittedly this is * an odd test, because it would be unusual to use O_DIRECT for writes but not * reads. */ TEST_F(WriteBack, o_direct) { const char FULLPATH[] = "mountpoint/some_file.txt"; const char RELPATH[] = "some_file.txt"; const char *CONTENTS = "abcdefgh"; uint64_t ino = 42; int fd; ssize_t bufsize = strlen(CONTENTS); uint8_t readbuf[bufsize]; expect_lookup(RELPATH, ino, 0); expect_open(ino, 0, 1); FuseTest::expect_write(ino, 0, bufsize, bufsize, 0, FUSE_WRITE_CACHE, CONTENTS); expect_read(ino, 0, bufsize, bufsize, CONTENTS); fd = open(FULLPATH, O_RDWR | O_DIRECT); ASSERT_LE(0, fd) << strerror(errno); ASSERT_EQ(bufsize, write(fd, CONTENTS, bufsize)) << strerror(errno); /* A subsequent read must query the daemon because cache is empty */ ASSERT_EQ(0, lseek(fd, 0, SEEK_SET)) << strerror(errno); ASSERT_EQ(0, fcntl(fd, F_SETFL, 0)) << strerror(errno); ASSERT_EQ(bufsize, read(fd, readbuf, bufsize)) << strerror(errno); leak(fd); } TEST_F(WriteBack, direct_io) { const char FULLPATH[] = "mountpoint/some_file.txt"; const char RELPATH[] = "some_file.txt"; const char *CONTENTS = "abcdefgh"; uint64_t ino = 42; int fd; ssize_t bufsize = strlen(CONTENTS); uint8_t readbuf[bufsize]; expect_lookup(RELPATH, ino, 0); expect_open(ino, FOPEN_DIRECT_IO, 1); FuseTest::expect_write(ino, 0, bufsize, bufsize, 0, FUSE_WRITE_CACHE, CONTENTS); expect_read(ino, 0, bufsize, bufsize, CONTENTS); fd = open(FULLPATH, O_RDWR); ASSERT_LE(0, fd) << strerror(errno); ASSERT_EQ(bufsize, write(fd, CONTENTS, bufsize)) << strerror(errno); /* A subsequent read must query the daemon because cache is empty */ ASSERT_EQ(0, lseek(fd, 0, SEEK_SET)) << strerror(errno); ASSERT_EQ(0, fcntl(fd, F_SETFL, 0)) << strerror(errno); ASSERT_EQ(bufsize, read(fd, readbuf, bufsize)) << strerror(errno); leak(fd); } /* * mmap should still be possible even if the server used direct_io. Mmap will * still use the cache, though. * * Regression test for bug 247276 * https://bugs.freebsd.org/bugzilla/show_bug.cgi?id=247276 */ TEST_F(WriteBack, mmap_direct_io) { const char FULLPATH[] = "mountpoint/some_file.txt"; const char RELPATH[] = "some_file.txt"; const char *CONTENTS = "abcdefgh"; uint64_t ino = 42; int fd; size_t len; ssize_t bufsize = strlen(CONTENTS); void *p, *zeros; len = getpagesize(); zeros = calloc(1, len); ASSERT_NE(nullptr, zeros); expect_lookup(RELPATH, ino, len); expect_open(ino, FOPEN_DIRECT_IO, 1); expect_read(ino, 0, len, len, zeros); expect_flush(ino, 1, ReturnErrno(0)); FuseTest::expect_write(ino, 0, len, len, FUSE_WRITE_CACHE, 0, zeros); expect_release(ino, ReturnErrno(0)); fd = open(FULLPATH, O_RDWR); ASSERT_LE(0, fd) << strerror(errno); p = mmap(NULL, len, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0); ASSERT_NE(MAP_FAILED, p) << strerror(errno); memmove((uint8_t*)p, CONTENTS, bufsize); ASSERT_EQ(0, munmap(p, len)) << strerror(errno); close(fd); // Write mmap'd data on close free(zeros); } /* * When mounted with -o async, the writeback cache mode should delay writes */ TEST_F(WriteBackAsync, delay) { const char FULLPATH[] = "mountpoint/some_file.txt"; const char RELPATH[] = "some_file.txt"; const char *CONTENTS = "abcdefgh"; uint64_t ino = 42; int fd; ssize_t bufsize = strlen(CONTENTS); expect_lookup(RELPATH, ino, 0); expect_open(ino, 0, 1); /* Write should be cached, but FUSE_WRITE shouldn't be sent */ EXPECT_CALL(*m_mock, process( ResultOf([=](auto in) { return (in.header.opcode == FUSE_WRITE); }, Eq(true)), _) ).Times(0); fd = open(FULLPATH, O_RDWR); ASSERT_LE(0, fd) << strerror(errno); ASSERT_EQ(bufsize, write(fd, CONTENTS, bufsize)) << strerror(errno); /* Don't close the file because that would flush the cache */ leak(fd); } /* * A direct write should not evict dirty cached data from outside of its own * byte range. */ TEST_F(WriteBackAsync, direct_io_ignores_unrelated_cached) { const char FULLPATH[] = "mountpoint/some_file.txt"; const char RELPATH[] = "some_file.txt"; const char CONTENTS0[] = "abcdefgh"; const char CONTENTS1[] = "ijklmnop"; uint64_t ino = 42; int fd; ssize_t bufsize = strlen(CONTENTS0) + 1; ssize_t fsize = 2 * m_maxbcachebuf; char readbuf[bufsize]; void *zeros; zeros = calloc(1, m_maxbcachebuf); ASSERT_NE(nullptr, zeros); expect_lookup(RELPATH, ino, fsize); expect_open(ino, 0, 1); expect_read(ino, 0, m_maxbcachebuf, m_maxbcachebuf, zeros); FuseTest::expect_write(ino, m_maxbcachebuf, bufsize, bufsize, 0, 0, CONTENTS1); fd = open(FULLPATH, O_RDWR); ASSERT_LE(0, fd) << strerror(errno); // Cache first block with dirty data. This will entail first reading // the existing data. ASSERT_EQ(bufsize, pwrite(fd, CONTENTS0, bufsize, 0)) << strerror(errno); // Write directly to second block ASSERT_EQ(0, fcntl(fd, F_SETFL, O_DIRECT)) << strerror(errno); ASSERT_EQ(bufsize, pwrite(fd, CONTENTS1, bufsize, m_maxbcachebuf)) << strerror(errno); // Read from the first block again. Should be serviced by cache. ASSERT_EQ(0, fcntl(fd, F_SETFL, 0)) << strerror(errno); ASSERT_EQ(bufsize, pread(fd, readbuf, bufsize, 0)) << strerror(errno); ASSERT_STREQ(readbuf, CONTENTS0); leak(fd); free(zeros); } /* * If a direct io write partially overlaps one or two blocks of dirty cached * data, No dirty data should be lost. Admittedly this is a weird test, * because it would be unusual to use O_DIRECT and the writeback cache. */ TEST_F(WriteBackAsync, direct_io_partially_overlaps_cached_block) { const char FULLPATH[] = "mountpoint/some_file.txt"; const char RELPATH[] = "some_file.txt"; uint64_t ino = 42; int fd; off_t bs = m_maxbcachebuf; ssize_t fsize = 3 * bs; void *readbuf, *zeros, *ones, *zeroones, *onezeros; readbuf = malloc(bs); ASSERT_NE(nullptr, readbuf) << strerror(errno); zeros = calloc(1, 3 * bs); ASSERT_NE(nullptr, zeros); ones = calloc(1, 2 * bs); ASSERT_NE(nullptr, ones); memset(ones, 1, 2 * bs); zeroones = calloc(1, bs); ASSERT_NE(nullptr, zeroones); memset((uint8_t*)zeroones + bs / 2, 1, bs / 2); onezeros = calloc(1, bs); ASSERT_NE(nullptr, onezeros); memset(onezeros, 1, bs / 2); expect_lookup(RELPATH, ino, fsize); expect_open(ino, 0, 1); fd = open(FULLPATH, O_RDWR); ASSERT_LE(0, fd) << strerror(errno); /* Cache first and third blocks with dirty data. */ ASSERT_EQ(3 * bs, pwrite(fd, zeros, 3 * bs, 0)) << strerror(errno); /* * Write directly to all three blocks. The partially written blocks * will be flushed because they're dirty. */ FuseTest::expect_write(ino, 0, bs, bs, 0, 0, zeros); FuseTest::expect_write(ino, 2 * bs, bs, bs, 0, 0, zeros); /* The direct write is split in two because of the m_maxwrite value */ FuseTest::expect_write(ino, bs / 2, bs, bs, 0, 0, ones); FuseTest::expect_write(ino, 3 * bs / 2, bs, bs, 0, 0, ones); ASSERT_EQ(0, fcntl(fd, F_SETFL, O_DIRECT)) << strerror(errno); ASSERT_EQ(2 * bs, pwrite(fd, ones, 2 * bs, bs / 2)) << strerror(errno); /* * Read from both the valid and invalid portions of the first and third * blocks again. This will entail FUSE_READ operations because these * blocks were invalidated by the direct write. */ expect_read(ino, 0, bs, bs, zeroones); expect_read(ino, 2 * bs, bs, bs, onezeros); ASSERT_EQ(0, fcntl(fd, F_SETFL, 0)) << strerror(errno); ASSERT_EQ(bs / 2, pread(fd, readbuf, bs / 2, 0)) << strerror(errno); EXPECT_EQ(0, memcmp(zeros, readbuf, bs / 2)); ASSERT_EQ(bs / 2, pread(fd, readbuf, bs / 2, 5 * bs / 2)) << strerror(errno); EXPECT_EQ(0, memcmp(zeros, readbuf, bs / 2)); ASSERT_EQ(bs / 2, pread(fd, readbuf, bs / 2, bs / 2)) << strerror(errno); EXPECT_EQ(0, memcmp(ones, readbuf, bs / 2)); ASSERT_EQ(bs / 2, pread(fd, readbuf, bs / 2, 2 * bs)) << strerror(errno); EXPECT_EQ(0, memcmp(ones, readbuf, bs / 2)); leak(fd); free(zeroones); free(onezeros); free(ones); free(zeros); free(readbuf); } /* * In WriteBack mode, writes may be cached beyond what the server thinks is the * EOF. In this case, a short read at EOF should _not_ cause fusefs to update * the file's size. */ TEST_F(WriteBackAsync, eof) { const char FULLPATH[] = "mountpoint/some_file.txt"; const char RELPATH[] = "some_file.txt"; const char *CONTENTS0 = "abcdefgh"; const char *CONTENTS1 = "ijklmnop"; uint64_t ino = 42; int fd; off_t offset = m_maxbcachebuf; ssize_t wbufsize = strlen(CONTENTS1); off_t old_filesize = (off_t)strlen(CONTENTS0); ssize_t rbufsize = 2 * old_filesize; char readbuf[rbufsize]; size_t holesize = rbufsize - old_filesize; char hole[holesize]; struct stat sb; ssize_t r; expect_lookup(RELPATH, ino, 0); expect_open(ino, 0, 1); expect_read(ino, 0, m_maxbcachebuf, old_filesize, CONTENTS0); fd = open(FULLPATH, O_RDWR); ASSERT_LE(0, fd) << strerror(errno); /* Write and cache data beyond EOF */ ASSERT_EQ(wbufsize, pwrite(fd, CONTENTS1, wbufsize, offset)) << strerror(errno); /* Read from the old EOF */ r = pread(fd, readbuf, rbufsize, 0); ASSERT_LE(0, r) << strerror(errno); EXPECT_EQ(rbufsize, r) << "read should've synthesized a hole"; EXPECT_EQ(0, memcmp(CONTENTS0, readbuf, old_filesize)); bzero(hole, holesize); EXPECT_EQ(0, memcmp(hole, readbuf + old_filesize, holesize)); /* The file's size should still be what was established by pwrite */ ASSERT_EQ(0, fstat(fd, &sb)) << strerror(errno); EXPECT_EQ(offset + wbufsize, sb.st_size); leak(fd); } /* * When a file has dirty writes that haven't been flushed, the server's notion * of its mtime and ctime will be wrong. The kernel should ignore those if it * gets them from a FUSE_GETATTR before flushing. */ TEST_F(WriteBackAsync, timestamps) { const char FULLPATH[] = "mountpoint/some_file.txt"; const char RELPATH[] = "some_file.txt"; const char *CONTENTS = "abcdefgh"; ssize_t bufsize = strlen(CONTENTS); uint64_t ino = 42; uint64_t attr_valid = 0; uint64_t attr_valid_nsec = 0; uint64_t server_time = 12345; mode_t mode = S_IFREG | 0644; int fd; struct stat sb; EXPECT_LOOKUP(FUSE_ROOT_ID, RELPATH) .WillRepeatedly(Invoke( ReturnImmediate([=](auto in __unused, auto& out) { SET_OUT_HEADER_LEN(out, entry); out.body.entry.attr.mode = mode; out.body.entry.nodeid = ino; out.body.entry.attr.nlink = 1; out.body.entry.attr_valid = attr_valid; out.body.entry.attr_valid_nsec = attr_valid_nsec; }))); expect_open(ino, 0, 1); EXPECT_CALL(*m_mock, process( ResultOf([=](auto in) { return (in.header.opcode == FUSE_GETATTR && in.header.nodeid == ino); }, Eq(true)), _) ).WillRepeatedly(Invoke( ReturnImmediate([=](auto i __unused, auto& out) { SET_OUT_HEADER_LEN(out, attr); out.body.attr.attr.ino = ino; out.body.attr.attr.mode = mode; out.body.attr.attr_valid = attr_valid; out.body.attr.attr_valid_nsec = attr_valid_nsec; out.body.attr.attr.atime = server_time; out.body.attr.attr.mtime = server_time; out.body.attr.attr.ctime = server_time; }))); fd = open(FULLPATH, O_RDWR); ASSERT_LE(0, fd) << strerror(errno); ASSERT_EQ(bufsize, write(fd, CONTENTS, bufsize)) << strerror(errno); ASSERT_EQ(0, fstat(fd, &sb)) << strerror(errno); EXPECT_EQ((time_t)server_time, sb.st_atime); EXPECT_NE((time_t)server_time, sb.st_mtime); EXPECT_NE((time_t)server_time, sb.st_ctime); leak(fd); } /* Any dirty timestamp fields should be flushed during a SETATTR */ TEST_F(WriteBackAsync, timestamps_during_setattr) { const char FULLPATH[] = "mountpoint/some_file.txt"; const char RELPATH[] = "some_file.txt"; const char *CONTENTS = "abcdefgh"; ssize_t bufsize = strlen(CONTENTS); uint64_t ino = 42; const mode_t newmode = 0755; int fd; expect_lookup(RELPATH, ino, 0); expect_open(ino, 0, 1); EXPECT_CALL(*m_mock, process( ResultOf([=](auto in) { uint32_t valid = FATTR_MODE | FATTR_MTIME | FATTR_CTIME; return (in.header.opcode == FUSE_SETATTR && in.header.nodeid == ino && in.body.setattr.valid == valid); }, Eq(true)), _) ).WillOnce(Invoke(ReturnImmediate([=](auto in __unused, auto& out) { SET_OUT_HEADER_LEN(out, attr); out.body.attr.attr.ino = ino; out.body.attr.attr.mode = S_IFREG | newmode; }))); fd = open(FULLPATH, O_RDWR); ASSERT_LE(0, fd) << strerror(errno); ASSERT_EQ(bufsize, write(fd, CONTENTS, bufsize)) << strerror(errno); ASSERT_EQ(0, fchmod(fd, newmode)) << strerror(errno); leak(fd); } /* fuse_init_out.time_gran controls the granularity of timestamps */ TEST_P(TimeGran, timestamps_during_setattr) { const char FULLPATH[] = "mountpoint/some_file.txt"; const char RELPATH[] = "some_file.txt"; const char *CONTENTS = "abcdefgh"; ssize_t bufsize = strlen(CONTENTS); uint64_t ino = 42; const mode_t newmode = 0755; int fd; expect_lookup(RELPATH, ino, 0); expect_open(ino, 0, 1); EXPECT_CALL(*m_mock, process( ResultOf([=](auto in) { uint32_t valid = FATTR_MODE | FATTR_MTIME | FATTR_CTIME; return (in.header.opcode == FUSE_SETATTR && in.header.nodeid == ino && in.body.setattr.valid == valid && in.body.setattr.mtimensec % m_time_gran == 0 && in.body.setattr.ctimensec % m_time_gran == 0); }, Eq(true)), _) ).WillOnce(Invoke(ReturnImmediate([=](auto in __unused, auto& out) { SET_OUT_HEADER_LEN(out, attr); out.body.attr.attr.ino = ino; out.body.attr.attr.mode = S_IFREG | newmode; }))); fd = open(FULLPATH, O_RDWR); ASSERT_LE(0, fd) << strerror(errno); ASSERT_EQ(bufsize, write(fd, CONTENTS, bufsize)) << strerror(errno); ASSERT_EQ(0, fchmod(fd, newmode)) << strerror(errno); leak(fd); } INSTANTIATE_TEST_CASE_P(RA, TimeGran, Range(0u, 10u)); /* * Without direct_io, writes should be committed to cache */ TEST_F(Write, writethrough) { const char FULLPATH[] = "mountpoint/some_file.txt"; const char RELPATH[] = "some_file.txt"; const char *CONTENTS = "abcdefgh"; uint64_t ino = 42; int fd; ssize_t bufsize = strlen(CONTENTS); uint8_t readbuf[bufsize]; expect_lookup(RELPATH, ino, 0); expect_open(ino, 0, 1); expect_write(ino, 0, bufsize, bufsize, CONTENTS); fd = open(FULLPATH, O_RDWR); ASSERT_LE(0, fd) << strerror(errno); ASSERT_EQ(bufsize, write(fd, CONTENTS, bufsize)) << strerror(errno); /* * A subsequent read should be serviced by cache, without querying the * filesystem daemon */ ASSERT_EQ(0, lseek(fd, 0, SEEK_SET)) << strerror(errno); ASSERT_EQ(bufsize, read(fd, readbuf, bufsize)) << strerror(errno); leak(fd); } /* Writes that extend a file should update the cached file size */ TEST_F(Write, update_file_size) { const char FULLPATH[] = "mountpoint/some_file.txt"; const char RELPATH[] = "some_file.txt"; const char *CONTENTS = "abcdefgh"; struct stat sb; uint64_t ino = 42; int fd; ssize_t bufsize = strlen(CONTENTS); expect_lookup(RELPATH, ino, 0); expect_open(ino, 0, 1); expect_write(ino, 0, bufsize, bufsize, CONTENTS); fd = open(FULLPATH, O_RDWR); ASSERT_LE(0, fd) << strerror(errno); ASSERT_EQ(bufsize, write(fd, CONTENTS, bufsize)) << strerror(errno); /* Get cached attributes */ ASSERT_EQ(0, fstat(fd, &sb)) << strerror(errno); ASSERT_EQ(bufsize, sb.st_size); leak(fd); }