diff --git a/sys/fs/fuse/fuse_internal.c b/sys/fs/fuse/fuse_internal.c index a5f646e5f449..82e37ed5a466 100644 --- a/sys/fs/fuse/fuse_internal.c +++ b/sys/fs/fuse/fuse_internal.c @@ -1,1308 +1,1305 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 2007-2009 Google Inc. and Amit Singh * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are * met: * * * Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following disclaimer * in the documentation and/or other materials provided with the * distribution. * * Neither the name of Google Inc. nor the names of its * contributors may be used to endorse or promote products derived from * this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * Copyright (C) 2005 Csaba Henk. * All rights reserved. * * Copyright (c) 2019 The FreeBSD Foundation * * Portions of this software were developed by BFF Storage Systems, LLC under * sponsorship from the FreeBSD Foundation. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "fuse.h" #include "fuse_file.h" #include "fuse_internal.h" #include "fuse_io.h" #include "fuse_ipc.h" #include "fuse_node.h" #include "fuse_file.h" SDT_PROVIDER_DECLARE(fusefs); /* * Fuse trace probe: * arg0: verbosity. Higher numbers give more verbose messages * arg1: Textual message */ SDT_PROBE_DEFINE2(fusefs, , internal, trace, "int", "char*"); #ifdef ZERO_PAD_INCOMPLETE_BUFS static int isbzero(void *buf, size_t len); #endif counter_u64_t fuse_lookup_cache_hits; counter_u64_t fuse_lookup_cache_misses; SYSCTL_COUNTER_U64(_vfs_fusefs_stats, OID_AUTO, lookup_cache_hits, CTLFLAG_RD, &fuse_lookup_cache_hits, "number of positive cache hits in lookup"); SYSCTL_COUNTER_U64(_vfs_fusefs_stats, OID_AUTO, lookup_cache_misses, CTLFLAG_RD, &fuse_lookup_cache_misses, "number of cache misses in lookup"); int fuse_internal_get_cached_vnode(struct mount* mp, ino_t ino, int flags, struct vnode **vpp) { struct bintime now; struct thread *td = curthread; uint64_t nodeid = ino; int error; *vpp = NULL; error = vfs_hash_get(mp, fuse_vnode_hash(nodeid), flags, td, vpp, fuse_vnode_cmp, &nodeid); if (error) return error; /* * Check the entry cache timeout. We have to do this within fusefs * instead of by using cache_enter_time/cache_lookup because those * routines are only intended to work with pathnames, not inodes */ if (*vpp != NULL) { getbinuptime(&now); if (bintime_cmp(&(VTOFUD(*vpp)->entry_cache_timeout), &now, >)){ counter_u64_add(fuse_lookup_cache_hits, 1); return 0; } else { /* Entry cache timeout */ counter_u64_add(fuse_lookup_cache_misses, 1); cache_purge(*vpp); vput(*vpp); *vpp = NULL; } } return 0; } SDT_PROBE_DEFINE0(fusefs, , internal, access_vadmin); /* Synchronously send a FUSE_ACCESS operation */ int fuse_internal_access(struct vnode *vp, accmode_t mode, struct thread *td, struct ucred *cred) { int err = 0; uint32_t mask = F_OK; int dataflags; int vtype; struct mount *mp; struct fuse_dispatcher fdi; struct fuse_access_in *fai; struct fuse_data *data; mp = vnode_mount(vp); vtype = vnode_vtype(vp); data = fuse_get_mpdata(mp); dataflags = data->dataflags; if (mode == 0) return 0; if (mode & VMODIFY_PERMS && vfs_isrdonly(mp)) { switch (vp->v_type) { case VDIR: /* FALLTHROUGH */ case VLNK: /* FALLTHROUGH */ case VREG: return EROFS; default: break; } } /* Unless explicitly permitted, deny everyone except the fs owner. */ if (!(dataflags & FSESS_DAEMON_CAN_SPY)) { if (fuse_match_cred(data->daemoncred, cred)) return EPERM; } if (dataflags & FSESS_DEFAULT_PERMISSIONS) { struct vattr va; fuse_internal_getattr(vp, &va, cred, td); return vaccess(vp->v_type, va.va_mode, va.va_uid, va.va_gid, mode, cred); } if (mode & VADMIN) { /* * The FUSE protocol doesn't have an equivalent of VADMIN, so * it's a bug if we ever reach this point with that bit set. */ SDT_PROBE0(fusefs, , internal, access_vadmin); } if (fsess_not_impl(mp, FUSE_ACCESS)) return 0; if ((mode & (VWRITE | VAPPEND)) != 0) mask |= W_OK; if ((mode & VREAD) != 0) mask |= R_OK; if ((mode & VEXEC) != 0) mask |= X_OK; fdisp_init(&fdi, sizeof(*fai)); fdisp_make_vp(&fdi, FUSE_ACCESS, vp, td, cred); fai = fdi.indata; fai->mask = mask; err = fdisp_wait_answ(&fdi); fdisp_destroy(&fdi); if (err == ENOSYS) { fsess_set_notimpl(mp, FUSE_ACCESS); err = 0; } return err; } /* * Cache FUSE attributes from attr, in attribute cache associated with vnode * 'vp'. Optionally, if argument 'vap' is not NULL, store a copy of the * converted attributes there as well. * * If the nominal attribute cache TTL is zero, do not cache on the 'vp' (but do * return the result to the caller). */ void fuse_internal_cache_attrs(struct vnode *vp, struct fuse_attr *attr, uint64_t attr_valid, uint32_t attr_valid_nsec, struct vattr *vap) { struct mount *mp; struct fuse_vnode_data *fvdat; struct fuse_data *data; struct vattr *vp_cache_at; mp = vnode_mount(vp); fvdat = VTOFUD(vp); data = fuse_get_mpdata(mp); ASSERT_VOP_ELOCKED(vp, "fuse_internal_cache_attrs"); fuse_validity_2_bintime(attr_valid, attr_valid_nsec, &fvdat->attr_cache_timeout); /* Fix our buffers if the filesize changed without us knowing */ if (vnode_isreg(vp) && attr->size != fvdat->cached_attrs.va_size) { (void)fuse_vnode_setsize(vp, attr->size); fvdat->cached_attrs.va_size = attr->size; } if (attr_valid > 0 || attr_valid_nsec > 0) vp_cache_at = &(fvdat->cached_attrs); else if (vap != NULL) vp_cache_at = vap; else return; vattr_null(vp_cache_at); vp_cache_at->va_fsid = mp->mnt_stat.f_fsid.val[0]; vp_cache_at->va_fileid = attr->ino; vp_cache_at->va_mode = attr->mode & ~S_IFMT; vp_cache_at->va_nlink = attr->nlink; vp_cache_at->va_uid = attr->uid; vp_cache_at->va_gid = attr->gid; vp_cache_at->va_rdev = attr->rdev; vp_cache_at->va_size = attr->size; /* XXX on i386, seconds are truncated to 32 bits */ vp_cache_at->va_atime.tv_sec = attr->atime; vp_cache_at->va_atime.tv_nsec = attr->atimensec; vp_cache_at->va_mtime.tv_sec = attr->mtime; vp_cache_at->va_mtime.tv_nsec = attr->mtimensec; vp_cache_at->va_ctime.tv_sec = attr->ctime; vp_cache_at->va_ctime.tv_nsec = attr->ctimensec; if (fuse_libabi_geq(data, 7, 9) && attr->blksize > 0) vp_cache_at->va_blocksize = attr->blksize; else vp_cache_at->va_blocksize = PAGE_SIZE; vp_cache_at->va_type = IFTOVT(attr->mode); vp_cache_at->va_bytes = attr->blocks * S_BLKSIZE; vp_cache_at->va_flags = 0; if (vap != vp_cache_at && vap != NULL) memcpy(vap, vp_cache_at, sizeof(*vap)); } /* fsync */ int fuse_internal_fsync_callback(struct fuse_ticket *tick, struct uio *uio) { if (tick->tk_aw_ohead.error == ENOSYS) { fsess_set_notimpl(tick->tk_data->mp, fticket_opcode(tick)); } return 0; } int fuse_internal_fsync(struct vnode *vp, struct thread *td, int waitfor, bool datasync) { struct fuse_fsync_in *ffsi = NULL; struct fuse_dispatcher fdi; struct fuse_filehandle *fufh; struct fuse_vnode_data *fvdat = VTOFUD(vp); struct mount *mp = vnode_mount(vp); int op = FUSE_FSYNC; int err = 0; if (fsess_not_impl(vnode_mount(vp), (vnode_vtype(vp) == VDIR ? FUSE_FSYNCDIR : FUSE_FSYNC))) { return 0; } if (vnode_isdir(vp)) op = FUSE_FSYNCDIR; if (fsess_not_impl(mp, op)) return 0; fdisp_init(&fdi, sizeof(*ffsi)); /* * fsync every open file handle for this file, because we can't be sure * which file handle the caller is really referring to. */ LIST_FOREACH(fufh, &fvdat->handles, next) { fdi.iosize = sizeof(*ffsi); if (ffsi == NULL) fdisp_make_vp(&fdi, op, vp, td, NULL); else fdisp_refresh_vp(&fdi, op, vp, td, NULL); ffsi = fdi.indata; ffsi->fh = fufh->fh_id; ffsi->fsync_flags = 0; if (datasync) ffsi->fsync_flags = 1; if (waitfor == MNT_WAIT) { err = fdisp_wait_answ(&fdi); } else { fuse_insert_callback(fdi.tick, fuse_internal_fsync_callback); fuse_insert_message(fdi.tick, false); } if (err == ENOSYS) { /* ENOSYS means "success, and don't call again" */ fsess_set_notimpl(mp, op); err = 0; break; } } fdisp_destroy(&fdi); return err; } /* Asynchronous invalidation */ SDT_PROBE_DEFINE3(fusefs, , internal, invalidate_entry, "struct vnode*", "struct fuse_notify_inval_entry_out*", "char*"); int fuse_internal_invalidate_entry(struct mount *mp, struct uio *uio) { struct fuse_notify_inval_entry_out fnieo; struct componentname cn; struct vnode *dvp, *vp; char name[PATH_MAX]; int err; if ((err = uiomove(&fnieo, sizeof(fnieo), uio)) != 0) return (err); if (fnieo.namelen >= sizeof(name)) return (EINVAL); if ((err = uiomove(name, fnieo.namelen, uio)) != 0) return (err); name[fnieo.namelen] = '\0'; /* fusefs does not cache "." or ".." entries */ if (strncmp(name, ".", sizeof(".")) == 0 || strncmp(name, "..", sizeof("..")) == 0) return (0); if (fnieo.parent == FUSE_ROOT_ID) err = VFS_ROOT(mp, LK_SHARED, &dvp); else err = fuse_internal_get_cached_vnode( mp, fnieo.parent, LK_SHARED, &dvp); SDT_PROBE3(fusefs, , internal, invalidate_entry, dvp, &fnieo, name); /* * If dvp is not in the cache, then it must've been reclaimed. And * since fuse_vnop_reclaim does a cache_purge, name's entry must've * been invalidated already. So we can safely return if dvp == NULL */ if (err != 0 || dvp == NULL) return (err); /* * XXX we can't check dvp's generation because the FUSE invalidate * entry message doesn't include it. Worse case is that we invalidate * an entry that didn't need to be invalidated. */ cn.cn_nameiop = LOOKUP; cn.cn_flags = 0; /* !MAKEENTRY means free cached entry */ cn.cn_thread = curthread; cn.cn_cred = curthread->td_ucred; cn.cn_lkflags = LK_SHARED; cn.cn_pnbuf = NULL; cn.cn_nameptr = name; cn.cn_namelen = fnieo.namelen; err = cache_lookup(dvp, &vp, &cn, NULL, NULL); MPASS(err == 0); fuse_vnode_clear_attr_cache(dvp); vput(dvp); return (0); } SDT_PROBE_DEFINE2(fusefs, , internal, invalidate_inode, "struct vnode*", "struct fuse_notify_inval_inode_out *"); int fuse_internal_invalidate_inode(struct mount *mp, struct uio *uio) { struct fuse_notify_inval_inode_out fniio; struct vnode *vp; int err; if ((err = uiomove(&fniio, sizeof(fniio), uio)) != 0) return (err); if (fniio.ino == FUSE_ROOT_ID) err = VFS_ROOT(mp, LK_EXCLUSIVE, &vp); else err = fuse_internal_get_cached_vnode(mp, fniio.ino, LK_SHARED, &vp); SDT_PROBE2(fusefs, , internal, invalidate_inode, vp, &fniio); if (err != 0 || vp == NULL) return (err); /* * XXX we can't check vp's generation because the FUSE invalidate * entry message doesn't include it. Worse case is that we invalidate * an inode that didn't need to be invalidated. */ /* * Flush and invalidate buffers if off >= 0. Technically we only need * to flush and invalidate the range of offsets [off, off + len), but * for simplicity's sake we do everything. */ if (fniio.off >= 0) fuse_io_invalbuf(vp, curthread); fuse_vnode_clear_attr_cache(vp); vput(vp); return (0); } /* mknod */ int fuse_internal_mknod(struct vnode *dvp, struct vnode **vpp, struct componentname *cnp, struct vattr *vap) { struct fuse_data *data; struct fuse_mknod_in fmni; size_t insize; data = fuse_get_mpdata(dvp->v_mount); fmni.mode = MAKEIMODE(vap->va_type, vap->va_mode); fmni.rdev = vap->va_rdev; if (fuse_libabi_geq(data, 7, 12)) { insize = sizeof(fmni); fmni.umask = curthread->td_proc->p_pd->pd_cmask; } else { insize = FUSE_COMPAT_MKNOD_IN_SIZE; } return (fuse_internal_newentry(dvp, vpp, cnp, FUSE_MKNOD, &fmni, insize, vap->va_type)); } /* readdir */ int fuse_internal_readdir(struct vnode *vp, struct uio *uio, off_t startoff, struct fuse_filehandle *fufh, struct fuse_iov *cookediov, int *ncookies, u_long *cookies) { int err = 0; struct fuse_dispatcher fdi; struct fuse_read_in *fri = NULL; int fnd_start; if (uio_resid(uio) == 0) return 0; fdisp_init(&fdi, 0); /* * Note that we DO NOT have a UIO_SYSSPACE here (so no need for p2p * I/O). */ /* * fnd_start is set non-zero once the offset in the directory gets * to the startoff. This is done because directories must be read * from the beginning (offset == 0) when fuse_vnop_readdir() needs * to do an open of the directory. * If it is not set non-zero here, it will be set non-zero in * fuse_internal_readdir_processdata() when uio_offset == startoff. */ fnd_start = 0; if (uio->uio_offset == startoff) fnd_start = 1; while (uio_resid(uio) > 0) { fdi.iosize = sizeof(*fri); if (fri == NULL) fdisp_make_vp(&fdi, FUSE_READDIR, vp, NULL, NULL); else fdisp_refresh_vp(&fdi, FUSE_READDIR, vp, NULL, NULL); fri = fdi.indata; fri->fh = fufh->fh_id; fri->offset = uio_offset(uio); fri->size = MIN(uio->uio_resid, fuse_get_mpdata(vp->v_mount)->max_read); if ((err = fdisp_wait_answ(&fdi))) break; if ((err = fuse_internal_readdir_processdata(uio, startoff, &fnd_start, fri->size, fdi.answ, fdi.iosize, cookediov, ncookies, &cookies))) break; } fdisp_destroy(&fdi); return ((err == -1) ? 0 : err); } /* * Return -1 to indicate that this readdir is finished, 0 if it copied * all the directory data read in and it may be possible to read more * and greater than 0 for a failure. */ int fuse_internal_readdir_processdata(struct uio *uio, off_t startoff, int *fnd_start, size_t reqsize, void *buf, size_t bufsize, struct fuse_iov *cookediov, int *ncookies, u_long **cookiesp) { int err = 0; int oreclen; size_t freclen; struct dirent *de; struct fuse_dirent *fudge; u_long *cookies; cookies = *cookiesp; if (bufsize < FUSE_NAME_OFFSET) return -1; for (;;) { if (bufsize < FUSE_NAME_OFFSET) { err = -1; break; } fudge = (struct fuse_dirent *)buf; freclen = FUSE_DIRENT_SIZE(fudge); if (bufsize < freclen) { /* * This indicates a partial directory entry at the * end of the directory data. */ err = -1; break; } #ifdef ZERO_PAD_INCOMPLETE_BUFS if (isbzero(buf, FUSE_NAME_OFFSET)) { err = -1; break; } #endif if (!fudge->namelen || fudge->namelen > MAXNAMLEN) { err = EINVAL; break; } oreclen = GENERIC_DIRSIZ((struct pseudo_dirent *) &fudge->namelen); if (oreclen > uio_resid(uio)) { /* Out of space for the dir so we are done. */ err = -1; break; } /* * Don't start to copy the directory entries out until * the requested offset in the directory is found. */ if (*fnd_start != 0) { fiov_adjust(cookediov, oreclen); bzero(cookediov->base, oreclen); de = (struct dirent *)cookediov->base; de->d_fileno = fudge->ino; de->d_off = fudge->off; de->d_reclen = oreclen; de->d_type = fudge->type; de->d_namlen = fudge->namelen; memcpy((char *)cookediov->base + sizeof(struct dirent) - MAXNAMLEN - 1, (char *)buf + FUSE_NAME_OFFSET, fudge->namelen); dirent_terminate(de); err = uiomove(cookediov->base, cookediov->len, uio); if (err) break; if (cookies != NULL) { if (*ncookies == 0) { err = -1; break; } *cookies = fudge->off; cookies++; (*ncookies)--; } } else if (startoff == fudge->off) *fnd_start = 1; buf = (char *)buf + freclen; bufsize -= freclen; uio_setoffset(uio, fudge->off); } *cookiesp = cookies; return err; } /* remove */ int fuse_internal_remove(struct vnode *dvp, struct vnode *vp, struct componentname *cnp, enum fuse_opcode op) { struct fuse_dispatcher fdi; nlink_t nlink; int err = 0; fdisp_init(&fdi, cnp->cn_namelen + 1); fdisp_make_vp(&fdi, op, dvp, cnp->cn_thread, cnp->cn_cred); memcpy(fdi.indata, cnp->cn_nameptr, cnp->cn_namelen); ((char *)fdi.indata)[cnp->cn_namelen] = '\0'; err = fdisp_wait_answ(&fdi); fdisp_destroy(&fdi); if (err) return (err); /* * Access the cached nlink even if the attr cached has expired. If * it's inaccurate, the worst that will happen is: * 1) We'll recycle the vnode even though the file has another link we * don't know about, costing a bit of cpu time, or * 2) We won't recycle the vnode even though all of its links are gone. * It will linger around until vnlru reclaims it, costing a bit of * temporary memory. */ nlink = VTOFUD(vp)->cached_attrs.va_nlink--; /* * Purge the parent's attribute cache because the daemon * should've updated its mtime and ctime. */ fuse_vnode_clear_attr_cache(dvp); /* NB: nlink could be zero if it was never cached */ if (nlink <= 1 || vnode_vtype(vp) == VDIR) { fuse_internal_vnode_disappear(vp); } else { cache_purge(vp); fuse_vnode_update(vp, FN_CTIMECHANGE); } return err; } /* rename */ int fuse_internal_rename(struct vnode *fdvp, struct componentname *fcnp, struct vnode *tdvp, struct componentname *tcnp) { struct fuse_dispatcher fdi; struct fuse_rename_in *fri; int err = 0; fdisp_init(&fdi, sizeof(*fri) + fcnp->cn_namelen + tcnp->cn_namelen + 2); fdisp_make_vp(&fdi, FUSE_RENAME, fdvp, tcnp->cn_thread, tcnp->cn_cred); fri = fdi.indata; fri->newdir = VTOI(tdvp); memcpy((char *)fdi.indata + sizeof(*fri), fcnp->cn_nameptr, fcnp->cn_namelen); ((char *)fdi.indata)[sizeof(*fri) + fcnp->cn_namelen] = '\0'; memcpy((char *)fdi.indata + sizeof(*fri) + fcnp->cn_namelen + 1, tcnp->cn_nameptr, tcnp->cn_namelen); ((char *)fdi.indata)[sizeof(*fri) + fcnp->cn_namelen + tcnp->cn_namelen + 1] = '\0'; err = fdisp_wait_answ(&fdi); fdisp_destroy(&fdi); return err; } /* strategy */ /* entity creation */ void fuse_internal_newentry_makerequest(struct mount *mp, uint64_t dnid, struct componentname *cnp, enum fuse_opcode op, void *buf, size_t bufsize, struct fuse_dispatcher *fdip) { fdip->iosize = bufsize + cnp->cn_namelen + 1; fdisp_make(fdip, op, mp, dnid, cnp->cn_thread, cnp->cn_cred); memcpy(fdip->indata, buf, bufsize); memcpy((char *)fdip->indata + bufsize, cnp->cn_nameptr, cnp->cn_namelen); ((char *)fdip->indata)[bufsize + cnp->cn_namelen] = '\0'; } int fuse_internal_newentry_core(struct vnode *dvp, struct vnode **vpp, struct componentname *cnp, enum vtype vtyp, struct fuse_dispatcher *fdip) { int err = 0; struct fuse_entry_out *feo; struct mount *mp = vnode_mount(dvp); if ((err = fdisp_wait_answ(fdip))) { return err; } feo = fdip->answ; if ((err = fuse_internal_checkentry(feo, vtyp))) { return err; } err = fuse_vnode_get(mp, feo, feo->nodeid, dvp, vpp, cnp, vtyp); if (err) { fuse_internal_forget_send(mp, cnp->cn_thread, cnp->cn_cred, feo->nodeid, 1); return err; } /* * Purge the parent's attribute cache because the daemon should've * updated its mtime and ctime */ fuse_vnode_clear_attr_cache(dvp); fuse_internal_cache_attrs(*vpp, &feo->attr, feo->attr_valid, feo->attr_valid_nsec, NULL); return err; } int fuse_internal_newentry(struct vnode *dvp, struct vnode **vpp, struct componentname *cnp, enum fuse_opcode op, void *buf, size_t bufsize, enum vtype vtype) { int err; struct fuse_dispatcher fdi; struct mount *mp = vnode_mount(dvp); fdisp_init(&fdi, 0); fuse_internal_newentry_makerequest(mp, VTOI(dvp), cnp, op, buf, bufsize, &fdi); err = fuse_internal_newentry_core(dvp, vpp, cnp, vtype, &fdi); fdisp_destroy(&fdi); return err; } /* entity destruction */ int fuse_internal_forget_callback(struct fuse_ticket *ftick, struct uio *uio) { fuse_internal_forget_send(ftick->tk_data->mp, curthread, NULL, ((struct fuse_in_header *)ftick->tk_ms_fiov.base)->nodeid, 1); return 0; } void fuse_internal_forget_send(struct mount *mp, struct thread *td, struct ucred *cred, uint64_t nodeid, uint64_t nlookup) { struct fuse_dispatcher fdi; struct fuse_forget_in *ffi; /* * KASSERT(nlookup > 0, ("zero-times forget for vp #%llu", * (long long unsigned) nodeid)); */ fdisp_init(&fdi, sizeof(*ffi)); fdisp_make(&fdi, FUSE_FORGET, mp, nodeid, td, cred); ffi = fdi.indata; ffi->nlookup = nlookup; fuse_insert_message(fdi.tick, false); fdisp_destroy(&fdi); } -SDT_PROBE_DEFINE2(fusefs, , internal, getattr_cache_incoherent, - "struct vnode*", "struct fuse_attr_out*"); - /* Fetch the vnode's attributes from the daemon*/ int fuse_internal_do_getattr(struct vnode *vp, struct vattr *vap, struct ucred *cred, struct thread *td) { struct fuse_dispatcher fdi; struct fuse_vnode_data *fvdat = VTOFUD(vp); struct fuse_getattr_in *fgai; struct fuse_attr_out *fao; off_t old_filesize = fvdat->cached_attrs.va_size; struct timespec old_ctime = fvdat->cached_attrs.va_ctime; struct timespec old_mtime = fvdat->cached_attrs.va_mtime; enum vtype vtyp; int err; fdisp_init(&fdi, sizeof(*fgai)); fdisp_make_vp(&fdi, FUSE_GETATTR, vp, td, cred); fgai = fdi.indata; /* * We could look up a file handle and set it in fgai->fh, but that * involves extra runtime work and I'm unaware of any file systems that * care. */ fgai->getattr_flags = 0; if ((err = fdisp_wait_answ(&fdi))) { if (err == ENOENT) fuse_internal_vnode_disappear(vp); goto out; } fao = (struct fuse_attr_out *)fdi.answ; vtyp = IFTOVT(fao->attr.mode); if (fvdat->flag & FN_SIZECHANGE) fao->attr.size = old_filesize; if (fvdat->flag & FN_CTIMECHANGE) { fao->attr.ctime = old_ctime.tv_sec; fao->attr.ctimensec = old_ctime.tv_nsec; } if (fvdat->flag & FN_MTIMECHANGE) { fao->attr.mtime = old_mtime.tv_sec; fao->attr.mtimensec = old_mtime.tv_nsec; } if (vnode_isreg(vp) && fvdat->cached_attrs.va_size != VNOVAL && fao->attr.size != fvdat->cached_attrs.va_size) { /* * The server changed the file's size even though we had it * cached! That's a server bug. */ - SDT_PROBE2(fusefs, , internal, getattr_cache_incoherent, vp, - fao); - printf("%s: cache incoherent on %s! " - "Buggy FUSE server detected. To prevent data corruption, " - "disable the data cache by mounting with -o direct_io, or " - "as directed otherwise by your FUSE server's " - "documentation\n", __func__, - vnode_mount(vp)->mnt_stat.f_mntonname); + struct mount *mp = vnode_mount(vp); + struct fuse_data *data = fuse_get_mpdata(mp); + + fuse_warn(data, FSESS_WARN_CACHE_INCOHERENT, + "cache incoherent! " + "To prevent data corruption, disable the data cache " + "by mounting with -o direct_io, or as directed " + "otherwise by your FUSE server's documentation."); int iosize = fuse_iosize(vp); v_inval_buf_range(vp, 0, INT64_MAX, iosize); } fuse_internal_cache_attrs(vp, &fao->attr, fao->attr_valid, fao->attr_valid_nsec, vap); if (vtyp != vnode_vtype(vp)) { fuse_internal_vnode_disappear(vp); err = ENOENT; } out: fdisp_destroy(&fdi); return err; } /* Read a vnode's attributes from cache or fetch them from the fuse daemon */ int fuse_internal_getattr(struct vnode *vp, struct vattr *vap, struct ucred *cred, struct thread *td) { struct vattr *attrs; if ((attrs = VTOVA(vp)) != NULL) { *vap = *attrs; /* struct copy */ return 0; } return fuse_internal_do_getattr(vp, vap, cred, td); } void fuse_internal_vnode_disappear(struct vnode *vp) { struct fuse_vnode_data *fvdat = VTOFUD(vp); ASSERT_VOP_ELOCKED(vp, "fuse_internal_vnode_disappear"); fvdat->flag |= FN_REVOKED; cache_purge(vp); } /* fuse start/stop */ SDT_PROBE_DEFINE2(fusefs, , internal, init_done, "struct fuse_data*", "struct fuse_init_out*"); int fuse_internal_init_callback(struct fuse_ticket *tick, struct uio *uio) { int err = 0; struct fuse_data *data = tick->tk_data; struct fuse_init_out *fiio; if ((err = tick->tk_aw_ohead.error)) { goto out; } if ((err = fticket_pull(tick, uio))) { goto out; } fiio = fticket_resp(tick)->base; data->fuse_libabi_major = fiio->major; data->fuse_libabi_minor = fiio->minor; if (!fuse_libabi_geq(data, 7, 4)) { /* * With a little work we could support servers as old as 7.1. * But there would be little payoff. */ SDT_PROBE2(fusefs, , internal, trace, 1, "userpace version too low"); err = EPROTONOSUPPORT; goto out; } if (fuse_libabi_geq(data, 7, 5)) { if (fticket_resp(tick)->len == sizeof(struct fuse_init_out) || fticket_resp(tick)->len == FUSE_COMPAT_22_INIT_OUT_SIZE) { data->max_write = fiio->max_write; if (fiio->flags & FUSE_ASYNC_READ) data->dataflags |= FSESS_ASYNC_READ; if (fiio->flags & FUSE_POSIX_LOCKS) data->dataflags |= FSESS_POSIX_LOCKS; if (fiio->flags & FUSE_EXPORT_SUPPORT) data->dataflags |= FSESS_EXPORT_SUPPORT; /* * Don't bother to check FUSE_BIG_WRITES, because it's * redundant with max_write */ /* * max_background and congestion_threshold are not * implemented */ } else { err = EINVAL; } } else { /* Old fixed values */ data->max_write = 4096; } if (fuse_libabi_geq(data, 7, 6)) data->max_readahead_blocks = fiio->max_readahead / maxbcachebuf; if (!fuse_libabi_geq(data, 7, 7)) fsess_set_notimpl(data->mp, FUSE_INTERRUPT); if (!fuse_libabi_geq(data, 7, 8)) { fsess_set_notimpl(data->mp, FUSE_BMAP); fsess_set_notimpl(data->mp, FUSE_DESTROY); } if (fuse_libabi_geq(data, 7, 23) && fiio->time_gran >= 1 && fiio->time_gran <= 1000000000) data->time_gran = fiio->time_gran; else data->time_gran = 1; if (!fuse_libabi_geq(data, 7, 23)) data->cache_mode = fuse_data_cache_mode; else if (fiio->flags & FUSE_WRITEBACK_CACHE) data->cache_mode = FUSE_CACHE_WB; else data->cache_mode = FUSE_CACHE_WT; if (!fuse_libabi_geq(data, 7, 24)) fsess_set_notimpl(data->mp, FUSE_LSEEK); if (!fuse_libabi_geq(data, 7, 28)) fsess_set_notimpl(data->mp, FUSE_COPY_FILE_RANGE); out: if (err) { fdata_set_dead(data); } FUSE_LOCK(); data->dataflags |= FSESS_INITED; SDT_PROBE2(fusefs, , internal, init_done, data, fiio); wakeup(&data->ticketer); FUSE_UNLOCK(); return 0; } void fuse_internal_send_init(struct fuse_data *data, struct thread *td) { struct fuse_init_in *fiii; struct fuse_dispatcher fdi; fdisp_init(&fdi, sizeof(*fiii)); fdisp_make(&fdi, FUSE_INIT, data->mp, 0, td, NULL); fiii = fdi.indata; fiii->major = FUSE_KERNEL_VERSION; fiii->minor = FUSE_KERNEL_MINOR_VERSION; /* * fusefs currently reads ahead no more than one cache block at a time. * See fuse_read_biobackend */ fiii->max_readahead = maxbcachebuf; /* * Unsupported features: * FUSE_FILE_OPS: No known FUSE server or client supports it * FUSE_ATOMIC_O_TRUNC: our VFS cannot support it * FUSE_DONT_MASK: unlike Linux, FreeBSD always applies the umask, even * when default ACLs are in use. * FUSE_SPLICE_WRITE, FUSE_SPLICE_MOVE, FUSE_SPLICE_READ: FreeBSD * doesn't have splice(2). * FUSE_FLOCK_LOCKS: not yet implemented * FUSE_HAS_IOCTL_DIR: not yet implemented * FUSE_AUTO_INVAL_DATA: not yet implemented * FUSE_DO_READDIRPLUS: not yet implemented * FUSE_READDIRPLUS_AUTO: not yet implemented * FUSE_ASYNC_DIO: not yet implemented * FUSE_NO_OPEN_SUPPORT: not yet implemented * FUSE_PARALLEL_DIROPS: not yet implemented * FUSE_HANDLE_KILLPRIV: not yet implemented * FUSE_POSIX_ACL: not yet implemented * FUSE_ABORT_ERROR: not yet implemented * FUSE_CACHE_SYMLINKS: not yet implemented * FUSE_MAX_PAGES: not yet implemented */ fiii->flags = FUSE_ASYNC_READ | FUSE_POSIX_LOCKS | FUSE_EXPORT_SUPPORT | FUSE_BIG_WRITES | FUSE_WRITEBACK_CACHE; fuse_insert_callback(fdi.tick, fuse_internal_init_callback); fuse_insert_message(fdi.tick, false); fdisp_destroy(&fdi); } /* * Send a FUSE_SETATTR operation with no permissions checks. If cred is NULL, * send the request with root credentials */ int fuse_internal_setattr(struct vnode *vp, struct vattr *vap, struct thread *td, struct ucred *cred) { struct fuse_vnode_data *fvdat; struct fuse_dispatcher fdi; struct fuse_setattr_in *fsai; struct mount *mp; pid_t pid = td->td_proc->p_pid; struct fuse_data *data; int dataflags; int err = 0; enum vtype vtyp; int sizechanged = -1; uint64_t newsize = 0; mp = vnode_mount(vp); fvdat = VTOFUD(vp); data = fuse_get_mpdata(mp); dataflags = data->dataflags; fdisp_init(&fdi, sizeof(*fsai)); fdisp_make_vp(&fdi, FUSE_SETATTR, vp, td, cred); if (!cred) { fdi.finh->uid = 0; fdi.finh->gid = 0; } fsai = fdi.indata; fsai->valid = 0; if (vap->va_uid != (uid_t)VNOVAL) { fsai->uid = vap->va_uid; fsai->valid |= FATTR_UID; } if (vap->va_gid != (gid_t)VNOVAL) { fsai->gid = vap->va_gid; fsai->valid |= FATTR_GID; } if (vap->va_size != VNOVAL) { struct fuse_filehandle *fufh = NULL; /*Truncate to a new value. */ fsai->size = vap->va_size; sizechanged = 1; newsize = vap->va_size; fsai->valid |= FATTR_SIZE; fuse_filehandle_getrw(vp, FWRITE, &fufh, cred, pid); if (fufh) { fsai->fh = fufh->fh_id; fsai->valid |= FATTR_FH; } VTOFUD(vp)->flag &= ~FN_SIZECHANGE; } if (vap->va_atime.tv_sec != VNOVAL) { fsai->atime = vap->va_atime.tv_sec; fsai->atimensec = vap->va_atime.tv_nsec; fsai->valid |= FATTR_ATIME; if (vap->va_vaflags & VA_UTIMES_NULL) fsai->valid |= FATTR_ATIME_NOW; } if (vap->va_mtime.tv_sec != VNOVAL) { fsai->mtime = vap->va_mtime.tv_sec; fsai->mtimensec = vap->va_mtime.tv_nsec; fsai->valid |= FATTR_MTIME; if (vap->va_vaflags & VA_UTIMES_NULL) fsai->valid |= FATTR_MTIME_NOW; } else if (fvdat->flag & FN_MTIMECHANGE) { fsai->mtime = fvdat->cached_attrs.va_mtime.tv_sec; fsai->mtimensec = fvdat->cached_attrs.va_mtime.tv_nsec; fsai->valid |= FATTR_MTIME; } if (fuse_libabi_geq(data, 7, 23) && fvdat->flag & FN_CTIMECHANGE) { fsai->ctime = fvdat->cached_attrs.va_ctime.tv_sec; fsai->ctimensec = fvdat->cached_attrs.va_ctime.tv_nsec; fsai->valid |= FATTR_CTIME; } if (vap->va_mode != (mode_t)VNOVAL) { fsai->mode = vap->va_mode & ALLPERMS; fsai->valid |= FATTR_MODE; } if (!fsai->valid) { goto out; } if ((err = fdisp_wait_answ(&fdi))) goto out; vtyp = IFTOVT(((struct fuse_attr_out *)fdi.answ)->attr.mode); if (vnode_vtype(vp) != vtyp) { if (vnode_vtype(vp) == VNON && vtyp != VNON) { SDT_PROBE2(fusefs, , internal, trace, 1, "FUSE: Dang! " "vnode_vtype is VNON and vtype isn't."); } else { /* * STALE vnode, ditch * * The vnode has changed its type "behind our back". * There's nothing really we can do, so let us just * force an internal revocation and tell the caller to * try again, if interested. */ fuse_internal_vnode_disappear(vp); err = EAGAIN; } } if (err == 0) { struct fuse_attr_out *fao = (struct fuse_attr_out*)fdi.answ; fuse_vnode_undirty_cached_timestamps(vp); fuse_internal_cache_attrs(vp, &fao->attr, fao->attr_valid, fao->attr_valid_nsec, NULL); } out: fdisp_destroy(&fdi); return err; } /* * FreeBSD clears the SUID and SGID bits on any write by a non-root user. */ void fuse_internal_clear_suid_on_write(struct vnode *vp, struct ucred *cred, struct thread *td) { struct fuse_data *data; struct mount *mp; struct vattr va; int dataflags; mp = vnode_mount(vp); data = fuse_get_mpdata(mp); dataflags = data->dataflags; ASSERT_VOP_LOCKED(vp, __func__); if (dataflags & FSESS_DEFAULT_PERMISSIONS) { if (priv_check_cred(cred, PRIV_VFS_RETAINSUGID)) { fuse_internal_getattr(vp, &va, cred, td); if (va.va_mode & (S_ISUID | S_ISGID)) { mode_t mode = va.va_mode & ~(S_ISUID | S_ISGID); /* Clear all vattr fields except mode */ vattr_null(&va); va.va_mode = mode; /* * Ignore fuse_internal_setattr's return value, * because at this point the write operation has * already succeeded and we don't want to return * failing status for that. */ (void)fuse_internal_setattr(vp, &va, td, NULL); } } } } #ifdef ZERO_PAD_INCOMPLETE_BUFS static int isbzero(void *buf, size_t len) { int i; for (i = 0; i < len; i++) { if (((char *)buf)[i]) return (0); } return (1); } #endif void fuse_internal_init(void) { fuse_lookup_cache_misses = counter_u64_alloc(M_WAITOK); fuse_lookup_cache_hits = counter_u64_alloc(M_WAITOK); } void fuse_internal_destroy(void) { counter_u64_free(fuse_lookup_cache_hits); counter_u64_free(fuse_lookup_cache_misses); } diff --git a/sys/fs/fuse/fuse_io.c b/sys/fs/fuse/fuse_io.c index bcf6e5f601bf..47c7fb97e556 100644 --- a/sys/fs/fuse/fuse_io.c +++ b/sys/fs/fuse/fuse_io.c @@ -1,1127 +1,1127 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 2007-2009 Google Inc. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are * met: * * * Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following disclaimer * in the documentation and/or other materials provided with the * distribution. * * Neither the name of Google Inc. nor the names of its * contributors may be used to endorse or promote products derived from * this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * Copyright (C) 2005 Csaba Henk. * All rights reserved. * * Copyright (c) 2019 The FreeBSD Foundation * * Portions of this software were developed by BFF Storage Systems, LLC under * sponsorship from the FreeBSD Foundation. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "fuse.h" #include "fuse_file.h" #include "fuse_node.h" #include "fuse_internal.h" #include "fuse_ipc.h" #include "fuse_io.h" /* * Set in a struct buf to indicate that the write came from the buffer cache * and the originating cred and pid are no longer known. */ #define B_FUSEFS_WRITE_CACHE B_FS_FLAG1 SDT_PROVIDER_DECLARE(fusefs); /* * Fuse trace probe: * arg0: verbosity. Higher numbers give more verbose messages * arg1: Textual message */ SDT_PROBE_DEFINE2(fusefs, , io, trace, "int", "char*"); static int fuse_inval_buf_range(struct vnode *vp, off_t filesize, off_t start, off_t end); static int fuse_read_directbackend(struct vnode *vp, struct uio *uio, struct ucred *cred, struct fuse_filehandle *fufh); static int fuse_read_biobackend(struct vnode *vp, struct uio *uio, int ioflag, struct ucred *cred, struct fuse_filehandle *fufh, pid_t pid); static int fuse_write_directbackend(struct vnode *vp, struct uio *uio, struct ucred *cred, struct fuse_filehandle *fufh, off_t filesize, int ioflag, bool pages); static int fuse_write_biobackend(struct vnode *vp, struct uio *uio, struct ucred *cred, struct fuse_filehandle *fufh, int ioflag, pid_t pid); /* Invalidate a range of cached data, whether dirty of not */ static int fuse_inval_buf_range(struct vnode *vp, off_t filesize, off_t start, off_t end) { struct buf *bp; daddr_t left_lbn, end_lbn, right_lbn; off_t new_filesize; int iosize, left_on, right_on, right_blksize; iosize = fuse_iosize(vp); left_lbn = start / iosize; end_lbn = howmany(end, iosize); left_on = start & (iosize - 1); if (left_on != 0) { bp = getblk(vp, left_lbn, iosize, PCATCH, 0, 0); if ((bp->b_flags & B_CACHE) != 0 && bp->b_dirtyend >= left_on) { /* * Flush the dirty buffer, because we don't have a * byte-granular way to record which parts of the * buffer are valid. */ bwrite(bp); if (bp->b_error) return (bp->b_error); } else { brelse(bp); } } right_on = end & (iosize - 1); if (right_on != 0) { right_lbn = end / iosize; new_filesize = MAX(filesize, end); right_blksize = MIN(iosize, new_filesize - iosize * right_lbn); bp = getblk(vp, right_lbn, right_blksize, PCATCH, 0, 0); if ((bp->b_flags & B_CACHE) != 0 && bp->b_dirtyoff < right_on) { /* * Flush the dirty buffer, because we don't have a * byte-granular way to record which parts of the * buffer are valid. */ bwrite(bp); if (bp->b_error) return (bp->b_error); } else { brelse(bp); } } v_inval_buf_range(vp, left_lbn, end_lbn, iosize); return (0); } SDT_PROBE_DEFINE5(fusefs, , io, io_dispatch, "struct vnode*", "struct uio*", "int", "struct ucred*", "struct fuse_filehandle*"); SDT_PROBE_DEFINE4(fusefs, , io, io_dispatch_filehandles_closed, "struct vnode*", "struct uio*", "int", "struct ucred*"); int fuse_io_dispatch(struct vnode *vp, struct uio *uio, int ioflag, struct ucred *cred, pid_t pid) { struct fuse_filehandle *fufh; int err, directio; int fflag; bool closefufh = false; MPASS(vp->v_type == VREG || vp->v_type == VDIR); fflag = (uio->uio_rw == UIO_READ) ? FREAD : FWRITE; err = fuse_filehandle_getrw(vp, fflag, &fufh, cred, pid); if (err == EBADF && vnode_mount(vp)->mnt_flag & MNT_EXPORTED) { /* * nfsd will do I/O without first doing VOP_OPEN. We * must implicitly open the file here */ err = fuse_filehandle_open(vp, fflag, &fufh, curthread, cred); closefufh = true; } else if (err) { SDT_PROBE4(fusefs, , io, io_dispatch_filehandles_closed, vp, uio, ioflag, cred); printf("FUSE: io dispatch: filehandles are closed\n"); return err; } if (err) goto out; SDT_PROBE5(fusefs, , io, io_dispatch, vp, uio, ioflag, cred, fufh); /* * Ideally, when the daemon asks for direct io at open time, the * standard file flag should be set according to this, so that would * just change the default mode, which later on could be changed via * fcntl(2). * But this doesn't work, the O_DIRECT flag gets cleared at some point * (don't know where). So to make any use of the Fuse direct_io option, * we hardwire it into the file's private data (similarly to Linux, * btw.). */ directio = (ioflag & IO_DIRECT) || !fsess_opt_datacache(vnode_mount(vp)); switch (uio->uio_rw) { case UIO_READ: if (directio) { SDT_PROBE2(fusefs, , io, trace, 1, "direct read of vnode"); err = fuse_read_directbackend(vp, uio, cred, fufh); } else { SDT_PROBE2(fusefs, , io, trace, 1, "buffered read of vnode"); err = fuse_read_biobackend(vp, uio, ioflag, cred, fufh, pid); } break; case UIO_WRITE: fuse_vnode_update(vp, FN_MTIMECHANGE | FN_CTIMECHANGE); if (directio) { off_t start, end, filesize; bool pages = (ioflag & IO_VMIO) != 0; SDT_PROBE2(fusefs, , io, trace, 1, "direct write of vnode"); err = fuse_vnode_size(vp, &filesize, cred, curthread); if (err) goto out; start = uio->uio_offset; end = start + uio->uio_resid; if (!pages) { err = fuse_inval_buf_range(vp, filesize, start, end); if (err) return (err); } err = fuse_write_directbackend(vp, uio, cred, fufh, filesize, ioflag, pages); } else { SDT_PROBE2(fusefs, , io, trace, 1, "buffered write of vnode"); if (!fsess_opt_writeback(vnode_mount(vp))) ioflag |= IO_SYNC; err = fuse_write_biobackend(vp, uio, cred, fufh, ioflag, pid); } fuse_internal_clear_suid_on_write(vp, cred, uio->uio_td); break; default: panic("uninterpreted mode passed to fuse_io_dispatch"); } out: if (closefufh) fuse_filehandle_close(vp, fufh, curthread, cred); return (err); } SDT_PROBE_DEFINE4(fusefs, , io, read_bio_backend_start, "int", "int", "int", "int"); SDT_PROBE_DEFINE2(fusefs, , io, read_bio_backend_feed, "int", "struct buf*"); SDT_PROBE_DEFINE4(fusefs, , io, read_bio_backend_end, "int", "ssize_t", "int", "struct buf*"); static int fuse_read_biobackend(struct vnode *vp, struct uio *uio, int ioflag, struct ucred *cred, struct fuse_filehandle *fufh, pid_t pid) { struct buf *bp; struct mount *mp; struct fuse_data *data; daddr_t lbn, nextlbn; int bcount, nextsize; int err, n = 0, on = 0, seqcount; off_t filesize; const int biosize = fuse_iosize(vp); mp = vnode_mount(vp); data = fuse_get_mpdata(mp); if (uio->uio_offset < 0) return (EINVAL); seqcount = ioflag >> IO_SEQSHIFT; err = fuse_vnode_size(vp, &filesize, cred, curthread); if (err) return err; for (err = 0, bp = NULL; uio->uio_resid > 0; bp = NULL) { if (fuse_isdeadfs(vp)) { err = ENXIO; break; } if (filesize - uio->uio_offset <= 0) break; lbn = uio->uio_offset / biosize; on = uio->uio_offset & (biosize - 1); if ((off_t)lbn * biosize >= filesize) { bcount = 0; } else if ((off_t)(lbn + 1) * biosize > filesize) { bcount = filesize - (off_t)lbn *biosize; } else { bcount = biosize; } nextlbn = lbn + 1; nextsize = MIN(biosize, filesize - nextlbn * biosize); SDT_PROBE4(fusefs, , io, read_bio_backend_start, biosize, (int)lbn, on, bcount); if (bcount < biosize) { /* If near EOF, don't do readahead */ err = bread(vp, lbn, bcount, NOCRED, &bp); } else if ((vp->v_mount->mnt_flag & MNT_NOCLUSTERR) == 0) { /* Try clustered read */ long totread = uio->uio_resid + on; seqcount = MIN(seqcount, data->max_readahead_blocks + 1); err = cluster_read(vp, filesize, lbn, bcount, NOCRED, totread, seqcount, 0, &bp); } else if (seqcount > 1 && data->max_readahead_blocks >= 1) { /* Try non-clustered readahead */ err = breadn(vp, lbn, bcount, &nextlbn, &nextsize, 1, NOCRED, &bp); } else { /* Just read what was requested */ err = bread(vp, lbn, bcount, NOCRED, &bp); } if (err) { brelse(bp); bp = NULL; break; } /* * on is the offset into the current bp. Figure out how many * bytes we can copy out of the bp. Note that bcount is * NOT DEV_BSIZE aligned. * * Then figure out how many bytes we can copy into the uio. */ n = 0; if (on < bcount - bp->b_resid) n = MIN((unsigned)(bcount - bp->b_resid - on), uio->uio_resid); if (n > 0) { SDT_PROBE2(fusefs, , io, read_bio_backend_feed, n, bp); err = uiomove(bp->b_data + on, n, uio); } vfs_bio_brelse(bp, ioflag); SDT_PROBE4(fusefs, , io, read_bio_backend_end, err, uio->uio_resid, n, bp); if (bp->b_resid > 0) { /* Short read indicates EOF */ break; } } return (err); } SDT_PROBE_DEFINE1(fusefs, , io, read_directbackend_start, "struct fuse_read_in*"); SDT_PROBE_DEFINE3(fusefs, , io, read_directbackend_complete, "struct fuse_dispatcher*", "struct fuse_read_in*", "struct uio*"); static int fuse_read_directbackend(struct vnode *vp, struct uio *uio, struct ucred *cred, struct fuse_filehandle *fufh) { struct fuse_data *data; struct fuse_dispatcher fdi; struct fuse_read_in *fri; int err = 0; data = fuse_get_mpdata(vp->v_mount); if (uio->uio_resid == 0) return (0); fdisp_init(&fdi, 0); /* * XXX In "normal" case we use an intermediate kernel buffer for * transmitting data from daemon's context to ours. Eventually, we should * get rid of this. Anyway, if the target uio lives in sysspace (we are * called from pageops), and the input data doesn't need kernel-side * processing (we are not called from readdir) we can already invoke * an optimized, "peer-to-peer" I/O routine. */ while (uio->uio_resid > 0) { fdi.iosize = sizeof(*fri); fdisp_make_vp(&fdi, FUSE_READ, vp, uio->uio_td, cred); fri = fdi.indata; fri->fh = fufh->fh_id; fri->offset = uio->uio_offset; fri->size = MIN(uio->uio_resid, fuse_get_mpdata(vp->v_mount)->max_read); if (fuse_libabi_geq(data, 7, 9)) { /* See comment regarding FUSE_WRITE_LOCKOWNER */ fri->read_flags = 0; fri->flags = fufh_type_2_fflags(fufh->fufh_type); } SDT_PROBE1(fusefs, , io, read_directbackend_start, fri); if ((err = fdisp_wait_answ(&fdi))) goto out; SDT_PROBE3(fusefs, , io, read_directbackend_complete, &fdi, fri, uio); if ((err = uiomove(fdi.answ, MIN(fri->size, fdi.iosize), uio))) break; if (fdi.iosize < fri->size) { /* * Short read. Should only happen at EOF or with * direct io. */ break; } } out: fdisp_destroy(&fdi); return (err); } static int fuse_write_directbackend(struct vnode *vp, struct uio *uio, struct ucred *cred, struct fuse_filehandle *fufh, off_t filesize, int ioflag, bool pages) { struct fuse_vnode_data *fvdat = VTOFUD(vp); struct fuse_data *data; struct fuse_write_in *fwi; struct fuse_write_out *fwo; struct fuse_dispatcher fdi; size_t chunksize; void *fwi_data; off_t as_written_offset; int diff; int err = 0; bool direct_io = fufh->fuse_open_flags & FOPEN_DIRECT_IO; bool wrote_anything = false; uint32_t write_flags; data = fuse_get_mpdata(vp->v_mount); /* * Don't set FUSE_WRITE_LOCKOWNER in write_flags. It can't be set * accurately when using POSIX AIO, libfuse doesn't use it, and I'm not * aware of any file systems that do. It was an attempt to add * Linux-style mandatory locking to the FUSE protocol, but mandatory * locking is deprecated even on Linux. See Linux commit * f33321141b273d60cbb3a8f56a5489baad82ba5e . */ /* * Set FUSE_WRITE_CACHE whenever we don't know the uid, gid, and/or pid * that originated a write. For example when writing from the * writeback cache. I don't know of a single file system that cares, * but the protocol says we're supposed to do this. */ write_flags = !pages && ( (ioflag & IO_DIRECT) || !fsess_opt_datacache(vnode_mount(vp)) || !fsess_opt_writeback(vnode_mount(vp))) ? 0 : FUSE_WRITE_CACHE; if (uio->uio_resid == 0) return (0); if (ioflag & IO_APPEND) uio_setoffset(uio, filesize); if (vn_rlimit_fsize(vp, uio, uio->uio_td)) return (EFBIG); fdisp_init(&fdi, 0); while (uio->uio_resid > 0) { size_t sizeof_fwi; if (fuse_libabi_geq(data, 7, 9)) { sizeof_fwi = sizeof(*fwi); } else { sizeof_fwi = FUSE_COMPAT_WRITE_IN_SIZE; } chunksize = MIN(uio->uio_resid, data->max_write); fdi.iosize = sizeof_fwi + chunksize; fdisp_make_vp(&fdi, FUSE_WRITE, vp, uio->uio_td, cred); fwi = fdi.indata; fwi->fh = fufh->fh_id; fwi->offset = uio->uio_offset; fwi->size = chunksize; fwi->write_flags = write_flags; if (fuse_libabi_geq(data, 7, 9)) { fwi->flags = fufh_type_2_fflags(fufh->fufh_type); } fwi_data = (char *)fdi.indata + sizeof_fwi; if ((err = uiomove(fwi_data, chunksize, uio))) break; retry: err = fdisp_wait_answ(&fdi); if (err == ERESTART || err == EINTR || err == EWOULDBLOCK) { /* * Rewind the uio so dofilewrite will know it's * incomplete */ uio->uio_resid += fwi->size; uio->uio_offset -= fwi->size; /* * Change ERESTART into EINTR because we can't rewind * uio->uio_iov. Basically, once uiomove(9) has been * called, it's impossible to restart a syscall. */ if (err == ERESTART) err = EINTR; break; } else if (err) { break; } else { wrote_anything = true; } fwo = ((struct fuse_write_out *)fdi.answ); /* Adjust the uio in the case of short writes */ diff = fwi->size - fwo->size; as_written_offset = uio->uio_offset - diff; if (as_written_offset - diff > filesize) fuse_vnode_setsize(vp, as_written_offset); if (as_written_offset - diff >= filesize) fvdat->flag &= ~FN_SIZECHANGE; if (diff < 0) { - printf("WARNING: misbehaving FUSE filesystem " - "wrote more data than we provided it\n"); + fuse_warn(data, FSESS_WARN_WROTE_LONG, + "wrote more data than we provided it."); err = EINVAL; break; } else if (diff > 0) { /* Short write */ if (!direct_io) { - printf("WARNING: misbehaving FUSE filesystem: " + fuse_warn(data, FSESS_WARN_SHORT_WRITE, "short writes are only allowed with " - "direct_io\n"); + "direct_io."); } if (ioflag & IO_DIRECT) { /* Return early */ uio->uio_resid += diff; uio->uio_offset -= diff; break; } else { /* Resend the unwritten portion of data */ fdi.iosize = sizeof_fwi + diff; /* Refresh fdi without clearing data buffer */ fdisp_refresh_vp(&fdi, FUSE_WRITE, vp, uio->uio_td, cred); fwi = fdi.indata; MPASS2(fwi == fdi.indata, "FUSE dispatcher " "reallocated despite no increase in " "size?"); void *src = (char*)fwi_data + fwo->size; memmove(fwi_data, src, diff); fwi->fh = fufh->fh_id; fwi->offset = as_written_offset; fwi->size = diff; fwi->write_flags = write_flags; goto retry; } } } fdisp_destroy(&fdi); if (wrote_anything) fuse_vnode_undirty_cached_timestamps(vp); return (err); } SDT_PROBE_DEFINE6(fusefs, , io, write_biobackend_start, "int64_t", "int", "int", "struct uio*", "int", "bool"); SDT_PROBE_DEFINE2(fusefs, , io, write_biobackend_append_race, "long", "int"); SDT_PROBE_DEFINE2(fusefs, , io, write_biobackend_issue, "int", "struct buf*"); static int fuse_write_biobackend(struct vnode *vp, struct uio *uio, struct ucred *cred, struct fuse_filehandle *fufh, int ioflag, pid_t pid) { struct fuse_vnode_data *fvdat = VTOFUD(vp); struct buf *bp; daddr_t lbn; off_t filesize; int bcount; int n, on, seqcount, err = 0; bool last_page; const int biosize = fuse_iosize(vp); seqcount = ioflag >> IO_SEQSHIFT; KASSERT(uio->uio_rw == UIO_WRITE, ("fuse_write_biobackend mode")); if (vp->v_type != VREG) return (EIO); if (uio->uio_offset < 0) return (EINVAL); if (uio->uio_resid == 0) return (0); err = fuse_vnode_size(vp, &filesize, cred, curthread); if (err) return err; if (ioflag & IO_APPEND) uio_setoffset(uio, filesize); if (vn_rlimit_fsize(vp, uio, uio->uio_td)) return (EFBIG); do { bool direct_append, extending; if (fuse_isdeadfs(vp)) { err = ENXIO; break; } lbn = uio->uio_offset / biosize; on = uio->uio_offset & (biosize - 1); n = MIN((unsigned)(biosize - on), uio->uio_resid); again: /* Get or create a buffer for the write */ direct_append = uio->uio_offset == filesize && n; if (uio->uio_offset + n < filesize) { extending = false; if ((off_t)(lbn + 1) * biosize < filesize) { /* Not the file's last block */ bcount = biosize; } else { /* The file's last block */ bcount = filesize - (off_t)lbn * biosize; } } else { extending = true; bcount = on + n; } if (howmany(((off_t)lbn * biosize + on + n - 1), PAGE_SIZE) >= howmany(filesize, PAGE_SIZE)) last_page = true; else last_page = false; if (direct_append) { /* * Take care to preserve the buffer's B_CACHE state so * as not to cause an unnecessary read. */ bp = getblk(vp, lbn, on, PCATCH, 0, 0); if (bp != NULL) { uint32_t save = bp->b_flags & B_CACHE; allocbuf(bp, bcount); bp->b_flags |= save; } } else { bp = getblk(vp, lbn, bcount, PCATCH, 0, 0); } if (!bp) { err = EINTR; break; } if (extending) { /* * Extend file _after_ locking buffer so we won't race * with other readers */ err = fuse_vnode_setsize(vp, uio->uio_offset + n); filesize = uio->uio_offset + n; fvdat->flag |= FN_SIZECHANGE; if (err) { brelse(bp); break; } } SDT_PROBE6(fusefs, , io, write_biobackend_start, lbn, on, n, uio, bcount, direct_append); /* * Issue a READ if B_CACHE is not set. In special-append * mode, B_CACHE is based on the buffer prior to the write * op and is typically set, avoiding the read. If a read * is required in special append mode, the server will * probably send us a short-read since we extended the file * on our end, resulting in b_resid == 0 and, thusly, * B_CACHE getting set. * * We can also avoid issuing the read if the write covers * the entire buffer. We have to make sure the buffer state * is reasonable in this case since we will not be initiating * I/O. See the comments in kern/vfs_bio.c's getblk() for * more information. * * B_CACHE may also be set due to the buffer being cached * normally. */ if (on == 0 && n == bcount) { bp->b_flags |= B_CACHE; bp->b_flags &= ~B_INVAL; bp->b_ioflags &= ~BIO_ERROR; } if ((bp->b_flags & B_CACHE) == 0) { bp->b_iocmd = BIO_READ; vfs_busy_pages(bp, 0); fuse_io_strategy(vp, bp); if ((err = bp->b_error)) { brelse(bp); break; } if (bp->b_resid > 0) { /* * Short read indicates EOF. Update file size * from the server and try again. */ SDT_PROBE2(fusefs, , io, trace, 1, "Short read during a RMW"); brelse(bp); err = fuse_vnode_size(vp, &filesize, cred, curthread); if (err) break; else goto again; } } if (bp->b_wcred == NOCRED) bp->b_wcred = crhold(cred); /* * If dirtyend exceeds file size, chop it down. This should * not normally occur but there is an append race where it * might occur XXX, so we log it. * * If the chopping creates a reverse-indexed or degenerate * situation with dirtyoff/end, we 0 both of them. */ if (bp->b_dirtyend > bcount) { SDT_PROBE2(fusefs, , io, write_biobackend_append_race, (long)bp->b_blkno * biosize, bp->b_dirtyend - bcount); bp->b_dirtyend = bcount; } if (bp->b_dirtyoff >= bp->b_dirtyend) bp->b_dirtyoff = bp->b_dirtyend = 0; /* * If the new write will leave a contiguous dirty * area, just update the b_dirtyoff and b_dirtyend, * otherwise force a write rpc of the old dirty area. * * While it is possible to merge discontiguous writes due to * our having a B_CACHE buffer ( and thus valid read data * for the hole), we don't because it could lead to * significant cache coherency problems with multiple clients, * especially if locking is implemented later on. * * as an optimization we could theoretically maintain * a linked list of discontinuous areas, but we would still * have to commit them separately so there isn't much * advantage to it except perhaps a bit of asynchronization. */ if (bp->b_dirtyend > 0 && (on > bp->b_dirtyend || (on + n) < bp->b_dirtyoff)) { /* * Yes, we mean it. Write out everything to "storage" * immediately, without hesitation. (Apart from other * reasons: the only way to know if a write is valid * if its actually written out.) */ SDT_PROBE2(fusefs, , io, write_biobackend_issue, 0, bp); bwrite(bp); if (bp->b_error == EINTR) { err = EINTR; break; } goto again; } err = uiomove((char *)bp->b_data + on, n, uio); if (err) { bp->b_ioflags |= BIO_ERROR; bp->b_error = err; brelse(bp); break; /* TODO: vfs_bio_clrbuf like ffs_write does? */ } /* * Only update dirtyoff/dirtyend if not a degenerate * condition. */ if (n) { if (bp->b_dirtyend > 0) { bp->b_dirtyoff = MIN(on, bp->b_dirtyoff); bp->b_dirtyend = MAX((on + n), bp->b_dirtyend); } else { bp->b_dirtyoff = on; bp->b_dirtyend = on + n; } vfs_bio_set_valid(bp, on, n); } vfs_bio_set_flags(bp, ioflag); bp->b_flags |= B_FUSEFS_WRITE_CACHE; if (ioflag & IO_SYNC) { SDT_PROBE2(fusefs, , io, write_biobackend_issue, 2, bp); if (!(ioflag & IO_VMIO)) bp->b_flags &= ~B_FUSEFS_WRITE_CACHE; err = bwrite(bp); } else if (vm_page_count_severe() || buf_dirty_count_severe() || (ioflag & IO_ASYNC)) { bp->b_flags |= B_CLUSTEROK; SDT_PROBE2(fusefs, , io, write_biobackend_issue, 3, bp); bawrite(bp); } else if (on == 0 && n == bcount) { if ((vp->v_mount->mnt_flag & MNT_NOCLUSTERW) == 0) { bp->b_flags |= B_CLUSTEROK; SDT_PROBE2(fusefs, , io, write_biobackend_issue, 4, bp); cluster_write(vp, &fvdat->clusterw, bp, filesize, seqcount, 0); } else { SDT_PROBE2(fusefs, , io, write_biobackend_issue, 5, bp); bawrite(bp); } } else if (ioflag & IO_DIRECT) { bp->b_flags |= B_CLUSTEROK; SDT_PROBE2(fusefs, , io, write_biobackend_issue, 6, bp); bawrite(bp); } else { bp->b_flags &= ~B_CLUSTEROK; SDT_PROBE2(fusefs, , io, write_biobackend_issue, 7, bp); bdwrite(bp); } if (err) break; } while (uio->uio_resid > 0 && n > 0); return (err); } int fuse_io_strategy(struct vnode *vp, struct buf *bp) { struct fuse_vnode_data *fvdat = VTOFUD(vp); struct fuse_filehandle *fufh; struct ucred *cred; struct uio *uiop; struct uio uio; struct iovec io; off_t filesize; int error = 0; int fflag; /* We don't know the true pid when we're dealing with the cache */ pid_t pid = 0; const int biosize = fuse_iosize(vp); MPASS(vp->v_type == VREG || vp->v_type == VDIR); MPASS(bp->b_iocmd == BIO_READ || bp->b_iocmd == BIO_WRITE); fflag = bp->b_iocmd == BIO_READ ? FREAD : FWRITE; cred = bp->b_iocmd == BIO_READ ? bp->b_rcred : bp->b_wcred; error = fuse_filehandle_getrw(vp, fflag, &fufh, cred, pid); if (bp->b_iocmd == BIO_READ && error == EBADF) { /* * This may be a read-modify-write operation on a cached file * opened O_WRONLY. The FUSE protocol allows this. */ error = fuse_filehandle_get(vp, FWRITE, &fufh, cred, pid); } if (error) { printf("FUSE: strategy: filehandles are closed\n"); bp->b_ioflags |= BIO_ERROR; bp->b_error = error; bufdone(bp); return (error); } uiop = &uio; uiop->uio_iov = &io; uiop->uio_iovcnt = 1; uiop->uio_segflg = UIO_SYSSPACE; uiop->uio_td = curthread; /* * clear BIO_ERROR and B_INVAL state prior to initiating the I/O. We * do this here so we do not have to do it in all the code that * calls us. */ bp->b_flags &= ~B_INVAL; bp->b_ioflags &= ~BIO_ERROR; KASSERT(!(bp->b_flags & B_DONE), ("fuse_io_strategy: bp %p already marked done", bp)); if (bp->b_iocmd == BIO_READ) { ssize_t left; io.iov_len = uiop->uio_resid = bp->b_bcount; io.iov_base = bp->b_data; uiop->uio_rw = UIO_READ; uiop->uio_offset = ((off_t)bp->b_lblkno) * biosize; error = fuse_read_directbackend(vp, uiop, cred, fufh); /* * Store the amount we failed to read in the buffer's private * field, so callers can truncate the file if necessary' */ if (!error && uiop->uio_resid) { int nread = bp->b_bcount - uiop->uio_resid; left = uiop->uio_resid; bzero((char *)bp->b_data + nread, left); if ((fvdat->flag & FN_SIZECHANGE) == 0) { /* * A short read with no error, when not using * direct io, and when no writes are cached, * indicates EOF caused by a server-side * truncation. Clear the attr cache so we'll * pick up the new file size and timestamps. * * We must still bzero the remaining buffer so * uninitialized data doesn't get exposed by a * future truncate that extends the file. * * To prevent lock order problems, we must * truncate the file upstack, not here. */ SDT_PROBE2(fusefs, , io, trace, 1, "Short read of a clean file"); fuse_vnode_clear_attr_cache(vp); } else { /* * If dirty writes _are_ cached beyond EOF, * that indicates a newly created hole that the * server doesn't know about. Those don't pose * any problem. * XXX: we don't currently track whether dirty * writes are cached beyond EOF, before EOF, or * both. */ SDT_PROBE2(fusefs, , io, trace, 1, "Short read of a dirty file"); uiop->uio_resid = 0; } } if (error) { bp->b_ioflags |= BIO_ERROR; bp->b_error = error; } } else { /* * Setup for actual write */ error = fuse_vnode_size(vp, &filesize, cred, curthread); if (error) { bp->b_ioflags |= BIO_ERROR; bp->b_error = error; bufdone(bp); return (error); } if ((off_t)bp->b_lblkno * biosize + bp->b_dirtyend > filesize) bp->b_dirtyend = filesize - (off_t)bp->b_lblkno * biosize; if (bp->b_dirtyend > bp->b_dirtyoff) { io.iov_len = uiop->uio_resid = bp->b_dirtyend - bp->b_dirtyoff; uiop->uio_offset = (off_t)bp->b_lblkno * biosize + bp->b_dirtyoff; io.iov_base = (char *)bp->b_data + bp->b_dirtyoff; uiop->uio_rw = UIO_WRITE; bool pages = bp->b_flags & B_FUSEFS_WRITE_CACHE; error = fuse_write_directbackend(vp, uiop, cred, fufh, filesize, 0, pages); if (error == EINTR || error == ETIMEDOUT) { bp->b_flags &= ~(B_INVAL | B_NOCACHE); if ((bp->b_flags & B_PAGING) == 0) { bdirty(bp); bp->b_flags &= ~B_DONE; } if ((error == EINTR || error == ETIMEDOUT) && (bp->b_flags & B_ASYNC) == 0) bp->b_flags |= B_EINTR; } else { if (error) { bp->b_ioflags |= BIO_ERROR; bp->b_flags |= B_INVAL; bp->b_error = error; } bp->b_dirtyoff = bp->b_dirtyend = 0; } } else { bp->b_resid = 0; bufdone(bp); return (0); } } bp->b_resid = uiop->uio_resid; bufdone(bp); return (error); } int fuse_io_flushbuf(struct vnode *vp, int waitfor, struct thread *td) { return (vn_fsync_buf(vp, waitfor)); } /* * Flush and invalidate all dirty buffers. If another process is already * doing the flush, just wait for completion. */ int fuse_io_invalbuf(struct vnode *vp, struct thread *td) { struct fuse_vnode_data *fvdat = VTOFUD(vp); int error = 0; if (VN_IS_DOOMED(vp)) return 0; ASSERT_VOP_ELOCKED(vp, "fuse_io_invalbuf"); while (fvdat->flag & FN_FLUSHINPROG) { struct proc *p = td->td_proc; if (vp->v_mount->mnt_kern_flag & MNTK_UNMOUNTF) return EIO; fvdat->flag |= FN_FLUSHWANT; tsleep(&fvdat->flag, PRIBIO + 2, "fusevinv", 2 * hz); error = 0; if (p != NULL) { PROC_LOCK(p); if (SIGNOTEMPTY(p->p_siglist) || SIGNOTEMPTY(td->td_siglist)) error = EINTR; PROC_UNLOCK(p); } if (error == EINTR) return EINTR; } fvdat->flag |= FN_FLUSHINPROG; if (vp->v_bufobj.bo_object != NULL) { VM_OBJECT_WLOCK(vp->v_bufobj.bo_object); vm_object_page_clean(vp->v_bufobj.bo_object, 0, 0, OBJPC_SYNC); VM_OBJECT_WUNLOCK(vp->v_bufobj.bo_object); } error = vinvalbuf(vp, V_SAVE, PCATCH, 0); while (error) { if (error == ERESTART || error == EINTR) { fvdat->flag &= ~FN_FLUSHINPROG; if (fvdat->flag & FN_FLUSHWANT) { fvdat->flag &= ~FN_FLUSHWANT; wakeup(&fvdat->flag); } return EINTR; } error = vinvalbuf(vp, V_SAVE, PCATCH, 0); } fvdat->flag &= ~FN_FLUSHINPROG; if (fvdat->flag & FN_FLUSHWANT) { fvdat->flag &= ~FN_FLUSHWANT; wakeup(&fvdat->flag); } return (error); } diff --git a/sys/fs/fuse/fuse_ipc.c b/sys/fs/fuse/fuse_ipc.c index 791ee9f38444..6b4a29214a96 100644 --- a/sys/fs/fuse/fuse_ipc.c +++ b/sys/fs/fuse/fuse_ipc.c @@ -1,1075 +1,1089 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 2007-2009 Google Inc. and Amit Singh * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are * met: * * * Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following disclaimer * in the documentation and/or other materials provided with the * distribution. * * Neither the name of Google Inc. nor the names of its * contributors may be used to endorse or promote products derived from * this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * Copyright (C) 2005 Csaba Henk. * All rights reserved. * * Copyright (c) 2019 The FreeBSD Foundation * * Portions of this software were developed by BFF Storage Systems, LLC under * sponsorship from the FreeBSD Foundation. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "fuse.h" #include "fuse_node.h" #include "fuse_ipc.h" #include "fuse_internal.h" SDT_PROVIDER_DECLARE(fusefs); /* * Fuse trace probe: * arg0: verbosity. Higher numbers give more verbose messages * arg1: Textual message */ SDT_PROBE_DEFINE2(fusefs, , ipc, trace, "int", "char*"); static void fdisp_make_pid(struct fuse_dispatcher *fdip, enum fuse_opcode op, struct fuse_data *data, uint64_t nid, pid_t pid, struct ucred *cred); static void fuse_interrupt_send(struct fuse_ticket *otick, int err); static struct fuse_ticket *fticket_alloc(struct fuse_data *data); static void fticket_refresh(struct fuse_ticket *ftick); static void fticket_destroy(struct fuse_ticket *ftick); static int fticket_wait_answer(struct fuse_ticket *ftick); static inline int fticket_aw_pull_uio(struct fuse_ticket *ftick, struct uio *uio); static int fuse_body_audit(struct fuse_ticket *ftick, size_t blen); static fuse_handler_t fuse_standard_handler; static counter_u64_t fuse_ticket_count; SYSCTL_COUNTER_U64(_vfs_fusefs_stats, OID_AUTO, ticket_count, CTLFLAG_RD, &fuse_ticket_count, "Number of allocated tickets"); static long fuse_iov_permanent_bufsize = 1 << 19; SYSCTL_LONG(_vfs_fusefs, OID_AUTO, iov_permanent_bufsize, CTLFLAG_RW, &fuse_iov_permanent_bufsize, 0, "limit for permanently stored buffer size for fuse_iovs"); static int fuse_iov_credit = 16; SYSCTL_INT(_vfs_fusefs, OID_AUTO, iov_credit, CTLFLAG_RW, &fuse_iov_credit, 0, "how many times is an oversized fuse_iov tolerated"); MALLOC_DEFINE(M_FUSEMSG, "fuse_msgbuf", "fuse message buffer"); static uma_zone_t ticket_zone; /* * TODO: figure out how to timeout INTERRUPT requests, because the daemon may * leagally never respond */ static int fuse_interrupt_callback(struct fuse_ticket *tick, struct uio *uio) { struct fuse_ticket *otick, *x_tick; struct fuse_interrupt_in *fii; struct fuse_data *data = tick->tk_data; bool found = false; fii = (struct fuse_interrupt_in*)((char*)tick->tk_ms_fiov.base + sizeof(struct fuse_in_header)); fuse_lck_mtx_lock(data->aw_mtx); TAILQ_FOREACH_SAFE(otick, &data->aw_head, tk_aw_link, x_tick) { if (otick->tk_unique == fii->unique) { found = true; break; } } fuse_lck_mtx_unlock(data->aw_mtx); if (!found) { /* Original is already complete. Just return */ return 0; } /* Clear the original ticket's interrupt association */ otick->irq_unique = 0; if (tick->tk_aw_ohead.error == ENOSYS) { fsess_set_notimpl(data->mp, FUSE_INTERRUPT); return 0; } else if (tick->tk_aw_ohead.error == EAGAIN) { /* * There are two reasons we might get this: * 1) the daemon received the INTERRUPT request before the * original, or * 2) the daemon received the INTERRUPT request after it * completed the original request. * In the first case we should re-send the INTERRUPT. In the * second, we should ignore it. */ /* Resend */ fuse_interrupt_send(otick, EINTR); return 0; } else { /* Illegal FUSE_INTERRUPT response */ return EINVAL; } } /* Interrupt the operation otick. Return err as its error code */ void fuse_interrupt_send(struct fuse_ticket *otick, int err) { struct fuse_dispatcher fdi; struct fuse_interrupt_in *fii; struct fuse_in_header *ftick_hdr; struct fuse_data *data = otick->tk_data; struct fuse_ticket *tick, *xtick; struct ucred reused_creds; gid_t reused_groups[1]; if (otick->irq_unique == 0) { /* * If the daemon hasn't yet received otick, then we can answer * it ourselves and return. */ fuse_lck_mtx_lock(data->ms_mtx); STAILQ_FOREACH_SAFE(tick, &otick->tk_data->ms_head, tk_ms_link, xtick) { if (tick == otick) { STAILQ_REMOVE(&otick->tk_data->ms_head, tick, fuse_ticket, tk_ms_link); otick->tk_data->ms_count--; otick->tk_ms_link.stqe_next = NULL; fuse_lck_mtx_unlock(data->ms_mtx); fuse_lck_mtx_lock(otick->tk_aw_mtx); if (!fticket_answered(otick)) { fticket_set_answered(otick); otick->tk_aw_errno = err; wakeup(otick); } fuse_lck_mtx_unlock(otick->tk_aw_mtx); fuse_ticket_drop(tick); return; } } fuse_lck_mtx_unlock(data->ms_mtx); /* * If the fuse daemon doesn't support interrupts, then there's * nothing more that we can do */ if (fsess_not_impl(data->mp, FUSE_INTERRUPT)) return; /* * If the fuse daemon has already received otick, then we must * send FUSE_INTERRUPT. */ ftick_hdr = fticket_in_header(otick); reused_creds.cr_uid = ftick_hdr->uid; reused_groups[0] = ftick_hdr->gid; reused_creds.cr_groups = reused_groups; fdisp_init(&fdi, sizeof(*fii)); fdisp_make_pid(&fdi, FUSE_INTERRUPT, data, ftick_hdr->nodeid, ftick_hdr->pid, &reused_creds); fii = fdi.indata; fii->unique = otick->tk_unique; fuse_insert_callback(fdi.tick, fuse_interrupt_callback); otick->irq_unique = fdi.tick->tk_unique; /* Interrupt ops should be delivered ASAP */ fuse_insert_message(fdi.tick, true); fdisp_destroy(&fdi); } else { /* This ticket has already been interrupted */ } } void fiov_init(struct fuse_iov *fiov, size_t size) { uint32_t msize = FU_AT_LEAST(size); fiov->len = 0; fiov->base = malloc(msize, M_FUSEMSG, M_WAITOK | M_ZERO); fiov->allocated_size = msize; fiov->credit = fuse_iov_credit; } void fiov_teardown(struct fuse_iov *fiov) { MPASS(fiov->base != NULL); free(fiov->base, M_FUSEMSG); } void fiov_adjust(struct fuse_iov *fiov, size_t size) { if (fiov->allocated_size < size || (fuse_iov_permanent_bufsize >= 0 && fiov->allocated_size - size > fuse_iov_permanent_bufsize && --fiov->credit < 0)) { fiov->base = realloc(fiov->base, FU_AT_LEAST(size), M_FUSEMSG, M_WAITOK | M_ZERO); if (!fiov->base) { panic("FUSE: realloc failed"); } fiov->allocated_size = FU_AT_LEAST(size); fiov->credit = fuse_iov_credit; /* Clear data buffer after reallocation */ bzero(fiov->base, size); } else if (size > fiov->len) { /* Clear newly extended portion of data buffer */ bzero((char*)fiov->base + fiov->len, size - fiov->len); } fiov->len = size; } /* Resize the fiov if needed, and clear it's buffer */ void fiov_refresh(struct fuse_iov *fiov) { fiov_adjust(fiov, 0); } static int fticket_ctor(void *mem, int size, void *arg, int flags) { struct fuse_ticket *ftick = mem; struct fuse_data *data = arg; FUSE_ASSERT_MS_DONE(ftick); FUSE_ASSERT_AW_DONE(ftick); ftick->tk_data = data; if (ftick->tk_unique != 0) fticket_refresh(ftick); /* May be truncated to 32 bits */ ftick->tk_unique = atomic_fetchadd_long(&data->ticketer, 1); if (ftick->tk_unique == 0) ftick->tk_unique = atomic_fetchadd_long(&data->ticketer, 1); ftick->irq_unique = 0; refcount_init(&ftick->tk_refcount, 1); counter_u64_add(fuse_ticket_count, 1); return 0; } static void fticket_dtor(void *mem, int size, void *arg) { #ifdef INVARIANTS struct fuse_ticket *ftick = mem; #endif FUSE_ASSERT_MS_DONE(ftick); FUSE_ASSERT_AW_DONE(ftick); counter_u64_add(fuse_ticket_count, -1); } static int fticket_init(void *mem, int size, int flags) { struct fuse_ticket *ftick = mem; bzero(ftick, sizeof(struct fuse_ticket)); fiov_init(&ftick->tk_ms_fiov, sizeof(struct fuse_in_header)); mtx_init(&ftick->tk_aw_mtx, "fuse answer delivery mutex", NULL, MTX_DEF); fiov_init(&ftick->tk_aw_fiov, 0); return 0; } static void fticket_fini(void *mem, int size) { struct fuse_ticket *ftick = mem; fiov_teardown(&ftick->tk_ms_fiov); fiov_teardown(&ftick->tk_aw_fiov); mtx_destroy(&ftick->tk_aw_mtx); } static inline struct fuse_ticket * fticket_alloc(struct fuse_data *data) { return uma_zalloc_arg(ticket_zone, data, M_WAITOK); } static inline void fticket_destroy(struct fuse_ticket *ftick) { return uma_zfree(ticket_zone, ftick); } static inline void fticket_refresh(struct fuse_ticket *ftick) { FUSE_ASSERT_MS_DONE(ftick); FUSE_ASSERT_AW_DONE(ftick); fiov_refresh(&ftick->tk_ms_fiov); bzero(&ftick->tk_aw_ohead, sizeof(struct fuse_out_header)); fiov_refresh(&ftick->tk_aw_fiov); ftick->tk_aw_errno = 0; ftick->tk_flag = 0; } /* Prepar the ticket to be reused, but don't clear its data buffers */ static inline void fticket_reset(struct fuse_ticket *ftick) { FUSE_ASSERT_MS_DONE(ftick); FUSE_ASSERT_AW_DONE(ftick); bzero(&ftick->tk_aw_ohead, sizeof(struct fuse_out_header)); ftick->tk_aw_errno = 0; ftick->tk_flag = 0; } static int fticket_wait_answer(struct fuse_ticket *ftick) { struct thread *td = curthread; sigset_t blockedset, oldset; int err = 0, stops_deferred; struct fuse_data *data = ftick->tk_data; bool interrupted = false; if (fsess_maybe_impl(ftick->tk_data->mp, FUSE_INTERRUPT) && data->dataflags & FSESS_INTR) { SIGEMPTYSET(blockedset); } else { /* Block all signals except (implicitly) SIGKILL */ SIGFILLSET(blockedset); } stops_deferred = sigdeferstop(SIGDEFERSTOP_SILENT); kern_sigprocmask(td, SIG_BLOCK, NULL, &oldset, 0); fuse_lck_mtx_lock(ftick->tk_aw_mtx); retry: if (fticket_answered(ftick)) { goto out; } if (fdata_get_dead(data)) { err = ENOTCONN; fticket_set_answered(ftick); goto out; } kern_sigprocmask(td, SIG_BLOCK, &blockedset, NULL, 0); err = msleep(ftick, &ftick->tk_aw_mtx, PCATCH, "fu_ans", data->daemon_timeout * hz); kern_sigprocmask(td, SIG_SETMASK, &oldset, NULL, 0); if (err == EWOULDBLOCK) { SDT_PROBE2(fusefs, , ipc, trace, 3, "fticket_wait_answer: EWOULDBLOCK"); #ifdef XXXIP /* die conditionally */ if (!fdata_get_dead(data)) { fdata_set_dead(data); } #endif err = ETIMEDOUT; fticket_set_answered(ftick); } else if ((err == EINTR || err == ERESTART)) { /* * Whether we get EINTR or ERESTART depends on whether * SA_RESTART was set by sigaction(2). * * Try to interrupt the operation and wait for an EINTR response * to the original operation. If the file system does not * support FUSE_INTERRUPT, then we'll just wait for it to * complete like normal. If it does support FUSE_INTERRUPT, * then it will either respond EINTR to the original operation, * or EAGAIN to the interrupt. */ sigset_t tmpset; SDT_PROBE2(fusefs, , ipc, trace, 4, "fticket_wait_answer: interrupt"); fuse_lck_mtx_unlock(ftick->tk_aw_mtx); fuse_interrupt_send(ftick, err); PROC_LOCK(td->td_proc); mtx_lock(&td->td_proc->p_sigacts->ps_mtx); tmpset = td->td_proc->p_siglist; SIGSETOR(tmpset, td->td_siglist); mtx_unlock(&td->td_proc->p_sigacts->ps_mtx); PROC_UNLOCK(td->td_proc); fuse_lck_mtx_lock(ftick->tk_aw_mtx); if (!interrupted && !SIGISMEMBER(tmpset, SIGKILL)) { /* * Block all signals while we wait for an interrupt * response. The protocol doesn't discriminate between * different signals. */ SIGFILLSET(blockedset); interrupted = true; goto retry; } else { /* * Return immediately for fatal signals, or if this is * the second interruption. We should only be * interrupted twice if the thread is stopped, for * example during sigexit. */ } } else if (err) { SDT_PROBE2(fusefs, , ipc, trace, 6, "fticket_wait_answer: other error"); } else { SDT_PROBE2(fusefs, , ipc, trace, 7, "fticket_wait_answer: OK"); } out: if (!(err || fticket_answered(ftick))) { SDT_PROBE2(fusefs, , ipc, trace, 1, "FUSE: requester was woken up but still no answer"); err = ENXIO; } fuse_lck_mtx_unlock(ftick->tk_aw_mtx); sigallowstop(stops_deferred); return err; } static inline int fticket_aw_pull_uio(struct fuse_ticket *ftick, struct uio *uio) { int err = 0; size_t len = uio_resid(uio); if (len) { fiov_adjust(fticket_resp(ftick), len); err = uiomove(fticket_resp(ftick)->base, len, uio); } return err; } int fticket_pull(struct fuse_ticket *ftick, struct uio *uio) { int err = 0; if (ftick->tk_aw_ohead.error) { return 0; } err = fuse_body_audit(ftick, uio_resid(uio)); if (!err) { err = fticket_aw_pull_uio(ftick, uio); } return err; } struct fuse_data * fdata_alloc(struct cdev *fdev, struct ucred *cred) { struct fuse_data *data; data = malloc(sizeof(struct fuse_data), M_FUSEMSG, M_WAITOK | M_ZERO); data->fdev = fdev; mtx_init(&data->ms_mtx, "fuse message list mutex", NULL, MTX_DEF); STAILQ_INIT(&data->ms_head); data->ms_count = 0; knlist_init_mtx(&data->ks_rsel.si_note, &data->ms_mtx); mtx_init(&data->aw_mtx, "fuse answer list mutex", NULL, MTX_DEF); TAILQ_INIT(&data->aw_head); data->daemoncred = crhold(cred); data->daemon_timeout = FUSE_DEFAULT_DAEMON_TIMEOUT; sx_init(&data->rename_lock, "fuse rename lock"); data->ref = 1; return data; } void fdata_trydestroy(struct fuse_data *data) { data->ref--; MPASS(data->ref >= 0); if (data->ref != 0) return; /* Driving off stage all that stuff thrown at device... */ sx_destroy(&data->rename_lock); crfree(data->daemoncred); mtx_destroy(&data->aw_mtx); knlist_delete(&data->ks_rsel.si_note, curthread, 0); knlist_destroy(&data->ks_rsel.si_note); mtx_destroy(&data->ms_mtx); free(data, M_FUSEMSG); } void fdata_set_dead(struct fuse_data *data) { FUSE_LOCK(); if (fdata_get_dead(data)) { FUSE_UNLOCK(); return; } fuse_lck_mtx_lock(data->ms_mtx); data->dataflags |= FSESS_DEAD; wakeup_one(data); selwakeuppri(&data->ks_rsel, PZERO + 1); wakeup(&data->ticketer); fuse_lck_mtx_unlock(data->ms_mtx); FUSE_UNLOCK(); } struct fuse_ticket * fuse_ticket_fetch(struct fuse_data *data) { int err = 0; struct fuse_ticket *ftick; ftick = fticket_alloc(data); if (!(data->dataflags & FSESS_INITED)) { /* Sleep until get answer for INIT messsage */ FUSE_LOCK(); if (!(data->dataflags & FSESS_INITED) && data->ticketer > 2) { err = msleep(&data->ticketer, &fuse_mtx, PCATCH | PDROP, "fu_ini", 0); if (err) fdata_set_dead(data); } else FUSE_UNLOCK(); } return ftick; } int fuse_ticket_drop(struct fuse_ticket *ftick) { int die; die = refcount_release(&ftick->tk_refcount); if (die) fticket_destroy(ftick); return die; } void fuse_insert_callback(struct fuse_ticket *ftick, fuse_handler_t * handler) { if (fdata_get_dead(ftick->tk_data)) { return; } ftick->tk_aw_handler = handler; fuse_lck_mtx_lock(ftick->tk_data->aw_mtx); fuse_aw_push(ftick); fuse_lck_mtx_unlock(ftick->tk_data->aw_mtx); } /* * Insert a new upgoing ticket into the message queue * * If urgent is true, insert at the front of the queue. Otherwise, insert in * FIFO order. */ void fuse_insert_message(struct fuse_ticket *ftick, bool urgent) { if (ftick->tk_flag & FT_DIRTY) { panic("FUSE: ticket reused without being refreshed"); } ftick->tk_flag |= FT_DIRTY; if (fdata_get_dead(ftick->tk_data)) { return; } fuse_lck_mtx_lock(ftick->tk_data->ms_mtx); if (urgent) fuse_ms_push_head(ftick); else fuse_ms_push(ftick); wakeup_one(ftick->tk_data); selwakeuppri(&ftick->tk_data->ks_rsel, PZERO + 1); KNOTE_LOCKED(&ftick->tk_data->ks_rsel.si_note, 0); fuse_lck_mtx_unlock(ftick->tk_data->ms_mtx); } static int fuse_body_audit(struct fuse_ticket *ftick, size_t blen) { int err = 0; enum fuse_opcode opcode; opcode = fticket_opcode(ftick); switch (opcode) { case FUSE_BMAP: err = (blen == sizeof(struct fuse_bmap_out)) ? 0 : EINVAL; break; case FUSE_LINK: case FUSE_LOOKUP: case FUSE_MKDIR: case FUSE_MKNOD: case FUSE_SYMLINK: if (fuse_libabi_geq(ftick->tk_data, 7, 9)) { err = (blen == sizeof(struct fuse_entry_out)) ? 0 : EINVAL; } else { err = (blen == FUSE_COMPAT_ENTRY_OUT_SIZE) ? 0 : EINVAL; } break; case FUSE_FORGET: panic("FUSE: a handler has been intalled for FUSE_FORGET"); break; case FUSE_GETATTR: case FUSE_SETATTR: if (fuse_libabi_geq(ftick->tk_data, 7, 9)) { err = (blen == sizeof(struct fuse_attr_out)) ? 0 : EINVAL; } else { err = (blen == FUSE_COMPAT_ATTR_OUT_SIZE) ? 0 : EINVAL; } break; case FUSE_READLINK: err = (PAGE_SIZE >= blen) ? 0 : EINVAL; break; case FUSE_UNLINK: err = (blen == 0) ? 0 : EINVAL; break; case FUSE_RMDIR: err = (blen == 0) ? 0 : EINVAL; break; case FUSE_RENAME: err = (blen == 0) ? 0 : EINVAL; break; case FUSE_OPEN: err = (blen == sizeof(struct fuse_open_out)) ? 0 : EINVAL; break; case FUSE_READ: err = (((struct fuse_read_in *)( (char *)ftick->tk_ms_fiov.base + sizeof(struct fuse_in_header) ))->size >= blen) ? 0 : EINVAL; break; case FUSE_WRITE: err = (blen == sizeof(struct fuse_write_out)) ? 0 : EINVAL; break; case FUSE_STATFS: if (fuse_libabi_geq(ftick->tk_data, 7, 4)) { err = (blen == sizeof(struct fuse_statfs_out)) ? 0 : EINVAL; } else { err = (blen == FUSE_COMPAT_STATFS_SIZE) ? 0 : EINVAL; } break; case FUSE_RELEASE: err = (blen == 0) ? 0 : EINVAL; break; case FUSE_FSYNC: err = (blen == 0) ? 0 : EINVAL; break; case FUSE_SETXATTR: err = (blen == 0) ? 0 : EINVAL; break; case FUSE_GETXATTR: case FUSE_LISTXATTR: /* * These can have varying response lengths, and 0 length * isn't necessarily invalid. */ err = 0; break; case FUSE_REMOVEXATTR: err = (blen == 0) ? 0 : EINVAL; break; case FUSE_FLUSH: err = (blen == 0) ? 0 : EINVAL; break; case FUSE_INIT: if (blen == sizeof(struct fuse_init_out) || blen == FUSE_COMPAT_INIT_OUT_SIZE || blen == FUSE_COMPAT_22_INIT_OUT_SIZE) { err = 0; } else { err = EINVAL; } break; case FUSE_OPENDIR: err = (blen == sizeof(struct fuse_open_out)) ? 0 : EINVAL; break; case FUSE_READDIR: err = (((struct fuse_read_in *)( (char *)ftick->tk_ms_fiov.base + sizeof(struct fuse_in_header) ))->size >= blen) ? 0 : EINVAL; break; case FUSE_RELEASEDIR: err = (blen == 0) ? 0 : EINVAL; break; case FUSE_FSYNCDIR: err = (blen == 0) ? 0 : EINVAL; break; case FUSE_GETLK: err = (blen == sizeof(struct fuse_lk_out)) ? 0 : EINVAL; break; case FUSE_SETLK: err = (blen == 0) ? 0 : EINVAL; break; case FUSE_SETLKW: err = (blen == 0) ? 0 : EINVAL; break; case FUSE_ACCESS: err = (blen == 0) ? 0 : EINVAL; break; case FUSE_CREATE: if (fuse_libabi_geq(ftick->tk_data, 7, 9)) { err = (blen == sizeof(struct fuse_entry_out) + sizeof(struct fuse_open_out)) ? 0 : EINVAL; } else { err = (blen == FUSE_COMPAT_ENTRY_OUT_SIZE + sizeof(struct fuse_open_out)) ? 0 : EINVAL; } break; case FUSE_DESTROY: err = (blen == 0) ? 0 : EINVAL; break; case FUSE_LSEEK: err = (blen == sizeof(struct fuse_lseek_out)) ? 0 : EINVAL; break; case FUSE_COPY_FILE_RANGE: err = (blen == sizeof(struct fuse_write_out)) ? 0 : EINVAL; break; default: panic("FUSE: opcodes out of sync (%d)\n", opcode); } return err; } static inline void fuse_setup_ihead(struct fuse_in_header *ihead, struct fuse_ticket *ftick, uint64_t nid, enum fuse_opcode op, size_t blen, pid_t pid, struct ucred *cred) { ihead->len = sizeof(*ihead) + blen; ihead->unique = ftick->tk_unique; ihead->nodeid = nid; ihead->opcode = op; ihead->pid = pid; ihead->uid = cred->cr_uid; ihead->gid = cred->cr_groups[0]; } /* * fuse_standard_handler just pulls indata and wakes up pretender. * Doesn't try to interpret data, that's left for the pretender. * Though might do a basic size verification before the pull-in takes place */ static int fuse_standard_handler(struct fuse_ticket *ftick, struct uio *uio) { int err = 0; err = fticket_pull(ftick, uio); fuse_lck_mtx_lock(ftick->tk_aw_mtx); if (!fticket_answered(ftick)) { fticket_set_answered(ftick); ftick->tk_aw_errno = err; wakeup(ftick); } fuse_lck_mtx_unlock(ftick->tk_aw_mtx); return err; } /* * Reinitialize a dispatcher from a pid and node id, without resizing or * clearing its data buffers */ static void fdisp_refresh_pid(struct fuse_dispatcher *fdip, enum fuse_opcode op, struct mount *mp, uint64_t nid, pid_t pid, struct ucred *cred) { MPASS(fdip->tick); MPASS2(sizeof(fdip->finh) + fdip->iosize <= fdip->tick->tk_ms_fiov.len, "Must use fdisp_make_pid to increase the size of the fiov"); fticket_reset(fdip->tick); FUSE_DIMALLOC(&fdip->tick->tk_ms_fiov, fdip->finh, fdip->indata, fdip->iosize); fuse_setup_ihead(fdip->finh, fdip->tick, nid, op, fdip->iosize, pid, cred); } /* Initialize a dispatcher from a pid and node id */ static void fdisp_make_pid(struct fuse_dispatcher *fdip, enum fuse_opcode op, struct fuse_data *data, uint64_t nid, pid_t pid, struct ucred *cred) { if (fdip->tick) { fticket_refresh(fdip->tick); } else { fdip->tick = fuse_ticket_fetch(data); } /* FUSE_DIMALLOC will bzero the fiovs when it enlarges them */ FUSE_DIMALLOC(&fdip->tick->tk_ms_fiov, fdip->finh, fdip->indata, fdip->iosize); fuse_setup_ihead(fdip->finh, fdip->tick, nid, op, fdip->iosize, pid, cred); } void fdisp_make(struct fuse_dispatcher *fdip, enum fuse_opcode op, struct mount *mp, uint64_t nid, struct thread *td, struct ucred *cred) { struct fuse_data *data = fuse_get_mpdata(mp); RECTIFY_TDCR(td, cred); return fdisp_make_pid(fdip, op, data, nid, td->td_proc->p_pid, cred); } void fdisp_make_vp(struct fuse_dispatcher *fdip, enum fuse_opcode op, struct vnode *vp, struct thread *td, struct ucred *cred) { struct mount *mp = vnode_mount(vp); struct fuse_data *data = fuse_get_mpdata(mp); RECTIFY_TDCR(td, cred); return fdisp_make_pid(fdip, op, data, VTOI(vp), td->td_proc->p_pid, cred); } /* Refresh a fuse_dispatcher so it can be reused, but don't zero its data */ void fdisp_refresh_vp(struct fuse_dispatcher *fdip, enum fuse_opcode op, struct vnode *vp, struct thread *td, struct ucred *cred) { RECTIFY_TDCR(td, cred); return fdisp_refresh_pid(fdip, op, vnode_mount(vp), VTOI(vp), td->td_proc->p_pid, cred); } void fdisp_refresh(struct fuse_dispatcher *fdip) { fticket_refresh(fdip->tick); } SDT_PROBE_DEFINE2(fusefs, , ipc, fdisp_wait_answ_error, "char*", "int"); int fdisp_wait_answ(struct fuse_dispatcher *fdip) { int err = 0; fdip->answ_stat = 0; fuse_insert_callback(fdip->tick, fuse_standard_handler); fuse_insert_message(fdip->tick, false); if ((err = fticket_wait_answer(fdip->tick))) { fuse_lck_mtx_lock(fdip->tick->tk_aw_mtx); if (fticket_answered(fdip->tick)) { /* * Just between noticing the interrupt and getting here, * the standard handler has completed his job. * So we drop the ticket and exit as usual. */ SDT_PROBE2(fusefs, , ipc, fdisp_wait_answ_error, "IPC: interrupted, already answered", err); fuse_lck_mtx_unlock(fdip->tick->tk_aw_mtx); goto out; } else { /* * So we were faster than the standard handler. * Then by setting the answered flag we get *him* * to drop the ticket. */ SDT_PROBE2(fusefs, , ipc, fdisp_wait_answ_error, "IPC: interrupted, setting to answered", err); fticket_set_answered(fdip->tick); fuse_lck_mtx_unlock(fdip->tick->tk_aw_mtx); return err; } } if (fdip->tick->tk_aw_errno == ENOTCONN) { /* The daemon died while we were waiting for a response */ err = ENOTCONN; goto out; } else if (fdip->tick->tk_aw_errno) { /* * There was some sort of communication error with the daemon * that the client wouldn't understand. */ SDT_PROBE2(fusefs, , ipc, fdisp_wait_answ_error, "IPC: explicit EIO-ing", fdip->tick->tk_aw_errno); err = EIO; goto out; } if ((err = fdip->tick->tk_aw_ohead.error)) { SDT_PROBE2(fusefs, , ipc, fdisp_wait_answ_error, "IPC: setting status", fdip->tick->tk_aw_ohead.error); /* * This means a "proper" fuse syscall error. * We record this value so the caller will * be able to know it's not a boring messaging * failure, if she wishes so (and if not, she can * just simply propagate the return value of this routine). * [XXX Maybe a bitflag would do the job too, * if other flags needed, this will be converted thusly.] */ fdip->answ_stat = err; goto out; } fdip->answ = fticket_resp(fdip->tick)->base; fdip->iosize = fticket_resp(fdip->tick)->len; return 0; out: return err; } void fuse_ipc_init(void) { ticket_zone = uma_zcreate("fuse_ticket", sizeof(struct fuse_ticket), fticket_ctor, fticket_dtor, fticket_init, fticket_fini, UMA_ALIGN_PTR, 0); fuse_ticket_count = counter_u64_alloc(M_WAITOK); } void fuse_ipc_destroy(void) { counter_u64_free(fuse_ticket_count); uma_zdestroy(ticket_zone); } + +SDT_PROBE_DEFINE3(fusefs,, ipc, warn, "struct fuse_data*", "unsigned", "char*"); +void +fuse_warn(struct fuse_data *data, unsigned flag, const char *msg) +{ + SDT_PROBE3(fusefs, , ipc, warn, data, flag, msg); + if (!(data->dataflags & flag)) { + printf("WARNING: FUSE protocol violation for server mounted at " + "%s: %s " + "This warning will not be repeated.\n", + data->mp->mnt_stat.f_mntonname, msg); + data->dataflags |= flag; + } +} diff --git a/sys/fs/fuse/fuse_ipc.h b/sys/fs/fuse/fuse_ipc.h index 71562d9193ec..1351499c0712 100644 --- a/sys/fs/fuse/fuse_ipc.h +++ b/sys/fs/fuse/fuse_ipc.h @@ -1,455 +1,463 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 2007-2009 Google Inc. and Amit Singh * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are * met: * * * Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following disclaimer * in the documentation and/or other materials provided with the * distribution. * * Neither the name of Google Inc. nor the names of its * contributors may be used to endorse or promote products derived from * this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * Copyright (C) 2005 Csaba Henk. * All rights reserved. * * Copyright (c) 2019 The FreeBSD Foundation * * Portions of this software were developed by BFF Storage Systems, LLC under * sponsorship from the FreeBSD Foundation. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #ifndef _FUSE_IPC_H_ #define _FUSE_IPC_H_ #include #include enum fuse_data_cache_mode { FUSE_CACHE_UC, FUSE_CACHE_WT, FUSE_CACHE_WB, }; struct fuse_iov { void *base; size_t len; size_t allocated_size; int credit; }; void fiov_init(struct fuse_iov *fiov, size_t size); void fiov_teardown(struct fuse_iov *fiov); void fiov_refresh(struct fuse_iov *fiov); void fiov_adjust(struct fuse_iov *fiov, size_t size); #define FUSE_DIMALLOC(fiov, spc1, spc2, amnt) do { \ fiov_adjust(fiov, (sizeof(*(spc1)) + (amnt))); \ (spc1) = (fiov)->base; \ (spc2) = (char *)(fiov)->base + (sizeof(*(spc1))); \ } while (0) #define FU_AT_LEAST(siz) max((siz), 160) #define FUSE_ASSERT_AW_DONE(ftick) \ KASSERT((ftick)->tk_aw_link.tqe_next == NULL && \ (ftick)->tk_aw_link.tqe_prev == NULL, \ ("FUSE: ticket still on answer delivery list %p", (ftick))) #define FUSE_ASSERT_MS_DONE(ftick) \ KASSERT((ftick)->tk_ms_link.stqe_next == NULL, \ ("FUSE: ticket still on message list %p", (ftick))) struct fuse_ticket; struct fuse_data; typedef int fuse_handler_t(struct fuse_ticket *ftick, struct uio *uio); struct fuse_ticket { /* fields giving the identity of the ticket */ uint64_t tk_unique; struct fuse_data *tk_data; int tk_flag; u_int tk_refcount; /* * If this ticket's operation has been interrupted, this will hold the * unique value of the FUSE_INTERRUPT operation. Otherwise, it will be * 0. */ uint64_t irq_unique; /* fields for initiating an upgoing message */ struct fuse_iov tk_ms_fiov; STAILQ_ENTRY(fuse_ticket) tk_ms_link; /* fields for handling answers coming from userspace */ struct fuse_iov tk_aw_fiov; struct fuse_out_header tk_aw_ohead; int tk_aw_errno; struct mtx tk_aw_mtx; fuse_handler_t *tk_aw_handler; TAILQ_ENTRY(fuse_ticket) tk_aw_link; }; #define FT_ANSW 0x01 /* request of ticket has already been answered */ #define FT_DIRTY 0x04 /* ticket has been used */ static inline struct fuse_iov * fticket_resp(struct fuse_ticket *ftick) { return (&ftick->tk_aw_fiov); } static inline bool fticket_answered(struct fuse_ticket *ftick) { mtx_assert(&ftick->tk_aw_mtx, MA_OWNED); return (ftick->tk_flag & FT_ANSW); } static inline void fticket_set_answered(struct fuse_ticket *ftick) { mtx_assert(&ftick->tk_aw_mtx, MA_OWNED); ftick->tk_flag |= FT_ANSW; } static inline struct fuse_in_header* fticket_in_header(struct fuse_ticket *ftick) { return (struct fuse_in_header *)(ftick->tk_ms_fiov.base); } static inline enum fuse_opcode fticket_opcode(struct fuse_ticket *ftick) { return fticket_in_header(ftick)->opcode; } int fticket_pull(struct fuse_ticket *ftick, struct uio *uio); /* * The data representing a FUSE session. */ struct fuse_data { struct cdev *fdev; struct mount *mp; struct vnode *vroot; struct ucred *daemoncred; int dataflags; int ref; struct mtx ms_mtx; STAILQ_HEAD(, fuse_ticket) ms_head; int ms_count; struct mtx aw_mtx; TAILQ_HEAD(, fuse_ticket) aw_head; /* * Holds the next value of the FUSE operation unique value. * Also, serves as a wakeup channel to prevent any operations from * being created before INIT completes. */ u_long ticketer; struct sx rename_lock; uint32_t fuse_libabi_major; uint32_t fuse_libabi_minor; uint32_t max_readahead_blocks; uint32_t max_write; uint32_t max_read; struct selinfo ks_rsel; int daemon_timeout; int linux_errnos; unsigned time_gran; /* A bitmask of FUSE RPCs that are not implemented by the server */ uint64_t notimpl; /* * A bitmask of FUSE RPCs that are implemented by the server. * If an operation is not present in either notimpl or isimpl, then it * may be implemented by the server, but the kernel doesn't know for * sure. */ uint64_t isimpl; uint64_t mnt_flag; enum fuse_data_cache_mode cache_mode; }; #define FSESS_DEAD 0x0001 /* session is to be closed */ #define FSESS_INITED 0x0004 /* session has been inited */ #define FSESS_DAEMON_CAN_SPY 0x0010 /* let non-owners access this fs */ /* (and being observed by the daemon) */ #define FSESS_PUSH_SYMLINKS_IN 0x0020 /* prefix absolute symlinks with mp */ #define FSESS_DEFAULT_PERMISSIONS 0x0040 /* kernel does permission checking */ #define FSESS_ASYNC_READ 0x1000 /* allow multiple reads of some file */ #define FSESS_POSIX_LOCKS 0x2000 /* daemon supports POSIX locks */ #define FSESS_EXPORT_SUPPORT 0x10000 /* daemon supports NFS-style lookups */ #define FSESS_INTR 0x20000 /* interruptible mounts */ +#define FSESS_WARN_SHORT_WRITE 0x40000 /* Short write without direct_io */ +#define FSESS_WARN_WROTE_LONG 0x80000 /* Wrote more data than provided */ +#define FSESS_WARN_LSEXTATTR_LONG 0x100000 /* Returned too many extattrs */ +#define FSESS_WARN_CACHE_INCOHERENT 0x200000 /* Read cache incoherent */ +#define FSESS_WARN_WB_CACHE_INCOHERENT 0x400000 /* WB cache incoherent */ #define FSESS_MNTOPTS_MASK ( \ FSESS_DAEMON_CAN_SPY | FSESS_PUSH_SYMLINKS_IN | \ FSESS_DEFAULT_PERMISSIONS | FSESS_INTR) extern int fuse_data_cache_mode; static inline struct fuse_data * fuse_get_mpdata(struct mount *mp) { return mp->mnt_data; } static inline bool fsess_is_impl(struct mount *mp, int opcode) { struct fuse_data *data = fuse_get_mpdata(mp); return ((data->isimpl & (1ULL << opcode)) != 0); } static inline bool fsess_maybe_impl(struct mount *mp, int opcode) { struct fuse_data *data = fuse_get_mpdata(mp); return ((data->notimpl & (1ULL << opcode)) == 0); } static inline bool fsess_not_impl(struct mount *mp, int opcode) { struct fuse_data *data = fuse_get_mpdata(mp); return ((data->notimpl & (1ULL << opcode)) != 0); } static inline void fsess_set_impl(struct mount *mp, int opcode) { struct fuse_data *data = fuse_get_mpdata(mp); data->isimpl |= (1ULL << opcode); } static inline void fsess_set_notimpl(struct mount *mp, int opcode) { struct fuse_data *data = fuse_get_mpdata(mp); data->notimpl |= (1ULL << opcode); } static inline bool fsess_opt_datacache(struct mount *mp) { struct fuse_data *data = fuse_get_mpdata(mp); return (data->cache_mode != FUSE_CACHE_UC); } static inline bool fsess_opt_mmap(struct mount *mp) { return (fsess_opt_datacache(mp)); } static inline bool fsess_opt_writeback(struct mount *mp) { struct fuse_data *data = fuse_get_mpdata(mp); return (data->cache_mode == FUSE_CACHE_WB); } /* Insert a new upgoing message */ static inline void fuse_ms_push(struct fuse_ticket *ftick) { mtx_assert(&ftick->tk_data->ms_mtx, MA_OWNED); refcount_acquire(&ftick->tk_refcount); STAILQ_INSERT_TAIL(&ftick->tk_data->ms_head, ftick, tk_ms_link); ftick->tk_data->ms_count++; } /* Insert a new upgoing message to the front of the queue */ static inline void fuse_ms_push_head(struct fuse_ticket *ftick) { mtx_assert(&ftick->tk_data->ms_mtx, MA_OWNED); refcount_acquire(&ftick->tk_refcount); STAILQ_INSERT_HEAD(&ftick->tk_data->ms_head, ftick, tk_ms_link); ftick->tk_data->ms_count++; } static inline struct fuse_ticket * fuse_ms_pop(struct fuse_data *data) { struct fuse_ticket *ftick = NULL; mtx_assert(&data->ms_mtx, MA_OWNED); if ((ftick = STAILQ_FIRST(&data->ms_head))) { STAILQ_REMOVE_HEAD(&data->ms_head, tk_ms_link); data->ms_count--; #ifdef INVARIANTS MPASS(data->ms_count >= 0); ftick->tk_ms_link.stqe_next = NULL; #endif } return (ftick); } static inline void fuse_aw_push(struct fuse_ticket *ftick) { mtx_assert(&ftick->tk_data->aw_mtx, MA_OWNED); refcount_acquire(&ftick->tk_refcount); TAILQ_INSERT_TAIL(&ftick->tk_data->aw_head, ftick, tk_aw_link); } static inline void fuse_aw_remove(struct fuse_ticket *ftick) { mtx_assert(&ftick->tk_data->aw_mtx, MA_OWNED); TAILQ_REMOVE(&ftick->tk_data->aw_head, ftick, tk_aw_link); #ifdef INVARIANTS ftick->tk_aw_link.tqe_next = NULL; ftick->tk_aw_link.tqe_prev = NULL; #endif } static inline struct fuse_ticket * fuse_aw_pop(struct fuse_data *data) { struct fuse_ticket *ftick; mtx_assert(&data->aw_mtx, MA_OWNED); if ((ftick = TAILQ_FIRST(&data->aw_head)) != NULL) fuse_aw_remove(ftick); return (ftick); } struct fuse_ticket *fuse_ticket_fetch(struct fuse_data *data); int fuse_ticket_drop(struct fuse_ticket *ftick); void fuse_insert_callback(struct fuse_ticket *ftick, fuse_handler_t *handler); void fuse_insert_message(struct fuse_ticket *ftick, bool irq); static inline bool fuse_libabi_geq(struct fuse_data *data, uint32_t abi_maj, uint32_t abi_min) { return (data->fuse_libabi_major > abi_maj || (data->fuse_libabi_major == abi_maj && data->fuse_libabi_minor >= abi_min)); } +/* Print msg as a warning to the console, but no more than once per session */ +void fuse_warn(struct fuse_data *data, unsigned flag, const char *msg); + struct fuse_data *fdata_alloc(struct cdev *dev, struct ucred *cred); void fdata_trydestroy(struct fuse_data *data); void fdata_set_dead(struct fuse_data *data); static inline bool fdata_get_dead(struct fuse_data *data) { return (data->dataflags & FSESS_DEAD); } struct fuse_dispatcher { struct fuse_ticket *tick; struct fuse_in_header *finh; void *indata; size_t iosize; uint64_t nodeid; int answ_stat; void *answ; }; static inline void fdisp_init(struct fuse_dispatcher *fdisp, size_t iosize) { fdisp->iosize = iosize; fdisp->tick = NULL; } static inline void fdisp_destroy(struct fuse_dispatcher *fdisp) { fuse_ticket_drop(fdisp->tick); #ifdef INVARIANTS fdisp->tick = NULL; #endif } void fdisp_refresh(struct fuse_dispatcher *fdip); void fdisp_make(struct fuse_dispatcher *fdip, enum fuse_opcode op, struct mount *mp, uint64_t nid, struct thread *td, struct ucred *cred); void fdisp_make_vp(struct fuse_dispatcher *fdip, enum fuse_opcode op, struct vnode *vp, struct thread *td, struct ucred *cred); void fdisp_refresh_vp(struct fuse_dispatcher *fdip, enum fuse_opcode op, struct vnode *vp, struct thread *td, struct ucred *cred); int fdisp_wait_answ(struct fuse_dispatcher *fdip); static inline int fdisp_simple_putget_vp(struct fuse_dispatcher *fdip, enum fuse_opcode op, struct vnode *vp, struct thread *td, struct ucred *cred) { fdisp_make_vp(fdip, op, vp, td, cred); return (fdisp_wait_answ(fdip)); } #endif /* _FUSE_IPC_H_ */ diff --git a/sys/fs/fuse/fuse_vfsops.c b/sys/fs/fuse/fuse_vfsops.c index 7f47f8800994..b188996066d5 100644 --- a/sys/fs/fuse/fuse_vfsops.c +++ b/sys/fs/fuse/fuse_vfsops.c @@ -1,699 +1,716 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 2007-2009 Google Inc. and Amit Singh * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are * met: * * * Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following disclaimer * in the documentation and/or other materials provided with the * distribution. * * Neither the name of Google Inc. nor the names of its * contributors may be used to endorse or promote products derived from * this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * Copyright (C) 2005 Csaba Henk. * All rights reserved. * * Copyright (c) 2019 The FreeBSD Foundation * * Portions of this software were developed by BFF Storage Systems, LLC under * sponsorship from the FreeBSD Foundation. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "fuse.h" #include "fuse_node.h" #include "fuse_ipc.h" #include "fuse_internal.h" #include #include SDT_PROVIDER_DECLARE(fusefs); /* * Fuse trace probe: * arg0: verbosity. Higher numbers give more verbose messages * arg1: Textual message */ SDT_PROBE_DEFINE2(fusefs, , vfsops, trace, "int", "char*"); /* This will do for privilege types for now */ #ifndef PRIV_VFS_FUSE_ALLOWOTHER #define PRIV_VFS_FUSE_ALLOWOTHER PRIV_VFS_MOUNT_NONUSER #endif #ifndef PRIV_VFS_FUSE_MOUNT_NONUSER #define PRIV_VFS_FUSE_MOUNT_NONUSER PRIV_VFS_MOUNT_NONUSER #endif #ifndef PRIV_VFS_FUSE_SYNC_UNMOUNT #define PRIV_VFS_FUSE_SYNC_UNMOUNT PRIV_VFS_MOUNT_NONUSER #endif static vfs_fhtovp_t fuse_vfsop_fhtovp; static vfs_mount_t fuse_vfsop_mount; static vfs_unmount_t fuse_vfsop_unmount; static vfs_root_t fuse_vfsop_root; static vfs_statfs_t fuse_vfsop_statfs; static vfs_vget_t fuse_vfsop_vget; struct vfsops fuse_vfsops = { .vfs_fhtovp = fuse_vfsop_fhtovp, .vfs_mount = fuse_vfsop_mount, .vfs_unmount = fuse_vfsop_unmount, .vfs_root = fuse_vfsop_root, .vfs_statfs = fuse_vfsop_statfs, .vfs_vget = fuse_vfsop_vget, }; static int fuse_enforce_dev_perms = 0; SYSCTL_INT(_vfs_fusefs, OID_AUTO, enforce_dev_perms, CTLFLAG_RW, &fuse_enforce_dev_perms, 0, "enforce fuse device permissions for secondary mounts"); MALLOC_DEFINE(M_FUSEVFS, "fuse_filesystem", "buffer for fuse vfs layer"); static int fuse_getdevice(const char *fspec, struct thread *td, struct cdev **fdevp) { struct nameidata nd, *ndp = &nd; struct vnode *devvp; struct cdev *fdev; int err; /* * Not an update, or updating the name: look up the name * and verify that it refers to a sensible disk device. */ NDINIT(ndp, LOOKUP, FOLLOW, UIO_SYSSPACE, fspec, td); if ((err = namei(ndp)) != 0) return err; NDFREE(ndp, NDF_ONLY_PNBUF); devvp = ndp->ni_vp; if (devvp->v_type != VCHR) { vrele(devvp); return ENXIO; } fdev = devvp->v_rdev; dev_ref(fdev); if (fuse_enforce_dev_perms) { /* * Check if mounter can open the fuse device. * * This has significance only if we are doing a secondary mount * which doesn't involve actually opening fuse devices, but we * still want to enforce the permissions of the device (in * order to keep control over the circle of fuse users). * * (In case of primary mounts, we are either the superuser so * we can do anything anyway, or we can mount only if the * device is already opened by us, ie. we are permitted to open * the device.) */ #if 0 #ifdef MAC err = mac_check_vnode_open(td->td_ucred, devvp, VREAD | VWRITE); if (!err) #endif #endif /* 0 */ err = VOP_ACCESS(devvp, VREAD | VWRITE, td->td_ucred, td); if (err) { vrele(devvp); dev_rel(fdev); return err; } } /* * according to coda code, no extra lock is needed -- * although in sys/vnode.h this field is marked "v" */ vrele(devvp); if (!fdev->si_devsw || strcmp("fuse", fdev->si_devsw->d_name)) { dev_rel(fdev); return ENXIO; } *fdevp = fdev; return 0; } #define FUSE_FLAGOPT(fnam, fval) do { \ vfs_flagopt(opts, #fnam, &mntopts, fval); \ vfs_flagopt(opts, "__" #fnam, &__mntopts, fval); \ } while (0) SDT_PROBE_DEFINE1(fusefs, , vfsops, mntopts, "uint64_t"); SDT_PROBE_DEFINE4(fusefs, , vfsops, mount_err, "char*", "struct fuse_data*", "struct mount*", "int"); static int fuse_vfs_remount(struct mount *mp, struct thread *td, uint64_t mntopts, uint32_t max_read, int daemon_timeout) { int err = 0; struct fuse_data *data = fuse_get_mpdata(mp); /* Don't allow these options to be changed */ const static unsigned long long cant_update_opts = MNT_USER; /* Mount owner must be the user running the daemon */ FUSE_LOCK(); if ((mp->mnt_flag ^ data->mnt_flag) & cant_update_opts) { err = EOPNOTSUPP; SDT_PROBE4(fusefs, , vfsops, mount_err, "Can't change these mount options during remount", data, mp, err); goto out; } if (((data->dataflags ^ mntopts) & FSESS_MNTOPTS_MASK) || (data->max_read != max_read) || (data->daemon_timeout != daemon_timeout)) { // TODO: allow changing options where it makes sense err = EOPNOTSUPP; SDT_PROBE4(fusefs, , vfsops, mount_err, "Can't change fuse mount options during remount", data, mp, err); goto out; } if (fdata_get_dead(data)) { err = ENOTCONN; SDT_PROBE4(fusefs, , vfsops, mount_err, "device is dead during mount", data, mp, err); goto out; } /* Sanity + permission checks */ if (!data->daemoncred) panic("fuse daemon found, but identity unknown"); if (mntopts & FSESS_DAEMON_CAN_SPY) err = priv_check(td, PRIV_VFS_FUSE_ALLOWOTHER); if (err == 0 && td->td_ucred->cr_uid != data->daemoncred->cr_uid) /* are we allowed to do the first mount? */ err = priv_check(td, PRIV_VFS_FUSE_MOUNT_NONUSER); out: FUSE_UNLOCK(); return err; } static int fuse_vfsop_fhtovp(struct mount *mp, struct fid *fhp, int flags, struct vnode **vpp) { struct fuse_fid *ffhp = (struct fuse_fid *)fhp; struct fuse_vnode_data *fvdat; struct vnode *nvp; int error; if (!(fuse_get_mpdata(mp)->dataflags & FSESS_EXPORT_SUPPORT)) return EOPNOTSUPP; error = VFS_VGET(mp, ffhp->nid, LK_EXCLUSIVE, &nvp); if (error) { *vpp = NULLVP; return (error); } fvdat = VTOFUD(nvp); if (fvdat->generation != ffhp->gen ) { vput(nvp); *vpp = NULLVP; return (ESTALE); } *vpp = nvp; vnode_create_vobject(*vpp, 0, curthread); return (0); } static int fuse_vfsop_mount(struct mount *mp) { int err; uint64_t mntopts, __mntopts; uint32_t max_read; int linux_errnos; int daemon_timeout; int fd; struct cdev *fdev; struct fuse_data *data = NULL; struct thread *td; struct file *fp, *fptmp; char *fspec, *subtype; struct vfsoptlist *opts; subtype = NULL; max_read = ~0; linux_errnos = 0; err = 0; mntopts = 0; __mntopts = 0; td = curthread; /* Get the new options passed to mount */ opts = mp->mnt_optnew; if (!opts) return EINVAL; /* `fspath' contains the mount point (eg. /mnt/fuse/sshfs); REQUIRED */ if (!vfs_getopts(opts, "fspath", &err)) return err; /* * With the help of underscored options the mount program * can inform us from the flags it sets by default */ FUSE_FLAGOPT(allow_other, FSESS_DAEMON_CAN_SPY); FUSE_FLAGOPT(push_symlinks_in, FSESS_PUSH_SYMLINKS_IN); FUSE_FLAGOPT(default_permissions, FSESS_DEFAULT_PERMISSIONS); FUSE_FLAGOPT(intr, FSESS_INTR); (void)vfs_scanopt(opts, "max_read=", "%u", &max_read); (void)vfs_scanopt(opts, "linux_errnos", "%d", &linux_errnos); if (vfs_scanopt(opts, "timeout=", "%u", &daemon_timeout) == 1) { if (daemon_timeout < FUSE_MIN_DAEMON_TIMEOUT) daemon_timeout = FUSE_MIN_DAEMON_TIMEOUT; else if (daemon_timeout > FUSE_MAX_DAEMON_TIMEOUT) daemon_timeout = FUSE_MAX_DAEMON_TIMEOUT; } else { daemon_timeout = FUSE_DEFAULT_DAEMON_TIMEOUT; } subtype = vfs_getopts(opts, "subtype=", &err); SDT_PROBE1(fusefs, , vfsops, mntopts, mntopts); if (mp->mnt_flag & MNT_UPDATE) { return fuse_vfs_remount(mp, td, mntopts, max_read, daemon_timeout); } /* `from' contains the device name (eg. /dev/fuse0); REQUIRED */ fspec = vfs_getopts(opts, "from", &err); if (!fspec) return err; /* `fd' contains the filedescriptor for this session; REQUIRED */ if (vfs_scanopt(opts, "fd", "%d", &fd) != 1) return EINVAL; err = fuse_getdevice(fspec, td, &fdev); if (err != 0) return err; err = fget(td, fd, &cap_read_rights, &fp); if (err != 0) { SDT_PROBE2(fusefs, , vfsops, trace, 1, "invalid or not opened device"); goto out; } fptmp = td->td_fpop; td->td_fpop = fp; err = devfs_get_cdevpriv((void **)&data); td->td_fpop = fptmp; fdrop(fp, td); FUSE_LOCK(); if (err != 0 || data == NULL) { err = ENXIO; SDT_PROBE4(fusefs, , vfsops, mount_err, "invalid or not opened device", data, mp, err); FUSE_UNLOCK(); goto out; } if (fdata_get_dead(data)) { err = ENOTCONN; SDT_PROBE4(fusefs, , vfsops, mount_err, "device is dead during mount", data, mp, err); FUSE_UNLOCK(); goto out; } /* Sanity + permission checks */ if (!data->daemoncred) panic("fuse daemon found, but identity unknown"); if (mntopts & FSESS_DAEMON_CAN_SPY) err = priv_check(td, PRIV_VFS_FUSE_ALLOWOTHER); if (err == 0 && td->td_ucred->cr_uid != data->daemoncred->cr_uid) /* are we allowed to do the first mount? */ err = priv_check(td, PRIV_VFS_FUSE_MOUNT_NONUSER); if (err) { FUSE_UNLOCK(); goto out; } data->ref++; data->mp = mp; data->dataflags |= mntopts; data->max_read = max_read; data->daemon_timeout = daemon_timeout; data->linux_errnos = linux_errnos; data->mnt_flag = mp->mnt_flag & MNT_UPDATEMASK; FUSE_UNLOCK(); vfs_getnewfsid(mp); MNT_ILOCK(mp); mp->mnt_data = data; /* * FUSE file systems can be either local or remote, but the kernel * can't tell the difference. */ mp->mnt_flag &= ~MNT_LOCAL; mp->mnt_kern_flag |= MNTK_USES_BCACHE; /* * Disable nullfs cacheing because it can consume too many resources in * the FUSE server. */ mp->mnt_kern_flag |= MNTK_NULL_NOCACHE; MNT_IUNLOCK(mp); /* We need this here as this slot is used by getnewvnode() */ mp->mnt_stat.f_iosize = maxbcachebuf; if (subtype) { strlcat(mp->mnt_stat.f_fstypename, ".", MFSNAMELEN); strlcat(mp->mnt_stat.f_fstypename, subtype, MFSNAMELEN); } memset(mp->mnt_stat.f_mntfromname, 0, MNAMELEN); strlcpy(mp->mnt_stat.f_mntfromname, fspec, MNAMELEN); mp->mnt_iosize_max = maxphys; /* Now handshaking with daemon */ fuse_internal_send_init(data, td); out: if (err) { FUSE_LOCK(); if (data != NULL && data->mp == mp) { /* * Destroy device only if we acquired reference to * it */ SDT_PROBE4(fusefs, , vfsops, mount_err, "mount failed, destroy device", data, mp, err); data->mp = NULL; mp->mnt_data = NULL; fdata_trydestroy(data); } FUSE_UNLOCK(); dev_rel(fdev); } return err; } static int fuse_vfsop_unmount(struct mount *mp, int mntflags) { int err = 0; int flags = 0; struct cdev *fdev; struct fuse_data *data; struct fuse_dispatcher fdi; struct thread *td = curthread; if (mntflags & MNT_FORCE) { flags |= FORCECLOSE; } data = fuse_get_mpdata(mp); if (!data) { panic("no private data for mount point?"); } /* There is 1 extra root vnode reference (mp->mnt_data). */ FUSE_LOCK(); if (data->vroot != NULL) { struct vnode *vroot = data->vroot; data->vroot = NULL; FUSE_UNLOCK(); vrele(vroot); } else FUSE_UNLOCK(); err = vflush(mp, 0, flags, td); if (err) { return err; } if (fdata_get_dead(data)) { goto alreadydead; } if (fsess_maybe_impl(mp, FUSE_DESTROY)) { fdisp_init(&fdi, 0); fdisp_make(&fdi, FUSE_DESTROY, mp, 0, td, NULL); (void)fdisp_wait_answ(&fdi); fdisp_destroy(&fdi); } fdata_set_dead(data); alreadydead: FUSE_LOCK(); data->mp = NULL; fdev = data->fdev; fdata_trydestroy(data); FUSE_UNLOCK(); MNT_ILOCK(mp); mp->mnt_data = NULL; MNT_IUNLOCK(mp); dev_rel(fdev); return 0; } SDT_PROBE_DEFINE1(fusefs, , vfsops, invalidate_without_export, "struct mount*"); static int fuse_vfsop_vget(struct mount *mp, ino_t ino, int flags, struct vnode **vpp) { struct fuse_data *data = fuse_get_mpdata(mp); uint64_t nodeid = ino; struct thread *td = curthread; struct fuse_dispatcher fdi; struct fuse_entry_out *feo; struct fuse_vnode_data *fvdat; const char dot[] = "."; off_t filesize; enum vtype vtyp; int error; if (!(data->dataflags & FSESS_EXPORT_SUPPORT)) { /* * Unreachable unless you do something stupid, like export a * nullfs mount of a fusefs file system. */ SDT_PROBE1(fusefs, , vfsops, invalidate_without_export, mp); return (EOPNOTSUPP); } error = fuse_internal_get_cached_vnode(mp, ino, flags, vpp); if (error || *vpp != NULL) return error; /* Do a LOOKUP, using nodeid as the parent and "." as filename */ fdisp_init(&fdi, sizeof(dot)); fdisp_make(&fdi, FUSE_LOOKUP, mp, nodeid, td, td->td_ucred); memcpy(fdi.indata, dot, sizeof(dot)); error = fdisp_wait_answ(&fdi); if (error) return error; feo = (struct fuse_entry_out *)fdi.answ; if (feo->nodeid == 0) { /* zero nodeid means ENOENT and cache it */ error = ENOENT; goto out; } vtyp = IFTOVT(feo->attr.mode); error = fuse_vnode_get(mp, feo, nodeid, NULL, vpp, NULL, vtyp); if (error) goto out; filesize = feo->attr.size; /* * In the case where we are looking up a FUSE node represented by an * existing cached vnode, and the true size reported by FUSE_LOOKUP * doesn't match the vnode's cached size, then any cached writes beyond * the file's current size are lost. * * We can get here: * * following attribute cache expiration, or * * due a bug in the daemon, or */ fvdat = VTOFUD(*vpp); if (vnode_isreg(*vpp) && filesize != fvdat->cached_attrs.va_size && fvdat->flag & FN_SIZECHANGE) { - printf("%s: WB cache incoherent on %s!\n", __func__, - vnode_mount(*vpp)->mnt_stat.f_mntonname); - + if (data->cache_mode == fuse_data_cache_mode) { + const char *msg; + + if (fuse_libabi_geq(data, 7, 23)) { + msg = "writeback cache incoherent!." + "To prevent data corruption, disable " + "the writeback cache according to your " + "FUSE server's documentation."; + } else { + msg = "writeback cache incoherent!." + "To prevent data corruption, disable " + "the writeback cache by setting " + "vfs.fusefs.data_cache_mode to 0 or 1."; + } + fuse_warn(data, FSESS_WARN_WB_CACHE_INCOHERENT, msg); + } else { + /* If we get here, it's likely a fusefs kernel bug */ + printf("%s: WB cache incoherent on %s!\n", __func__, + vnode_mount(*vpp)->mnt_stat.f_mntonname); + } fvdat->flag &= ~FN_SIZECHANGE; } fuse_internal_cache_attrs(*vpp, &feo->attr, feo->attr_valid, feo->attr_valid_nsec, NULL); fuse_validity_2_bintime(feo->entry_valid, feo->entry_valid_nsec, &fvdat->entry_cache_timeout); out: fdisp_destroy(&fdi); return error; } static int fuse_vfsop_root(struct mount *mp, int lkflags, struct vnode **vpp) { struct fuse_data *data = fuse_get_mpdata(mp); int err = 0; if (data->vroot != NULL) { err = vget(data->vroot, lkflags); if (err == 0) *vpp = data->vroot; } else { err = fuse_vnode_get(mp, NULL, FUSE_ROOT_ID, NULL, vpp, NULL, VDIR); if (err == 0) { FUSE_LOCK(); MPASS(data->vroot == NULL || data->vroot == *vpp); if (data->vroot == NULL) { SDT_PROBE2(fusefs, , vfsops, trace, 1, "new root vnode"); data->vroot = *vpp; FUSE_UNLOCK(); vref(*vpp); } else if (data->vroot != *vpp) { SDT_PROBE2(fusefs, , vfsops, trace, 1, "root vnode race"); FUSE_UNLOCK(); VOP_UNLOCK(*vpp); vrele(*vpp); vrecycle(*vpp); *vpp = data->vroot; } else FUSE_UNLOCK(); } } return err; } static int fuse_vfsop_statfs(struct mount *mp, struct statfs *sbp) { struct fuse_dispatcher fdi; int err = 0; struct fuse_statfs_out *fsfo; struct fuse_data *data; data = fuse_get_mpdata(mp); if (!(data->dataflags & FSESS_INITED)) goto fake; fdisp_init(&fdi, 0); fdisp_make(&fdi, FUSE_STATFS, mp, FUSE_ROOT_ID, NULL, NULL); err = fdisp_wait_answ(&fdi); if (err) { fdisp_destroy(&fdi); if (err == ENOTCONN) { /* * We want to seem a legitimate fs even if the daemon * is stiff dead... (so that, eg., we can still do path * based unmounting after the daemon dies). */ goto fake; } return err; } fsfo = fdi.answ; sbp->f_blocks = fsfo->st.blocks; sbp->f_bfree = fsfo->st.bfree; sbp->f_bavail = fsfo->st.bavail; sbp->f_files = fsfo->st.files; sbp->f_ffree = fsfo->st.ffree; /* cast from uint64_t to int64_t */ sbp->f_namemax = fsfo->st.namelen; sbp->f_bsize = fsfo->st.frsize; /* cast from uint32_t to uint64_t */ fdisp_destroy(&fdi); return 0; fake: sbp->f_blocks = 0; sbp->f_bfree = 0; sbp->f_bavail = 0; sbp->f_files = 0; sbp->f_ffree = 0; sbp->f_namemax = 0; sbp->f_bsize = S_BLKSIZE; return 0; } diff --git a/sys/fs/fuse/fuse_vnops.c b/sys/fs/fuse/fuse_vnops.c index a51c1b15e7f0..c79d8d5b5223 100644 --- a/sys/fs/fuse/fuse_vnops.c +++ b/sys/fs/fuse/fuse_vnops.c @@ -1,2752 +1,2752 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 2007-2009 Google Inc. and Amit Singh * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are * met: * * * Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following disclaimer * in the documentation and/or other materials provided with the * distribution. * * Neither the name of Google Inc. nor the names of its * contributors may be used to endorse or promote products derived from * this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * Copyright (C) 2005 Csaba Henk. * All rights reserved. * * Copyright (c) 2019 The FreeBSD Foundation * * Portions of this software were developed by BFF Storage Systems, LLC under * sponsorship from the FreeBSD Foundation. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "fuse.h" #include "fuse_file.h" #include "fuse_internal.h" #include "fuse_ipc.h" #include "fuse_node.h" #include "fuse_io.h" #include /* Maximum number of hardlinks to a single FUSE file */ #define FUSE_LINK_MAX UINT32_MAX SDT_PROVIDER_DECLARE(fusefs); /* * Fuse trace probe: * arg0: verbosity. Higher numbers give more verbose messages * arg1: Textual message */ SDT_PROBE_DEFINE2(fusefs, , vnops, trace, "int", "char*"); /* vnode ops */ static vop_access_t fuse_vnop_access; static vop_advlock_t fuse_vnop_advlock; static vop_bmap_t fuse_vnop_bmap; static vop_close_t fuse_fifo_close; static vop_close_t fuse_vnop_close; static vop_copy_file_range_t fuse_vnop_copy_file_range; static vop_create_t fuse_vnop_create; static vop_deleteextattr_t fuse_vnop_deleteextattr; static vop_fdatasync_t fuse_vnop_fdatasync; static vop_fsync_t fuse_vnop_fsync; static vop_getattr_t fuse_vnop_getattr; static vop_getextattr_t fuse_vnop_getextattr; static vop_inactive_t fuse_vnop_inactive; static vop_ioctl_t fuse_vnop_ioctl; static vop_link_t fuse_vnop_link; static vop_listextattr_t fuse_vnop_listextattr; static vop_lookup_t fuse_vnop_lookup; static vop_mkdir_t fuse_vnop_mkdir; static vop_mknod_t fuse_vnop_mknod; static vop_open_t fuse_vnop_open; static vop_pathconf_t fuse_vnop_pathconf; static vop_read_t fuse_vnop_read; static vop_readdir_t fuse_vnop_readdir; static vop_readlink_t fuse_vnop_readlink; static vop_reclaim_t fuse_vnop_reclaim; static vop_remove_t fuse_vnop_remove; static vop_rename_t fuse_vnop_rename; static vop_rmdir_t fuse_vnop_rmdir; static vop_setattr_t fuse_vnop_setattr; static vop_setextattr_t fuse_vnop_setextattr; static vop_strategy_t fuse_vnop_strategy; static vop_symlink_t fuse_vnop_symlink; static vop_write_t fuse_vnop_write; static vop_getpages_t fuse_vnop_getpages; static vop_print_t fuse_vnop_print; static vop_vptofh_t fuse_vnop_vptofh; struct vop_vector fuse_fifoops = { .vop_default = &fifo_specops, .vop_access = fuse_vnop_access, .vop_close = fuse_fifo_close, .vop_fsync = fuse_vnop_fsync, .vop_getattr = fuse_vnop_getattr, .vop_inactive = fuse_vnop_inactive, .vop_pathconf = fuse_vnop_pathconf, .vop_print = fuse_vnop_print, .vop_read = VOP_PANIC, .vop_reclaim = fuse_vnop_reclaim, .vop_setattr = fuse_vnop_setattr, .vop_write = VOP_PANIC, .vop_vptofh = fuse_vnop_vptofh, }; VFS_VOP_VECTOR_REGISTER(fuse_fifoops); struct vop_vector fuse_vnops = { .vop_allocate = VOP_EINVAL, .vop_default = &default_vnodeops, .vop_access = fuse_vnop_access, .vop_advlock = fuse_vnop_advlock, .vop_bmap = fuse_vnop_bmap, .vop_close = fuse_vnop_close, .vop_copy_file_range = fuse_vnop_copy_file_range, .vop_create = fuse_vnop_create, .vop_deleteextattr = fuse_vnop_deleteextattr, .vop_fsync = fuse_vnop_fsync, .vop_fdatasync = fuse_vnop_fdatasync, .vop_getattr = fuse_vnop_getattr, .vop_getextattr = fuse_vnop_getextattr, .vop_inactive = fuse_vnop_inactive, .vop_ioctl = fuse_vnop_ioctl, .vop_link = fuse_vnop_link, .vop_listextattr = fuse_vnop_listextattr, .vop_lookup = fuse_vnop_lookup, .vop_mkdir = fuse_vnop_mkdir, .vop_mknod = fuse_vnop_mknod, .vop_open = fuse_vnop_open, .vop_pathconf = fuse_vnop_pathconf, /* * TODO: implement vop_poll after upgrading to protocol 7.21. * FUSE_POLL was added in protocol 7.11, but it's kind of broken until * 7.21, which adds the ability for the client to choose which poll * events it wants, and for a client to deregister a file handle */ .vop_read = fuse_vnop_read, .vop_readdir = fuse_vnop_readdir, .vop_readlink = fuse_vnop_readlink, .vop_reclaim = fuse_vnop_reclaim, .vop_remove = fuse_vnop_remove, .vop_rename = fuse_vnop_rename, .vop_rmdir = fuse_vnop_rmdir, .vop_setattr = fuse_vnop_setattr, .vop_setextattr = fuse_vnop_setextattr, .vop_strategy = fuse_vnop_strategy, .vop_symlink = fuse_vnop_symlink, .vop_write = fuse_vnop_write, .vop_getpages = fuse_vnop_getpages, .vop_print = fuse_vnop_print, .vop_vptofh = fuse_vnop_vptofh, }; VFS_VOP_VECTOR_REGISTER(fuse_vnops); uma_zone_t fuse_pbuf_zone; /* Check permission for extattr operations, much like extattr_check_cred */ static int fuse_extattr_check_cred(struct vnode *vp, int ns, struct ucred *cred, struct thread *td, accmode_t accmode) { struct mount *mp = vnode_mount(vp); struct fuse_data *data = fuse_get_mpdata(mp); int default_permissions = data->dataflags & FSESS_DEFAULT_PERMISSIONS; /* * Kernel-invoked always succeeds. */ if (cred == NOCRED) return (0); /* * Do not allow privileged processes in jail to directly manipulate * system attributes. */ switch (ns) { case EXTATTR_NAMESPACE_SYSTEM: if (default_permissions) { return (priv_check_cred(cred, PRIV_VFS_EXTATTR_SYSTEM)); } return (0); case EXTATTR_NAMESPACE_USER: if (default_permissions) { return (fuse_internal_access(vp, accmode, td, cred)); } return (0); default: return (EPERM); } } /* Get a filehandle for a directory */ static int fuse_filehandle_get_dir(struct vnode *vp, struct fuse_filehandle **fufhp, struct ucred *cred, pid_t pid) { if (fuse_filehandle_get(vp, FREAD, fufhp, cred, pid) == 0) return 0; return fuse_filehandle_get(vp, FEXEC, fufhp, cred, pid); } /* Send FUSE_FLUSH for this vnode */ static int fuse_flush(struct vnode *vp, struct ucred *cred, pid_t pid, int fflag) { struct fuse_flush_in *ffi; struct fuse_filehandle *fufh; struct fuse_dispatcher fdi; struct thread *td = curthread; struct mount *mp = vnode_mount(vp); int err; if (fsess_not_impl(vnode_mount(vp), FUSE_FLUSH)) return 0; err = fuse_filehandle_getrw(vp, fflag, &fufh, cred, pid); if (err) return err; fdisp_init(&fdi, sizeof(*ffi)); fdisp_make_vp(&fdi, FUSE_FLUSH, vp, td, cred); ffi = fdi.indata; ffi->fh = fufh->fh_id; /* * If the file has a POSIX lock then we're supposed to set lock_owner. * If not, then lock_owner is undefined. So we may as well always set * it. */ ffi->lock_owner = td->td_proc->p_pid; err = fdisp_wait_answ(&fdi); if (err == ENOSYS) { fsess_set_notimpl(mp, FUSE_FLUSH); err = 0; } fdisp_destroy(&fdi); return err; } /* Close wrapper for fifos. */ static int fuse_fifo_close(struct vop_close_args *ap) { return (fifo_specops.vop_close(ap)); } /* Send FUSE_LSEEK for this node */ static int fuse_vnop_do_lseek(struct vnode *vp, struct thread *td, struct ucred *cred, pid_t pid, off_t *offp, int whence) { struct fuse_dispatcher fdi; struct fuse_filehandle *fufh; struct fuse_lseek_in *flsi; struct fuse_lseek_out *flso; struct mount *mp = vnode_mount(vp); int err; ASSERT_VOP_LOCKED(vp, __func__); err = fuse_filehandle_getrw(vp, FREAD, &fufh, cred, pid); if (err) return (err); fdisp_init(&fdi, sizeof(*flsi)); fdisp_make_vp(&fdi, FUSE_LSEEK, vp, td, cred); flsi = fdi.indata; flsi->fh = fufh->fh_id; flsi->offset = *offp; flsi->whence = whence; err = fdisp_wait_answ(&fdi); if (err == ENOSYS) { fsess_set_notimpl(mp, FUSE_LSEEK); } else if (err == 0) { fsess_set_impl(mp, FUSE_LSEEK); flso = fdi.answ; *offp = flso->offset; } fdisp_destroy(&fdi); return (err); } /* struct vnop_access_args { struct vnode *a_vp; #if VOP_ACCESS_TAKES_ACCMODE_T accmode_t a_accmode; #else int a_mode; #endif struct ucred *a_cred; struct thread *a_td; }; */ static int fuse_vnop_access(struct vop_access_args *ap) { struct vnode *vp = ap->a_vp; int accmode = ap->a_accmode; struct ucred *cred = ap->a_cred; struct fuse_data *data = fuse_get_mpdata(vnode_mount(vp)); int err; if (fuse_isdeadfs(vp)) { if (vnode_isvroot(vp)) { return 0; } return ENXIO; } if (!(data->dataflags & FSESS_INITED)) { if (vnode_isvroot(vp)) { if (priv_check_cred(cred, PRIV_VFS_ADMIN) || (fuse_match_cred(data->daemoncred, cred) == 0)) { return 0; } } return EBADF; } if (vnode_islnk(vp)) { return 0; } err = fuse_internal_access(vp, accmode, ap->a_td, ap->a_cred); return err; } /* * struct vop_advlock_args { * struct vop_generic_args a_gen; * struct vnode *a_vp; * void *a_id; * int a_op; * struct flock *a_fl; * int a_flags; * } */ static int fuse_vnop_advlock(struct vop_advlock_args *ap) { struct vnode *vp = ap->a_vp; struct flock *fl = ap->a_fl; struct thread *td = curthread; struct ucred *cred = td->td_ucred; pid_t pid = td->td_proc->p_pid; struct fuse_filehandle *fufh; struct fuse_dispatcher fdi; struct fuse_lk_in *fli; struct fuse_lk_out *flo; enum fuse_opcode op; int dataflags, err; int flags = ap->a_flags; dataflags = fuse_get_mpdata(vnode_mount(vp))->dataflags; if (fuse_isdeadfs(vp)) { return ENXIO; } switch(ap->a_op) { case F_GETLK: op = FUSE_GETLK; break; case F_SETLK: if (flags & F_WAIT) op = FUSE_SETLKW; else op = FUSE_SETLK; break; case F_UNLCK: op = FUSE_SETLK; break; default: return EINVAL; } if (!(dataflags & FSESS_POSIX_LOCKS)) return vop_stdadvlock(ap); /* FUSE doesn't properly support flock until protocol 7.17 */ if (flags & F_FLOCK) return vop_stdadvlock(ap); vn_lock(vp, LK_SHARED | LK_RETRY); err = fuse_filehandle_get_anyflags(vp, &fufh, cred, pid); if (err) goto out; fdisp_init(&fdi, sizeof(*fli)); fdisp_make_vp(&fdi, op, vp, td, cred); fli = fdi.indata; fli->fh = fufh->fh_id; fli->owner = fl->l_pid; fli->lk.start = fl->l_start; if (fl->l_len != 0) fli->lk.end = fl->l_start + fl->l_len - 1; else fli->lk.end = INT64_MAX; fli->lk.type = fl->l_type; fli->lk.pid = fl->l_pid; err = fdisp_wait_answ(&fdi); fdisp_destroy(&fdi); if (err == 0 && op == FUSE_GETLK) { flo = fdi.answ; fl->l_type = flo->lk.type; fl->l_pid = flo->lk.pid; if (flo->lk.type != F_UNLCK) { fl->l_start = flo->lk.start; if (flo->lk.end == INT64_MAX) fl->l_len = 0; else fl->l_len = flo->lk.end - flo->lk.start + 1; fl->l_start = flo->lk.start; } } out: VOP_UNLOCK(vp); return err; } /* { struct vnode *a_vp; daddr_t a_bn; struct bufobj **a_bop; daddr_t *a_bnp; int *a_runp; int *a_runb; } */ static int fuse_vnop_bmap(struct vop_bmap_args *ap) { struct vnode *vp = ap->a_vp; struct bufobj **bo = ap->a_bop; struct thread *td = curthread; struct mount *mp; struct fuse_dispatcher fdi; struct fuse_bmap_in *fbi; struct fuse_bmap_out *fbo; struct fuse_data *data; uint64_t biosize; off_t filesize; daddr_t lbn = ap->a_bn; daddr_t *pbn = ap->a_bnp; int *runp = ap->a_runp; int *runb = ap->a_runb; int error = 0; int maxrun; if (fuse_isdeadfs(vp)) { return ENXIO; } mp = vnode_mount(vp); data = fuse_get_mpdata(mp); biosize = fuse_iosize(vp); maxrun = MIN(vp->v_mount->mnt_iosize_max / biosize - 1, data->max_readahead_blocks); if (bo != NULL) *bo = &vp->v_bufobj; /* * The FUSE_BMAP operation does not include the runp and runb * variables, so we must guess. Report nonzero contiguous runs so * cluster_read will combine adjacent reads. It's worthwhile to reduce * upcalls even if we don't know the true physical layout of the file. * * FUSE file systems may opt out of read clustering in two ways: * * mounting with -onoclusterr * * Setting max_readahead <= maxbcachebuf during FUSE_INIT */ if (runb != NULL) *runb = MIN(lbn, maxrun); if (runp != NULL) { error = fuse_vnode_size(vp, &filesize, td->td_ucred, td); if (error == 0) *runp = MIN(MAX(0, filesize / (off_t)biosize - lbn - 1), maxrun); else *runp = 0; } if (fsess_maybe_impl(mp, FUSE_BMAP)) { fdisp_init(&fdi, sizeof(*fbi)); fdisp_make_vp(&fdi, FUSE_BMAP, vp, td, td->td_ucred); fbi = fdi.indata; fbi->block = lbn; fbi->blocksize = biosize; error = fdisp_wait_answ(&fdi); if (error == ENOSYS) { fdisp_destroy(&fdi); fsess_set_notimpl(mp, FUSE_BMAP); error = 0; } else { fbo = fdi.answ; if (error == 0 && pbn != NULL) *pbn = fbo->block; fdisp_destroy(&fdi); return error; } } /* If the daemon doesn't support BMAP, make up a sensible default */ if (pbn != NULL) *pbn = lbn * btodb(biosize); return (error); } /* struct vop_close_args { struct vnode *a_vp; int a_fflag; struct ucred *a_cred; struct thread *a_td; }; */ static int fuse_vnop_close(struct vop_close_args *ap) { struct vnode *vp = ap->a_vp; struct ucred *cred = ap->a_cred; int fflag = ap->a_fflag; struct thread *td = ap->a_td; pid_t pid = td->td_proc->p_pid; int err = 0; if (fuse_isdeadfs(vp)) return 0; if (vnode_isdir(vp)) return 0; if (fflag & IO_NDELAY) return 0; err = fuse_flush(vp, cred, pid, fflag); /* TODO: close the file handle, if we're sure it's no longer used */ if ((VTOFUD(vp)->flag & FN_SIZECHANGE) != 0) { fuse_vnode_savesize(vp, cred, td->td_proc->p_pid); } return err; } /* struct vop_copy_file_range_args { struct vop_generic_args a_gen; struct vnode *a_invp; off_t *a_inoffp; struct vnode *a_outvp; off_t *a_outoffp; size_t *a_lenp; unsigned int a_flags; struct ucred *a_incred; struct ucred *a_outcred; struct thread *a_fsizetd; } */ static int fuse_vnop_copy_file_range(struct vop_copy_file_range_args *ap) { struct vnode *invp = ap->a_invp; struct vnode *outvp = ap->a_outvp; struct mount *mp = vnode_mount(invp); struct fuse_dispatcher fdi; struct fuse_filehandle *infufh, *outfufh; struct fuse_copy_file_range_in *fcfri; struct ucred *incred = ap->a_incred; struct ucred *outcred = ap->a_outcred; struct fuse_write_out *fwo; struct thread *td; struct uio io; pid_t pid; int err; if (mp != vnode_mount(outvp)) goto fallback; if (incred->cr_uid != outcred->cr_uid) goto fallback; if (incred->cr_groups[0] != outcred->cr_groups[0]) goto fallback; if (fsess_not_impl(mp, FUSE_COPY_FILE_RANGE)) goto fallback; if (ap->a_fsizetd == NULL) td = curthread; else td = ap->a_fsizetd; pid = td->td_proc->p_pid; /* Lock both vnodes, avoiding risk of deadlock. */ do { err = vn_lock(outvp, LK_EXCLUSIVE); if (invp == outvp) break; if (err == 0) { err = vn_lock(invp, LK_SHARED | LK_NOWAIT); if (err == 0) break; VOP_UNLOCK(outvp); err = vn_lock(invp, LK_SHARED); if (err == 0) VOP_UNLOCK(invp); } } while (err == 0); if (err != 0) return (err); err = fuse_filehandle_getrw(invp, FREAD, &infufh, incred, pid); if (err) goto unlock; err = fuse_filehandle_getrw(outvp, FWRITE, &outfufh, outcred, pid); if (err) goto unlock; if (ap->a_fsizetd) { io.uio_offset = *ap->a_outoffp; io.uio_resid = *ap->a_lenp; err = vn_rlimit_fsize(outvp, &io, ap->a_fsizetd); if (err) goto unlock; } fdisp_init(&fdi, sizeof(*fcfri)); fdisp_make_vp(&fdi, FUSE_COPY_FILE_RANGE, invp, td, incred); fcfri = fdi.indata; fcfri->fh_in = infufh->fh_id; fcfri->off_in = *ap->a_inoffp; fcfri->nodeid_out = VTOI(outvp); fcfri->fh_out = outfufh->fh_id; fcfri->off_out = *ap->a_outoffp; fcfri->len = *ap->a_lenp; fcfri->flags = 0; err = fdisp_wait_answ(&fdi); if (err == 0) { fwo = fdi.answ; *ap->a_lenp = fwo->size; *ap->a_inoffp += fwo->size; *ap->a_outoffp += fwo->size; fuse_internal_clear_suid_on_write(outvp, outcred, td); } fdisp_destroy(&fdi); unlock: if (invp != outvp) VOP_UNLOCK(invp); VOP_UNLOCK(outvp); if (err == ENOSYS) { fsess_set_notimpl(mp, FUSE_COPY_FILE_RANGE); fallback: err = vn_generic_copy_file_range(ap->a_invp, ap->a_inoffp, ap->a_outvp, ap->a_outoffp, ap->a_lenp, ap->a_flags, ap->a_incred, ap->a_outcred, ap->a_fsizetd); } return (err); } static void fdisp_make_mknod_for_fallback( struct fuse_dispatcher *fdip, struct componentname *cnp, struct vnode *dvp, uint64_t parentnid, struct thread *td, struct ucred *cred, mode_t mode, enum fuse_opcode *op) { struct fuse_mknod_in *fmni; fdisp_init(fdip, sizeof(*fmni) + cnp->cn_namelen + 1); *op = FUSE_MKNOD; fdisp_make(fdip, *op, vnode_mount(dvp), parentnid, td, cred); fmni = fdip->indata; fmni->mode = mode; fmni->rdev = 0; memcpy((char *)fdip->indata + sizeof(*fmni), cnp->cn_nameptr, cnp->cn_namelen); ((char *)fdip->indata)[sizeof(*fmni) + cnp->cn_namelen] = '\0'; } /* struct vnop_create_args { struct vnode *a_dvp; struct vnode **a_vpp; struct componentname *a_cnp; struct vattr *a_vap; }; */ static int fuse_vnop_create(struct vop_create_args *ap) { struct vnode *dvp = ap->a_dvp; struct vnode **vpp = ap->a_vpp; struct componentname *cnp = ap->a_cnp; struct vattr *vap = ap->a_vap; struct thread *td = cnp->cn_thread; struct ucred *cred = cnp->cn_cred; struct fuse_data *data; struct fuse_create_in *fci; struct fuse_entry_out *feo; struct fuse_open_out *foo; struct fuse_dispatcher fdi, fdi2; struct fuse_dispatcher *fdip = &fdi; struct fuse_dispatcher *fdip2 = NULL; int err; struct mount *mp = vnode_mount(dvp); data = fuse_get_mpdata(mp); uint64_t parentnid = VTOFUD(dvp)->nid; mode_t mode = MAKEIMODE(vap->va_type, vap->va_mode); enum fuse_opcode op; int flags; if (fuse_isdeadfs(dvp)) return ENXIO; /* FUSE expects sockets to be created with FUSE_MKNOD */ if (vap->va_type == VSOCK) return fuse_internal_mknod(dvp, vpp, cnp, vap); /* * VOP_CREATE doesn't tell us the open(2) flags, so we guess. Only a * writable mode makes sense, and we might as well include readability * too. */ flags = O_RDWR; bzero(&fdi, sizeof(fdi)); if (vap->va_type != VREG) return (EINVAL); if (fsess_not_impl(mp, FUSE_CREATE) || vap->va_type == VSOCK) { /* Fallback to FUSE_MKNOD/FUSE_OPEN */ fdisp_make_mknod_for_fallback(fdip, cnp, dvp, parentnid, td, cred, mode, &op); } else { /* Use FUSE_CREATE */ size_t insize; op = FUSE_CREATE; fdisp_init(fdip, sizeof(*fci) + cnp->cn_namelen + 1); fdisp_make(fdip, op, vnode_mount(dvp), parentnid, td, cred); fci = fdip->indata; fci->mode = mode; fci->flags = O_CREAT | flags; if (fuse_libabi_geq(data, 7, 12)) { insize = sizeof(*fci); fci->umask = td->td_proc->p_pd->pd_cmask; } else { insize = sizeof(struct fuse_open_in); } memcpy((char *)fdip->indata + insize, cnp->cn_nameptr, cnp->cn_namelen); ((char *)fdip->indata)[insize + cnp->cn_namelen] = '\0'; } err = fdisp_wait_answ(fdip); if (err) { if (err == ENOSYS && op == FUSE_CREATE) { fsess_set_notimpl(mp, FUSE_CREATE); fdisp_destroy(fdip); fdisp_make_mknod_for_fallback(fdip, cnp, dvp, parentnid, td, cred, mode, &op); err = fdisp_wait_answ(fdip); } if (err) goto out; } feo = fdip->answ; if ((err = fuse_internal_checkentry(feo, vap->va_type))) { goto out; } if (op == FUSE_CREATE) { foo = (struct fuse_open_out*)(feo + 1); } else { /* Issue a separate FUSE_OPEN */ struct fuse_open_in *foi; fdip2 = &fdi2; fdisp_init(fdip2, sizeof(*foi)); fdisp_make(fdip2, FUSE_OPEN, vnode_mount(dvp), feo->nodeid, td, cred); foi = fdip2->indata; foi->flags = flags; err = fdisp_wait_answ(fdip2); if (err) goto out; foo = fdip2->answ; } err = fuse_vnode_get(mp, feo, feo->nodeid, dvp, vpp, cnp, vap->va_type); if (err) { struct fuse_release_in *fri; uint64_t nodeid = feo->nodeid; uint64_t fh_id = foo->fh; fdisp_init(fdip, sizeof(*fri)); fdisp_make(fdip, FUSE_RELEASE, mp, nodeid, td, cred); fri = fdip->indata; fri->fh = fh_id; fri->flags = flags; fuse_insert_callback(fdip->tick, fuse_internal_forget_callback); fuse_insert_message(fdip->tick, false); goto out; } ASSERT_VOP_ELOCKED(*vpp, "fuse_vnop_create"); fuse_internal_cache_attrs(*vpp, &feo->attr, feo->attr_valid, feo->attr_valid_nsec, NULL); fuse_filehandle_init(*vpp, FUFH_RDWR, NULL, td, cred, foo); fuse_vnode_open(*vpp, foo->open_flags, td); /* * Purge the parent's attribute cache because the daemon should've * updated its mtime and ctime */ fuse_vnode_clear_attr_cache(dvp); cache_purge_negative(dvp); out: if (fdip2) fdisp_destroy(fdip2); fdisp_destroy(fdip); return err; } /* struct vnop_fdatasync_args { struct vop_generic_args a_gen; struct vnode * a_vp; struct thread * a_td; }; */ static int fuse_vnop_fdatasync(struct vop_fdatasync_args *ap) { struct vnode *vp = ap->a_vp; struct thread *td = ap->a_td; int waitfor = MNT_WAIT; int err = 0; if (fuse_isdeadfs(vp)) { return 0; } if ((err = vop_stdfdatasync_buf(ap))) return err; return fuse_internal_fsync(vp, td, waitfor, true); } /* struct vnop_fsync_args { struct vop_generic_args a_gen; struct vnode * a_vp; int a_waitfor; struct thread * a_td; }; */ static int fuse_vnop_fsync(struct vop_fsync_args *ap) { struct vnode *vp = ap->a_vp; struct thread *td = ap->a_td; int waitfor = ap->a_waitfor; int err = 0; if (fuse_isdeadfs(vp)) { return 0; } if ((err = vop_stdfsync(ap))) return err; return fuse_internal_fsync(vp, td, waitfor, false); } /* struct vnop_getattr_args { struct vnode *a_vp; struct vattr *a_vap; struct ucred *a_cred; struct thread *a_td; }; */ static int fuse_vnop_getattr(struct vop_getattr_args *ap) { struct vnode *vp = ap->a_vp; struct vattr *vap = ap->a_vap; struct ucred *cred = ap->a_cred; struct thread *td = curthread; int err = 0; int dataflags; dataflags = fuse_get_mpdata(vnode_mount(vp))->dataflags; /* Note that we are not bailing out on a dead file system just yet. */ if (!(dataflags & FSESS_INITED)) { if (!vnode_isvroot(vp)) { fdata_set_dead(fuse_get_mpdata(vnode_mount(vp))); err = ENOTCONN; return err; } else { goto fake; } } err = fuse_internal_getattr(vp, vap, cred, td); if (err == ENOTCONN && vnode_isvroot(vp)) { /* see comment in fuse_vfsop_statfs() */ goto fake; } else { return err; } fake: bzero(vap, sizeof(*vap)); vap->va_type = vnode_vtype(vp); return 0; } /* struct vnop_inactive_args { struct vnode *a_vp; }; */ static int fuse_vnop_inactive(struct vop_inactive_args *ap) { struct vnode *vp = ap->a_vp; struct thread *td = curthread; struct fuse_vnode_data *fvdat = VTOFUD(vp); struct fuse_filehandle *fufh, *fufh_tmp; int need_flush = 1; LIST_FOREACH_SAFE(fufh, &fvdat->handles, next, fufh_tmp) { if (need_flush && vp->v_type == VREG) { if ((VTOFUD(vp)->flag & FN_SIZECHANGE) != 0) { fuse_vnode_savesize(vp, NULL, 0); } if ((fvdat->flag & FN_REVOKED) != 0) fuse_io_invalbuf(vp, td); else fuse_io_flushbuf(vp, MNT_WAIT, td); need_flush = 0; } fuse_filehandle_close(vp, fufh, td, NULL); } if ((fvdat->flag & FN_REVOKED) != 0) vrecycle(vp); return 0; } /* struct vnop_ioctl_args { struct vnode *a_vp; u_long a_command; caddr_t a_data; int a_fflag; struct ucred *a_cred; struct thread *a_td; }; */ static int fuse_vnop_ioctl(struct vop_ioctl_args *ap) { struct vnode *vp = ap->a_vp; struct mount *mp = vnode_mount(vp); struct ucred *cred = ap->a_cred; off_t *offp; pid_t pid = ap->a_td->td_proc->p_pid; int err; switch (ap->a_command) { case FIOSEEKDATA: case FIOSEEKHOLE: /* Call FUSE_LSEEK, if we can, or fall back to vop_stdioctl */ if (fsess_maybe_impl(mp, FUSE_LSEEK)) { int whence; offp = ap->a_data; if (ap->a_command == FIOSEEKDATA) whence = SEEK_DATA; else whence = SEEK_HOLE; vn_lock(vp, LK_SHARED | LK_RETRY); err = fuse_vnop_do_lseek(vp, ap->a_td, cred, pid, offp, whence); VOP_UNLOCK(vp); } if (fsess_not_impl(mp, FUSE_LSEEK)) err = vop_stdioctl(ap); break; default: /* TODO: implement FUSE_IOCTL */ err = ENOTTY; break; } return (err); } /* struct vnop_link_args { struct vnode *a_tdvp; struct vnode *a_vp; struct componentname *a_cnp; }; */ static int fuse_vnop_link(struct vop_link_args *ap) { struct vnode *vp = ap->a_vp; struct vnode *tdvp = ap->a_tdvp; struct componentname *cnp = ap->a_cnp; struct vattr *vap = VTOVA(vp); struct fuse_dispatcher fdi; struct fuse_entry_out *feo; struct fuse_link_in fli; int err; if (fuse_isdeadfs(vp)) { return ENXIO; } if (vnode_mount(tdvp) != vnode_mount(vp)) { return EXDEV; } /* * This is a seatbelt check to protect naive userspace filesystems from * themselves and the limitations of the FUSE IPC protocol. If a * filesystem does not allow attribute caching, assume it is capable of * validating that nlink does not overflow. */ if (vap != NULL && vap->va_nlink >= FUSE_LINK_MAX) return EMLINK; fli.oldnodeid = VTOI(vp); fdisp_init(&fdi, 0); fuse_internal_newentry_makerequest(vnode_mount(tdvp), VTOI(tdvp), cnp, FUSE_LINK, &fli, sizeof(fli), &fdi); if ((err = fdisp_wait_answ(&fdi))) { goto out; } feo = fdi.answ; err = fuse_internal_checkentry(feo, vnode_vtype(vp)); if (!err) { /* * Purge the parent's attribute cache because the daemon * should've updated its mtime and ctime */ fuse_vnode_clear_attr_cache(tdvp); fuse_internal_cache_attrs(vp, &feo->attr, feo->attr_valid, feo->attr_valid_nsec, NULL); } out: fdisp_destroy(&fdi); return err; } struct fuse_lookup_alloc_arg { struct fuse_entry_out *feo; struct componentname *cnp; uint64_t nid; enum vtype vtyp; }; /* Callback for vn_get_ino */ static int fuse_lookup_alloc(struct mount *mp, void *arg, int lkflags, struct vnode **vpp) { struct fuse_lookup_alloc_arg *flaa = arg; return fuse_vnode_get(mp, flaa->feo, flaa->nid, NULL, vpp, flaa->cnp, flaa->vtyp); } SDT_PROBE_DEFINE3(fusefs, , vnops, cache_lookup, "int", "struct timespec*", "struct timespec*"); -SDT_PROBE_DEFINE2(fusefs, , vnops, lookup_cache_incoherent, - "struct vnode*", "struct fuse_entry_out*"); /* struct vnop_lookup_args { struct vnodeop_desc *a_desc; struct vnode *a_dvp; struct vnode **a_vpp; struct componentname *a_cnp; }; */ int fuse_vnop_lookup(struct vop_lookup_args *ap) { struct vnode *dvp = ap->a_dvp; struct vnode **vpp = ap->a_vpp; struct componentname *cnp = ap->a_cnp; struct thread *td = cnp->cn_thread; struct ucred *cred = cnp->cn_cred; int nameiop = cnp->cn_nameiop; int flags = cnp->cn_flags; int wantparent = flags & (LOCKPARENT | WANTPARENT); int islastcn = flags & ISLASTCN; struct mount *mp = vnode_mount(dvp); struct fuse_data *data = fuse_get_mpdata(mp); int default_permissions = data->dataflags & FSESS_DEFAULT_PERMISSIONS; int err = 0; int lookup_err = 0; struct vnode *vp = NULL; struct fuse_dispatcher fdi; bool did_lookup = false; struct fuse_entry_out *feo = NULL; enum vtype vtyp; /* vnode type of target */ off_t filesize; /* filesize of target */ uint64_t nid; if (fuse_isdeadfs(dvp)) { *vpp = NULL; return ENXIO; } if (!vnode_isdir(dvp)) return ENOTDIR; if (islastcn && vfs_isrdonly(mp) && (nameiop != LOOKUP)) return EROFS; if ((cnp->cn_flags & NOEXECCHECK) != 0) cnp->cn_flags &= ~NOEXECCHECK; else if ((err = fuse_internal_access(dvp, VEXEC, td, cred))) return err; if (flags & ISDOTDOT) { KASSERT(VTOFUD(dvp)->flag & FN_PARENT_NID, ("Looking up .. is TODO")); nid = VTOFUD(dvp)->parent_nid; if (nid == 0) return ENOENT; /* .. is obviously a directory */ vtyp = VDIR; filesize = 0; } else if (cnp->cn_namelen == 1 && *(cnp->cn_nameptr) == '.') { nid = VTOI(dvp); /* . is obviously a directory */ vtyp = VDIR; filesize = 0; } else { struct timespec now, timeout; int ncpticks; /* here to accomodate for API contract */ err = cache_lookup(dvp, vpp, cnp, &timeout, &ncpticks); getnanouptime(&now); SDT_PROBE3(fusefs, , vnops, cache_lookup, err, &timeout, &now); switch (err) { case -1: /* positive match */ if (timespeccmp(&timeout, &now, >)) { counter_u64_add(fuse_lookup_cache_hits, 1); } else { /* Cache timeout */ counter_u64_add(fuse_lookup_cache_misses, 1); bintime_clear( &VTOFUD(*vpp)->entry_cache_timeout); cache_purge(*vpp); if (dvp != *vpp) vput(*vpp); else vrele(*vpp); *vpp = NULL; break; } return 0; case 0: /* no match in cache */ counter_u64_add(fuse_lookup_cache_misses, 1); break; case ENOENT: /* negative match */ getnanouptime(&now); if (timespeccmp(&timeout, &now, <=)) { /* Cache timeout */ cache_purge_negative(dvp); break; } /* fall through */ default: return err; } nid = VTOI(dvp); fdisp_init(&fdi, cnp->cn_namelen + 1); fdisp_make(&fdi, FUSE_LOOKUP, mp, nid, td, cred); memcpy(fdi.indata, cnp->cn_nameptr, cnp->cn_namelen); ((char *)fdi.indata)[cnp->cn_namelen] = '\0'; lookup_err = fdisp_wait_answ(&fdi); did_lookup = true; if (!lookup_err) { /* lookup call succeeded */ feo = (struct fuse_entry_out *)fdi.answ; nid = feo->nodeid; if (nid == 0) { /* zero nodeid means ENOENT and cache it */ struct timespec timeout; fdi.answ_stat = ENOENT; lookup_err = ENOENT; if (cnp->cn_flags & MAKEENTRY) { fuse_validity_2_timespec(feo, &timeout); cache_enter_time(dvp, *vpp, cnp, &timeout, NULL); } } else if (nid == FUSE_ROOT_ID) { lookup_err = EINVAL; } vtyp = IFTOVT(feo->attr.mode); filesize = feo->attr.size; } if (lookup_err && (!fdi.answ_stat || lookup_err != ENOENT)) { fdisp_destroy(&fdi); return lookup_err; } } /* lookup_err, if non-zero, must be ENOENT at this point */ if (lookup_err) { /* Entry not found */ if ((nameiop == CREATE || nameiop == RENAME) && islastcn) { if (default_permissions) err = fuse_internal_access(dvp, VWRITE, td, cred); else err = 0; if (!err) { /* * Set the SAVENAME flag to hold onto the * pathname for use later in VOP_CREATE or * VOP_RENAME. */ cnp->cn_flags |= SAVENAME; err = EJUSTRETURN; } } else { err = ENOENT; } } else { /* Entry was found */ if (flags & ISDOTDOT) { struct fuse_lookup_alloc_arg flaa; flaa.nid = nid; flaa.feo = feo; flaa.cnp = cnp; flaa.vtyp = vtyp; err = vn_vget_ino_gen(dvp, fuse_lookup_alloc, &flaa, 0, &vp); *vpp = vp; } else if (nid == VTOI(dvp)) { vref(dvp); *vpp = dvp; } else { struct fuse_vnode_data *fvdat; struct vattr *vap; err = fuse_vnode_get(vnode_mount(dvp), feo, nid, dvp, &vp, cnp, vtyp); if (err) goto out; *vpp = vp; /* * In the case where we are looking up a FUSE node * represented by an existing cached vnode, and the * true size reported by FUSE_LOOKUP doesn't match * the vnode's cached size, then any cached writes * beyond the file's current size are lost. * * We can get here: * * following attribute cache expiration, or * * due a bug in the daemon, or */ fvdat = VTOFUD(vp); if (vnode_isreg(vp) && ((filesize != fvdat->cached_attrs.va_size && fvdat->flag & FN_SIZECHANGE) || ((vap = VTOVA(vp)) && filesize != vap->va_size))) { - SDT_PROBE2(fusefs, , vnops, lookup_cache_incoherent, vp, feo); fvdat->flag &= ~FN_SIZECHANGE; /* * The server changed the file's size even * though we had it cached, or had dirty writes * in the WB cache! */ - printf("%s: cache incoherent on %s! " - "Buggy FUSE server detected. To prevent " + fuse_warn(data, FSESS_WARN_CACHE_INCOHERENT, + "cache incoherent! " + "To prevent " "data corruption, disable the data cache " "by mounting with -o direct_io, or as " "directed otherwise by your FUSE server's " - "documentation\n", __func__, - vnode_mount(vp)->mnt_stat.f_mntonname); + "documentation."); int iosize = fuse_iosize(vp); v_inval_buf_range(vp, 0, INT64_MAX, iosize); } MPASS(feo != NULL); fuse_internal_cache_attrs(*vpp, &feo->attr, feo->attr_valid, feo->attr_valid_nsec, NULL); fuse_validity_2_bintime(feo->entry_valid, feo->entry_valid_nsec, &fvdat->entry_cache_timeout); if ((nameiop == DELETE || nameiop == RENAME) && islastcn && default_permissions) { struct vattr dvattr; err = fuse_internal_access(dvp, VWRITE, td, cred); if (err != 0) goto out; /* * if the parent's sticky bit is set, check * whether we're allowed to remove the file. * Need to figure out the vnode locking to make * this work. */ fuse_internal_getattr(dvp, &dvattr, cred, td); if ((dvattr.va_mode & S_ISTXT) && fuse_internal_access(dvp, VADMIN, td, cred) && fuse_internal_access(*vpp, VADMIN, td, cred)) { err = EPERM; goto out; } } if (islastcn && ( (nameiop == DELETE) || (nameiop == RENAME && wantparent))) { cnp->cn_flags |= SAVENAME; } } } out: if (err) { if (vp != NULL && dvp != vp) vput(vp); else if (vp != NULL) vrele(vp); *vpp = NULL; } if (did_lookup) fdisp_destroy(&fdi); return err; } /* struct vnop_mkdir_args { struct vnode *a_dvp; struct vnode **a_vpp; struct componentname *a_cnp; struct vattr *a_vap; }; */ static int fuse_vnop_mkdir(struct vop_mkdir_args *ap) { struct vnode *dvp = ap->a_dvp; struct vnode **vpp = ap->a_vpp; struct componentname *cnp = ap->a_cnp; struct vattr *vap = ap->a_vap; struct fuse_mkdir_in fmdi; if (fuse_isdeadfs(dvp)) { return ENXIO; } fmdi.mode = MAKEIMODE(vap->va_type, vap->va_mode); fmdi.umask = curthread->td_proc->p_pd->pd_cmask; return (fuse_internal_newentry(dvp, vpp, cnp, FUSE_MKDIR, &fmdi, sizeof(fmdi), VDIR)); } /* struct vnop_mknod_args { struct vnode *a_dvp; struct vnode **a_vpp; struct componentname *a_cnp; struct vattr *a_vap; }; */ static int fuse_vnop_mknod(struct vop_mknod_args *ap) { struct vnode *dvp = ap->a_dvp; struct vnode **vpp = ap->a_vpp; struct componentname *cnp = ap->a_cnp; struct vattr *vap = ap->a_vap; if (fuse_isdeadfs(dvp)) return ENXIO; return fuse_internal_mknod(dvp, vpp, cnp, vap); } /* struct vop_open_args { struct vnode *a_vp; int a_mode; struct ucred *a_cred; struct thread *a_td; int a_fdidx; / struct file *a_fp; }; */ static int fuse_vnop_open(struct vop_open_args *ap) { struct vnode *vp = ap->a_vp; int a_mode = ap->a_mode; struct thread *td = ap->a_td; struct ucred *cred = ap->a_cred; pid_t pid = td->td_proc->p_pid; struct fuse_vnode_data *fvdat; if (fuse_isdeadfs(vp)) return ENXIO; if (vp->v_type == VCHR || vp->v_type == VBLK || vp->v_type == VFIFO) return (EOPNOTSUPP); if ((a_mode & (FREAD | FWRITE | FEXEC)) == 0) return EINVAL; fvdat = VTOFUD(vp); if (fuse_filehandle_validrw(vp, a_mode, cred, pid)) { fuse_vnode_open(vp, 0, td); return 0; } return fuse_filehandle_open(vp, a_mode, NULL, td, cred); } static int fuse_vnop_pathconf(struct vop_pathconf_args *ap) { struct vnode *vp = ap->a_vp; struct mount *mp; switch (ap->a_name) { case _PC_FILESIZEBITS: *ap->a_retval = 64; return (0); case _PC_NAME_MAX: *ap->a_retval = NAME_MAX; return (0); case _PC_LINK_MAX: *ap->a_retval = MIN(LONG_MAX, FUSE_LINK_MAX); return (0); case _PC_SYMLINK_MAX: *ap->a_retval = MAXPATHLEN; return (0); case _PC_NO_TRUNC: *ap->a_retval = 1; return (0); case _PC_MIN_HOLE_SIZE: /* * The FUSE protocol provides no mechanism for a server to * report _PC_MIN_HOLE_SIZE. It's a protocol bug. Instead, * return EINVAL if the server does not support FUSE_LSEEK, or * 1 if it does. */ mp = vnode_mount(vp); if (!fsess_is_impl(mp, FUSE_LSEEK) && !fsess_not_impl(mp, FUSE_LSEEK)) { off_t offset = 0; /* Issue a FUSE_LSEEK to find out if it's implemented */ fuse_vnop_do_lseek(vp, curthread, curthread->td_ucred, curthread->td_proc->p_pid, &offset, SEEK_DATA); } if (fsess_is_impl(mp, FUSE_LSEEK)) { *ap->a_retval = 1; return (0); } else { /* * Probably FUSE_LSEEK is not implemented. It might * be, if the FUSE_LSEEK above returned an error like * EACCES, but in that case we can't tell, so it's * safest to report EINVAL anyway. */ return (EINVAL); } default: return (vop_stdpathconf(ap)); } } /* struct vnop_read_args { struct vnode *a_vp; struct uio *a_uio; int a_ioflag; struct ucred *a_cred; }; */ static int fuse_vnop_read(struct vop_read_args *ap) { struct vnode *vp = ap->a_vp; struct uio *uio = ap->a_uio; int ioflag = ap->a_ioflag; struct ucred *cred = ap->a_cred; pid_t pid = curthread->td_proc->p_pid; if (fuse_isdeadfs(vp)) { return ENXIO; } if (VTOFUD(vp)->flag & FN_DIRECTIO) { ioflag |= IO_DIRECT; } return fuse_io_dispatch(vp, uio, ioflag, cred, pid); } /* struct vnop_readdir_args { struct vnode *a_vp; struct uio *a_uio; struct ucred *a_cred; int *a_eofflag; int *a_ncookies; u_long **a_cookies; }; */ static int fuse_vnop_readdir(struct vop_readdir_args *ap) { struct vnode *vp = ap->a_vp; struct uio *uio = ap->a_uio; struct ucred *cred = ap->a_cred; struct fuse_filehandle *fufh = NULL; struct fuse_iov cookediov; int err = 0; u_long *cookies; off_t startoff; ssize_t tresid; int ncookies; bool closefufh = false; pid_t pid = curthread->td_proc->p_pid; if (ap->a_eofflag) *ap->a_eofflag = 0; if (fuse_isdeadfs(vp)) { return ENXIO; } if ( /* XXXIP ((uio_iovcnt(uio) > 1)) || */ (uio_resid(uio) < sizeof(struct dirent))) { return EINVAL; } tresid = uio->uio_resid; startoff = uio->uio_offset; err = fuse_filehandle_get_dir(vp, &fufh, cred, pid); if (err == EBADF && vnode_mount(vp)->mnt_flag & MNT_EXPORTED) { /* * nfsd will do VOP_READDIR without first doing VOP_OPEN. We * must implicitly open the directory here */ err = fuse_filehandle_open(vp, FREAD, &fufh, curthread, cred); if (err == 0) { /* * When a directory is opened, it must be read from * the beginning. Hopefully, the "startoff" still * exists as an offset cookie for the directory. * If not, it will read the entire directory without * returning any entries and just return eof. */ uio->uio_offset = 0; } closefufh = true; } if (err) return (err); if (ap->a_ncookies != NULL) { ncookies = uio->uio_resid / (offsetof(struct dirent, d_name) + 4) + 1; cookies = malloc(ncookies * sizeof(*cookies), M_TEMP, M_WAITOK); *ap->a_ncookies = ncookies; *ap->a_cookies = cookies; } else { ncookies = 0; cookies = NULL; } #define DIRCOOKEDSIZE FUSE_DIRENT_ALIGN(FUSE_NAME_OFFSET + MAXNAMLEN + 1) fiov_init(&cookediov, DIRCOOKEDSIZE); err = fuse_internal_readdir(vp, uio, startoff, fufh, &cookediov, &ncookies, cookies); fiov_teardown(&cookediov); if (closefufh) fuse_filehandle_close(vp, fufh, curthread, cred); if (ap->a_ncookies != NULL) { if (err == 0) { *ap->a_ncookies -= ncookies; } else { free(*ap->a_cookies, M_TEMP); *ap->a_ncookies = 0; *ap->a_cookies = NULL; } } if (err == 0 && tresid == uio->uio_resid) *ap->a_eofflag = 1; return err; } /* struct vnop_readlink_args { struct vnode *a_vp; struct uio *a_uio; struct ucred *a_cred; }; */ static int fuse_vnop_readlink(struct vop_readlink_args *ap) { struct vnode *vp = ap->a_vp; struct uio *uio = ap->a_uio; struct ucred *cred = ap->a_cred; struct fuse_dispatcher fdi; int err; if (fuse_isdeadfs(vp)) { return ENXIO; } if (!vnode_islnk(vp)) { return EINVAL; } fdisp_init(&fdi, 0); err = fdisp_simple_putget_vp(&fdi, FUSE_READLINK, vp, curthread, cred); if (err) { goto out; } if (((char *)fdi.answ)[0] == '/' && fuse_get_mpdata(vnode_mount(vp))->dataflags & FSESS_PUSH_SYMLINKS_IN) { char *mpth = vnode_mount(vp)->mnt_stat.f_mntonname; err = uiomove(mpth, strlen(mpth), uio); } if (!err) { err = uiomove(fdi.answ, fdi.iosize, uio); } out: fdisp_destroy(&fdi); return err; } /* struct vnop_reclaim_args { struct vnode *a_vp; }; */ static int fuse_vnop_reclaim(struct vop_reclaim_args *ap) { struct vnode *vp = ap->a_vp; struct thread *td = curthread; struct fuse_vnode_data *fvdat = VTOFUD(vp); struct fuse_filehandle *fufh, *fufh_tmp; if (!fvdat) { panic("FUSE: no vnode data during recycling"); } LIST_FOREACH_SAFE(fufh, &fvdat->handles, next, fufh_tmp) { printf("FUSE: vnode being reclaimed with open fufh " "(type=%#x)", fufh->fufh_type); fuse_filehandle_close(vp, fufh, td, NULL); } if (!fuse_isdeadfs(vp) && fvdat->nlookup > 0) { fuse_internal_forget_send(vnode_mount(vp), td, NULL, VTOI(vp), fvdat->nlookup); } cache_purge(vp); vfs_hash_remove(vp); fuse_vnode_destroy(vp); return 0; } /* struct vnop_remove_args { struct vnode *a_dvp; struct vnode *a_vp; struct componentname *a_cnp; }; */ static int fuse_vnop_remove(struct vop_remove_args *ap) { struct vnode *dvp = ap->a_dvp; struct vnode *vp = ap->a_vp; struct componentname *cnp = ap->a_cnp; int err; if (fuse_isdeadfs(vp)) { return ENXIO; } if (vnode_isdir(vp)) { return EPERM; } err = fuse_internal_remove(dvp, vp, cnp, FUSE_UNLINK); return err; } /* struct vnop_rename_args { struct vnode *a_fdvp; struct vnode *a_fvp; struct componentname *a_fcnp; struct vnode *a_tdvp; struct vnode *a_tvp; struct componentname *a_tcnp; }; */ static int fuse_vnop_rename(struct vop_rename_args *ap) { struct vnode *fdvp = ap->a_fdvp; struct vnode *fvp = ap->a_fvp; struct componentname *fcnp = ap->a_fcnp; struct vnode *tdvp = ap->a_tdvp; struct vnode *tvp = ap->a_tvp; struct componentname *tcnp = ap->a_tcnp; struct fuse_data *data; bool newparent = fdvp != tdvp; bool isdir = fvp->v_type == VDIR; int err = 0; if (fuse_isdeadfs(fdvp)) { return ENXIO; } if (fvp->v_mount != tdvp->v_mount || (tvp && fvp->v_mount != tvp->v_mount)) { SDT_PROBE2(fusefs, , vnops, trace, 1, "cross-device rename"); err = EXDEV; goto out; } cache_purge(fvp); /* * FUSE library is expected to check if target directory is not * under the source directory in the file system tree. * Linux performs this check at VFS level. */ /* * If source is a directory, and it will get a new parent, user must * have write permission to it, so ".." can be modified. */ data = fuse_get_mpdata(vnode_mount(tdvp)); if (data->dataflags & FSESS_DEFAULT_PERMISSIONS && isdir && newparent) { err = fuse_internal_access(fvp, VWRITE, tcnp->cn_thread, tcnp->cn_cred); if (err) goto out; } sx_xlock(&data->rename_lock); err = fuse_internal_rename(fdvp, fcnp, tdvp, tcnp); if (err == 0) { if (tdvp != fdvp) fuse_vnode_setparent(fvp, tdvp); if (tvp != NULL) fuse_vnode_setparent(tvp, NULL); } sx_unlock(&data->rename_lock); if (tvp != NULL && tvp != fvp) { cache_purge(tvp); } if (vnode_isdir(fvp)) { if ((tvp != NULL) && vnode_isdir(tvp)) { cache_purge(tdvp); } cache_purge(fdvp); } out: if (tdvp == tvp) { vrele(tdvp); } else { vput(tdvp); } if (tvp != NULL) { vput(tvp); } vrele(fdvp); vrele(fvp); return err; } /* struct vnop_rmdir_args { struct vnode *a_dvp; struct vnode *a_vp; struct componentname *a_cnp; } *ap; */ static int fuse_vnop_rmdir(struct vop_rmdir_args *ap) { struct vnode *dvp = ap->a_dvp; struct vnode *vp = ap->a_vp; int err; if (fuse_isdeadfs(vp)) { return ENXIO; } if (VTOFUD(vp) == VTOFUD(dvp)) { return EINVAL; } err = fuse_internal_remove(dvp, vp, ap->a_cnp, FUSE_RMDIR); return err; } /* struct vnop_setattr_args { struct vnode *a_vp; struct vattr *a_vap; struct ucred *a_cred; struct thread *a_td; }; */ static int fuse_vnop_setattr(struct vop_setattr_args *ap) { struct vnode *vp = ap->a_vp; struct vattr *vap = ap->a_vap; struct ucred *cred = ap->a_cred; struct thread *td = curthread; struct mount *mp; struct fuse_data *data; struct vattr old_va; int dataflags; int err = 0, err2; accmode_t accmode = 0; bool checkperm; bool drop_suid = false; gid_t cr_gid; mp = vnode_mount(vp); data = fuse_get_mpdata(mp); dataflags = data->dataflags; checkperm = dataflags & FSESS_DEFAULT_PERMISSIONS; if (cred->cr_ngroups > 0) cr_gid = cred->cr_groups[0]; else cr_gid = 0; if (fuse_isdeadfs(vp)) { return ENXIO; } if (vap->va_uid != (uid_t)VNOVAL) { if (checkperm) { /* Only root may change a file's owner */ err = priv_check_cred(cred, PRIV_VFS_CHOWN); if (err) { /* As a special case, allow the null chown */ err2 = fuse_internal_getattr(vp, &old_va, cred, td); if (err2) return (err2); if (vap->va_uid != old_va.va_uid) return err; else accmode |= VADMIN; drop_suid = true; } else accmode |= VADMIN; } else accmode |= VADMIN; } if (vap->va_gid != (gid_t)VNOVAL) { if (checkperm && priv_check_cred(cred, PRIV_VFS_CHOWN)) drop_suid = true; if (checkperm && !groupmember(vap->va_gid, cred)) { /* * Non-root users may only chgrp to one of their own * groups */ err = priv_check_cred(cred, PRIV_VFS_CHOWN); if (err) { /* As a special case, allow the null chgrp */ err2 = fuse_internal_getattr(vp, &old_va, cred, td); if (err2) return (err2); if (vap->va_gid != old_va.va_gid) return err; accmode |= VADMIN; } else accmode |= VADMIN; } else accmode |= VADMIN; } if (vap->va_size != VNOVAL) { switch (vp->v_type) { case VDIR: return (EISDIR); case VLNK: case VREG: if (vfs_isrdonly(mp)) return (EROFS); break; default: /* * According to POSIX, the result is unspecified * for file types other than regular files, * directories and shared memory objects. We * don't support shared memory objects in the file * system, and have dubious support for truncating * symlinks. Just ignore the request in other cases. */ return (0); } /* Don't set accmode. Permission to trunc is checked upstack */ } if (vap->va_atime.tv_sec != VNOVAL || vap->va_mtime.tv_sec != VNOVAL) { if (vap->va_vaflags & VA_UTIMES_NULL) accmode |= VWRITE; else accmode |= VADMIN; } if (drop_suid) { if (vap->va_mode != (mode_t)VNOVAL) vap->va_mode &= ~(S_ISUID | S_ISGID); else { err = fuse_internal_getattr(vp, &old_va, cred, td); if (err) return (err); vap->va_mode = old_va.va_mode & ~(S_ISUID | S_ISGID); } } if (vap->va_mode != (mode_t)VNOVAL) { /* Only root may set the sticky bit on non-directories */ if (checkperm && vp->v_type != VDIR && (vap->va_mode & S_ISTXT) && priv_check_cred(cred, PRIV_VFS_STICKYFILE)) return EFTYPE; if (checkperm && (vap->va_mode & S_ISGID)) { err = fuse_internal_getattr(vp, &old_va, cred, td); if (err) return (err); if (!groupmember(old_va.va_gid, cred)) { err = priv_check_cred(cred, PRIV_VFS_SETGID); if (err) return (err); } } accmode |= VADMIN; } if (vfs_isrdonly(mp)) return EROFS; if (checkperm) { err = fuse_internal_access(vp, accmode, td, cred); } else { err = 0; } if (err) return err; else return fuse_internal_setattr(vp, vap, td, cred); } /* struct vnop_strategy_args { struct vnode *a_vp; struct buf *a_bp; }; */ static int fuse_vnop_strategy(struct vop_strategy_args *ap) { struct vnode *vp = ap->a_vp; struct buf *bp = ap->a_bp; if (!vp || fuse_isdeadfs(vp)) { bp->b_ioflags |= BIO_ERROR; bp->b_error = ENXIO; bufdone(bp); return 0; } /* * VOP_STRATEGY always returns zero and signals error via bp->b_ioflags. * fuse_io_strategy sets bp's error fields */ (void)fuse_io_strategy(vp, bp); return 0; } /* struct vnop_symlink_args { struct vnode *a_dvp; struct vnode **a_vpp; struct componentname *a_cnp; struct vattr *a_vap; char *a_target; }; */ static int fuse_vnop_symlink(struct vop_symlink_args *ap) { struct vnode *dvp = ap->a_dvp; struct vnode **vpp = ap->a_vpp; struct componentname *cnp = ap->a_cnp; const char *target = ap->a_target; struct fuse_dispatcher fdi; int err; size_t len; if (fuse_isdeadfs(dvp)) { return ENXIO; } /* * Unlike the other creator type calls, here we have to create a message * where the name of the new entry comes first, and the data describing * the entry comes second. * Hence we can't rely on our handy fuse_internal_newentry() routine, * but put together the message manually and just call the core part. */ len = strlen(target) + 1; fdisp_init(&fdi, len + cnp->cn_namelen + 1); fdisp_make_vp(&fdi, FUSE_SYMLINK, dvp, curthread, NULL); memcpy(fdi.indata, cnp->cn_nameptr, cnp->cn_namelen); ((char *)fdi.indata)[cnp->cn_namelen] = '\0'; memcpy((char *)fdi.indata + cnp->cn_namelen + 1, target, len); err = fuse_internal_newentry_core(dvp, vpp, cnp, VLNK, &fdi); fdisp_destroy(&fdi); return err; } /* struct vnop_write_args { struct vnode *a_vp; struct uio *a_uio; int a_ioflag; struct ucred *a_cred; }; */ static int fuse_vnop_write(struct vop_write_args *ap) { struct vnode *vp = ap->a_vp; struct uio *uio = ap->a_uio; int ioflag = ap->a_ioflag; struct ucred *cred = ap->a_cred; pid_t pid = curthread->td_proc->p_pid; if (fuse_isdeadfs(vp)) { return ENXIO; } if (VTOFUD(vp)->flag & FN_DIRECTIO) { ioflag |= IO_DIRECT; } return fuse_io_dispatch(vp, uio, ioflag, cred, pid); } static daddr_t fuse_gbp_getblkno(struct vnode *vp, vm_ooffset_t off) { const int biosize = fuse_iosize(vp); return (off / biosize); } static int fuse_gbp_getblksz(struct vnode *vp, daddr_t lbn) { off_t filesize; int blksz, err; const int biosize = fuse_iosize(vp); err = fuse_vnode_size(vp, &filesize, NULL, NULL); KASSERT(err == 0, ("vfs_bio_getpages can't handle errors here")); if (err) return biosize; if ((off_t)lbn * biosize >= filesize) { blksz = 0; } else if ((off_t)(lbn + 1) * biosize > filesize) { blksz = filesize - (off_t)lbn *biosize; } else { blksz = biosize; } return (blksz); } /* struct vnop_getpages_args { struct vnode *a_vp; vm_page_t *a_m; int a_count; int a_reqpage; }; */ static int fuse_vnop_getpages(struct vop_getpages_args *ap) { struct vnode *vp = ap->a_vp; if (!fsess_opt_mmap(vnode_mount(vp))) { SDT_PROBE2(fusefs, , vnops, trace, 1, "called on non-cacheable vnode??\n"); return (VM_PAGER_ERROR); } return (vfs_bio_getpages(vp, ap->a_m, ap->a_count, ap->a_rbehind, ap->a_rahead, fuse_gbp_getblkno, fuse_gbp_getblksz)); } static const char extattr_namespace_separator = '.'; /* struct vop_getextattr_args { struct vop_generic_args a_gen; struct vnode *a_vp; int a_attrnamespace; const char *a_name; struct uio *a_uio; size_t *a_size; struct ucred *a_cred; struct thread *a_td; }; */ static int fuse_vnop_getextattr(struct vop_getextattr_args *ap) { struct vnode *vp = ap->a_vp; struct uio *uio = ap->a_uio; struct fuse_dispatcher fdi; struct fuse_getxattr_in *get_xattr_in; struct fuse_getxattr_out *get_xattr_out; struct mount *mp = vnode_mount(vp); struct thread *td = ap->a_td; struct ucred *cred = ap->a_cred; char *prefix; char *attr_str; size_t len; int err; if (fuse_isdeadfs(vp)) return (ENXIO); if (fsess_not_impl(mp, FUSE_GETXATTR)) return EOPNOTSUPP; err = fuse_extattr_check_cred(vp, ap->a_attrnamespace, cred, td, VREAD); if (err) return err; /* Default to looking for user attributes. */ if (ap->a_attrnamespace == EXTATTR_NAMESPACE_SYSTEM) prefix = EXTATTR_NAMESPACE_SYSTEM_STRING; else prefix = EXTATTR_NAMESPACE_USER_STRING; len = strlen(prefix) + sizeof(extattr_namespace_separator) + strlen(ap->a_name) + 1; fdisp_init(&fdi, len + sizeof(*get_xattr_in)); fdisp_make_vp(&fdi, FUSE_GETXATTR, vp, td, cred); get_xattr_in = fdi.indata; /* * Check to see whether we're querying the available size or * issuing the actual request. If we pass in 0, we get back struct * fuse_getxattr_out. If we pass in a non-zero size, we get back * that much data, without the struct fuse_getxattr_out header. */ if (uio == NULL) get_xattr_in->size = 0; else get_xattr_in->size = uio->uio_resid; attr_str = (char *)fdi.indata + sizeof(*get_xattr_in); snprintf(attr_str, len, "%s%c%s", prefix, extattr_namespace_separator, ap->a_name); err = fdisp_wait_answ(&fdi); if (err != 0) { if (err == ENOSYS) { fsess_set_notimpl(mp, FUSE_GETXATTR); err = EOPNOTSUPP; } goto out; } get_xattr_out = fdi.answ; if (ap->a_size != NULL) *ap->a_size = get_xattr_out->size; if (uio != NULL) err = uiomove(fdi.answ, fdi.iosize, uio); out: fdisp_destroy(&fdi); return (err); } /* struct vop_setextattr_args { struct vop_generic_args a_gen; struct vnode *a_vp; int a_attrnamespace; const char *a_name; struct uio *a_uio; struct ucred *a_cred; struct thread *a_td; }; */ static int fuse_vnop_setextattr(struct vop_setextattr_args *ap) { struct vnode *vp = ap->a_vp; struct uio *uio = ap->a_uio; struct fuse_dispatcher fdi; struct fuse_setxattr_in *set_xattr_in; struct mount *mp = vnode_mount(vp); struct thread *td = ap->a_td; struct ucred *cred = ap->a_cred; char *prefix; size_t len; char *attr_str; int err; if (fuse_isdeadfs(vp)) return (ENXIO); if (fsess_not_impl(mp, FUSE_SETXATTR)) return EOPNOTSUPP; if (vfs_isrdonly(mp)) return EROFS; /* Deleting xattrs must use VOP_DELETEEXTATTR instead */ if (ap->a_uio == NULL) { /* * If we got here as fallback from VOP_DELETEEXTATTR, then * return EOPNOTSUPP. */ if (fsess_not_impl(mp, FUSE_REMOVEXATTR)) return (EOPNOTSUPP); else return (EINVAL); } err = fuse_extattr_check_cred(vp, ap->a_attrnamespace, cred, td, VWRITE); if (err) return err; /* Default to looking for user attributes. */ if (ap->a_attrnamespace == EXTATTR_NAMESPACE_SYSTEM) prefix = EXTATTR_NAMESPACE_SYSTEM_STRING; else prefix = EXTATTR_NAMESPACE_USER_STRING; len = strlen(prefix) + sizeof(extattr_namespace_separator) + strlen(ap->a_name) + 1; fdisp_init(&fdi, len + sizeof(*set_xattr_in) + uio->uio_resid); fdisp_make_vp(&fdi, FUSE_SETXATTR, vp, td, cred); set_xattr_in = fdi.indata; set_xattr_in->size = uio->uio_resid; attr_str = (char *)fdi.indata + sizeof(*set_xattr_in); snprintf(attr_str, len, "%s%c%s", prefix, extattr_namespace_separator, ap->a_name); err = uiomove((char *)fdi.indata + sizeof(*set_xattr_in) + len, uio->uio_resid, uio); if (err != 0) { goto out; } err = fdisp_wait_answ(&fdi); if (err == ENOSYS) { fsess_set_notimpl(mp, FUSE_SETXATTR); err = EOPNOTSUPP; } if (err == ERESTART) { /* Can't restart after calling uiomove */ err = EINTR; } out: fdisp_destroy(&fdi); return (err); } /* * The Linux / FUSE extended attribute list is simply a collection of * NUL-terminated strings. The FreeBSD extended attribute list is a single * byte length followed by a non-NUL terminated string. So, this allows * conversion of the Linux / FUSE format to the FreeBSD format in place. * Linux attribute names are reported with the namespace as a prefix (e.g. * "user.attribute_name"), but in FreeBSD they are reported without the * namespace prefix (e.g. "attribute_name"). So, we're going from: * * user.attr_name1\0user.attr_name2\0 * * to: * * attr_name1attr_name2 * * Where "" is a single byte number of characters in the attribute name. * * Args: * prefix - exattr namespace prefix string * list, list_len - input list with namespace prefixes * bsd_list, bsd_list_len - output list compatible with bsd vfs */ static int fuse_xattrlist_convert(char *prefix, const char *list, int list_len, char *bsd_list, int *bsd_list_len) { int len, pos, dist_to_next, prefix_len; pos = 0; *bsd_list_len = 0; prefix_len = strlen(prefix); while (pos < list_len && list[pos] != '\0') { dist_to_next = strlen(&list[pos]) + 1; if (bcmp(&list[pos], prefix, prefix_len) == 0 && list[pos + prefix_len] == extattr_namespace_separator) { len = dist_to_next - (prefix_len + sizeof(extattr_namespace_separator)) - 1; if (len >= EXTATTR_MAXNAMELEN) return (ENAMETOOLONG); bsd_list[*bsd_list_len] = len; memcpy(&bsd_list[*bsd_list_len + 1], &list[pos + prefix_len + sizeof(extattr_namespace_separator)], len); *bsd_list_len += len + 1; } pos += dist_to_next; } return (0); } /* * List extended attributes * * The FUSE_LISTXATTR operation is based on Linux's listxattr(2) syscall, which * has a number of differences compared to its FreeBSD equivalent, * extattr_list_file: * * - FUSE_LISTXATTR returns all extended attributes across all namespaces, * whereas listxattr(2) only returns attributes for a single namespace * - FUSE_LISTXATTR prepends each attribute name with "namespace." * - If the provided buffer is not large enough to hold the result, * FUSE_LISTXATTR should return ERANGE, whereas listxattr is expected to * return as many results as will fit. */ /* struct vop_listextattr_args { struct vop_generic_args a_gen; struct vnode *a_vp; int a_attrnamespace; struct uio *a_uio; size_t *a_size; struct ucred *a_cred; struct thread *a_td; }; */ static int fuse_vnop_listextattr(struct vop_listextattr_args *ap) { struct vnode *vp = ap->a_vp; struct uio *uio = ap->a_uio; struct fuse_dispatcher fdi; struct fuse_listxattr_in *list_xattr_in; struct fuse_listxattr_out *list_xattr_out; struct mount *mp = vnode_mount(vp); struct thread *td = ap->a_td; struct ucred *cred = ap->a_cred; char *prefix; char *bsd_list = NULL; char *linux_list; int bsd_list_len; int linux_list_len; int err; if (fuse_isdeadfs(vp)) return (ENXIO); if (fsess_not_impl(mp, FUSE_LISTXATTR)) return EOPNOTSUPP; err = fuse_extattr_check_cred(vp, ap->a_attrnamespace, cred, td, VREAD); if (err) return err; /* * Add space for a NUL and the period separator if enabled. * Default to looking for user attributes. */ if (ap->a_attrnamespace == EXTATTR_NAMESPACE_SYSTEM) prefix = EXTATTR_NAMESPACE_SYSTEM_STRING; else prefix = EXTATTR_NAMESPACE_USER_STRING; fdisp_init(&fdi, sizeof(*list_xattr_in)); fdisp_make_vp(&fdi, FUSE_LISTXATTR, vp, td, cred); /* * Retrieve Linux / FUSE compatible list size. */ list_xattr_in = fdi.indata; list_xattr_in->size = 0; err = fdisp_wait_answ(&fdi); if (err != 0) { if (err == ENOSYS) { fsess_set_notimpl(mp, FUSE_LISTXATTR); err = EOPNOTSUPP; } goto out; } list_xattr_out = fdi.answ; linux_list_len = list_xattr_out->size; if (linux_list_len == 0) { if (ap->a_size != NULL) *ap->a_size = linux_list_len; goto out; } /* * Retrieve Linux / FUSE compatible list values. */ fdisp_refresh_vp(&fdi, FUSE_LISTXATTR, vp, td, cred); list_xattr_in = fdi.indata; list_xattr_in->size = linux_list_len; err = fdisp_wait_answ(&fdi); if (err == ERANGE) { /* * Race detected. The attribute list must've grown since the * first FUSE_LISTXATTR call. Start over. Go all the way back * to userland so we can process signals, if necessary, before * restarting. */ err = ERESTART; goto out; } else if (err != 0) goto out; linux_list = fdi.answ; /* FUSE doesn't allow the server to return more data than requested */ if (fdi.iosize > linux_list_len) { - printf("WARNING: FUSE protocol violation. Server returned " + struct fuse_data *data = fuse_get_mpdata(mp); + + fuse_warn(data, FSESS_WARN_LSEXTATTR_LONG, + "server returned " "more extended attribute data than requested; " - "should've returned ERANGE instead"); + "should've returned ERANGE instead."); } else { /* But returning less data is fine */ linux_list_len = fdi.iosize; } /* * Retrieve the BSD compatible list values. * The Linux / FUSE attribute list format isn't the same * as FreeBSD's format. So we need to transform it into * FreeBSD's format before giving it to the user. */ bsd_list = malloc(linux_list_len, M_TEMP, M_WAITOK); err = fuse_xattrlist_convert(prefix, linux_list, linux_list_len, bsd_list, &bsd_list_len); if (err != 0) goto out; if (ap->a_size != NULL) *ap->a_size = bsd_list_len; if (uio != NULL) err = uiomove(bsd_list, bsd_list_len, uio); out: free(bsd_list, M_TEMP); fdisp_destroy(&fdi); return (err); } /* struct vop_deleteextattr_args { struct vop_generic_args a_gen; struct vnode *a_vp; int a_attrnamespace; const char *a_name; struct ucred *a_cred; struct thread *a_td; }; */ static int fuse_vnop_deleteextattr(struct vop_deleteextattr_args *ap) { struct vnode *vp = ap->a_vp; struct fuse_dispatcher fdi; struct mount *mp = vnode_mount(vp); struct thread *td = ap->a_td; struct ucred *cred = ap->a_cred; char *prefix; size_t len; char *attr_str; int err; if (fuse_isdeadfs(vp)) return (ENXIO); if (fsess_not_impl(mp, FUSE_REMOVEXATTR)) return EOPNOTSUPP; if (vfs_isrdonly(mp)) return EROFS; err = fuse_extattr_check_cred(vp, ap->a_attrnamespace, cred, td, VWRITE); if (err) return err; /* Default to looking for user attributes. */ if (ap->a_attrnamespace == EXTATTR_NAMESPACE_SYSTEM) prefix = EXTATTR_NAMESPACE_SYSTEM_STRING; else prefix = EXTATTR_NAMESPACE_USER_STRING; len = strlen(prefix) + sizeof(extattr_namespace_separator) + strlen(ap->a_name) + 1; fdisp_init(&fdi, len); fdisp_make_vp(&fdi, FUSE_REMOVEXATTR, vp, td, cred); attr_str = fdi.indata; snprintf(attr_str, len, "%s%c%s", prefix, extattr_namespace_separator, ap->a_name); err = fdisp_wait_answ(&fdi); if (err == ENOSYS) { fsess_set_notimpl(mp, FUSE_REMOVEXATTR); err = EOPNOTSUPP; } fdisp_destroy(&fdi); return (err); } /* struct vnop_print_args { struct vnode *a_vp; }; */ static int fuse_vnop_print(struct vop_print_args *ap) { struct fuse_vnode_data *fvdat = VTOFUD(ap->a_vp); printf("nodeid: %ju, parent nodeid: %ju, nlookup: %ju, flag: %#x\n", (uintmax_t)VTOILLU(ap->a_vp), (uintmax_t)fvdat->parent_nid, (uintmax_t)fvdat->nlookup, fvdat->flag); return 0; } /* * Get an NFS filehandle for a FUSE file. * * This will only work for FUSE file systems that guarantee the uniqueness of * nodeid:generation, which most don't. */ /* vop_vptofh { IN struct vnode *a_vp; IN struct fid *a_fhp; }; */ static int fuse_vnop_vptofh(struct vop_vptofh_args *ap) { struct vnode *vp = ap->a_vp; struct fuse_vnode_data *fvdat = VTOFUD(vp); struct fuse_fid *fhp = (struct fuse_fid *)(ap->a_fhp); _Static_assert(sizeof(struct fuse_fid) <= sizeof(struct fid), "FUSE fid type is too big"); struct mount *mp = vnode_mount(vp); struct fuse_data *data = fuse_get_mpdata(mp); struct vattr va; int err; if (!(data->dataflags & FSESS_EXPORT_SUPPORT)) return EOPNOTSUPP; err = fuse_internal_getattr(vp, &va, curthread->td_ucred, curthread); if (err) return err; /*ip = VTOI(ap->a_vp);*/ /*ufhp = (struct ufid *)ap->a_fhp;*/ fhp->len = sizeof(struct fuse_fid); fhp->nid = fvdat->nid; if (fvdat->generation <= UINT32_MAX) fhp->gen = fvdat->generation; else return EOVERFLOW; return (0); }